From b9cf4a0eb47568de29b97f49a48120efadc14a8c Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 18:57:11 -0400 Subject: [PATCH 01/12] docs(harness-cleanup): add PR 10 implementation plan; remove stale unified-harness planning doc PR 10 is stacked on PR 9 (#423). The plan sequences the 13 cleanup-scope items plus a filesystem layout/naming consolidation (every harness -> __sync.py + __turn.py under _modules/, openai moved out of providers/_modules/) and a harness.md docs refresh. The pre-unified unified-harness-surface plan doc is removed now that the stack is merged. Co-Authored-By: Claude Opus 4.8 (1M context) --- ...unified-harness-surface-pr4-pydantic-ai.md | 246 -------- .../plans/2026-06-22-pr10-harness-cleanup.md | 566 ++++++++++++++++++ 2 files changed, 566 insertions(+), 246 deletions(-) delete mode 100644 docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md create mode 100644 docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md diff --git a/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md b/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md deleted file mode 100644 index 2fa1892fe..000000000 --- a/docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md +++ /dev/null @@ -1,246 +0,0 @@ -# Unified Harness Surface — PR 4: pydantic-ai Migration Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Migrate the pydantic-ai harness onto the unified harness surface so it emits streaming + persisted messages + tracing + turn usage through ONE source of truth, over both delivery channels (yield + auto-send), with no public regression — and ship its 3 integration test agents (sync/async/temporal). 
- -**Architecture:** Wrap a pydantic-ai run as a `HarnessTurn` (canonical `StreamTaskMessage*` stream + normalized `TurnUsage`). Reuse the existing `convert_pydantic_ai_to_agentex_events` mapping as the tap. Reimplement the existing public auto-send helper on top of `UnifiedEmitter.auto_send_turn`, and route sync ACP agents through `UnifiedEmitter.yield_turn`. Retire the bespoke `_pydantic_ai_tracing` handler in favor of the surface's derived spans (keep the old symbol as a deprecated shim). - -**Tech Stack:** Python 3, pydantic-ai (`pydantic_ai`), pydantic v2, pytest + pytest-asyncio, the `agentex.lib.core.harness` package from PRs 1-3. - -**Foundation:** `src/agentex/lib/core/harness/` (`UnifiedEmitter`, `SpanTracer`, `SpanDeriver`, `HarnessTurn`, `TurnUsage`, `TurnResult`, `yield_events`, `auto_send`, conformance scaffold). Design: `docs/superpowers/specs/2026-06-18-unified-harness-surface-design.md`. - ---- - -## Dependencies (must land first) - -- **AGX1-373** — cross-channel conformance equivalence + `Full` wire reconciliation. PR 4's conformance fixtures register into the upgraded cross-channel runner. **Do not start Task 6 until 373 is merged into the foundation branch.** -- **AGX1-375** — public `adk` import path for the harness surface. If merged, import the surface via the public path in this PR; if not, import from `agentex.lib.core.harness` and add a follow-up note. (Tasks below assume `from agentex.lib.core.harness import UnifiedEmitter, TurnUsage, ...`; swap to the public path if 375 landed.) - -This is one PR (target < 1000 lines code, excluding any recorded fixtures). The 3 test agents are the largest chunk; if the diff exceeds budget, split the test agents into a follow-up PR 4b (note in the PR description). - ---- - -## File Structure - -- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_sync.py` — add an optional `on_result` callback to `convert_pydantic_ai_to_agentex_events` (additive) so usage can be captured. Behavior unchanged when omitted. 
-- Create `src/agentex/lib/adk/_modules/_pydantic_ai_turn.py` — `PydanticAITurn(HarnessTurn)` + `pydantic_ai_usage_to_turn_usage(...)`. -- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_async.py` — reimplement `stream_pydantic_ai_events` on `UnifiedEmitter.auto_send_turn`, preserving signature + return. -- Modify `src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py` — mark `create_pydantic_ai_tracing_handler` / `AgentexPydanticAITracingHandler` deprecated (docstring + `DeprecationWarning`); keep importable. -- Create `tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py` — register pydantic-ai fixtures into the cross-channel conformance runner. -- Create `examples/tutorials/harness-pydantic-ai-{sync,async,temporal}/` — 3 test agents (modeled on the `sync-pydantic-ai` / `default-pydantic-ai` / `temporal-pydantic-ai` CLI templates) using the unified surface. -- Modify `.github/workflows/harness-integration.yml` — enable the pydantic-ai rows of the `live-matrix` job. -- Modify `.github/workflows/agentex-tutorials-test.yml` (or its agent list) — include the 3 new test agents if that workflow enumerates agents. - ---- - -## Task 1: Expose the pydantic-ai run result for usage capture - -**Files:** -- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_sync.py` -- Test: `tests/lib/adk/test_pydantic_ai_sync.py` (create if absent) - -The converter already iterates the pydantic-ai event stream and currently *ignores* `AgentRunResultEvent` (the terminal event carrying the run result + usage). Add an optional callback so a caller can capture it without changing existing behavior. 
- -- [ ] **Step 1: Write the failing test.** - -```python -import pytest -from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events - - -class _FakeResultEvent: # stand-in for pydantic_ai.run.AgentRunResultEvent - def __init__(self, result): - self.result = result - - -async def _stream(events): - for e in events: - yield e - - -@pytest.mark.asyncio -async def test_on_result_callback_receives_terminal_event(monkeypatch): - # When the stream ends with an AgentRunResultEvent, on_result is invoked with it, - # and the converter still yields no extra events for it. - captured = {} - # Use a real AgentRunResultEvent if constructable; otherwise patch isinstance check. - # (Implementer: see Step 3 note — match the real terminal event type.) - ... -``` - -Implementer note: the exact terminal event type is `pydantic_ai.run.AgentRunResultEvent` (already imported in `_pydantic_ai_sync.py`). Write the test to feed a stream ending in a real `AgentRunResultEvent` (construct it as the installed pydantic-ai version requires; inspect `python -c "import pydantic_ai.run, inspect; print(inspect.signature(pydantic_ai.run.AgentRunResultEvent))"`). Assert `on_result` is called once with that event and that the converter yields the same `StreamTaskMessage*` sequence as without the callback (no behavior change for the streaming output). - -- [ ] **Step 2: Run** `uv run pytest tests/lib/adk/test_pydantic_ai_sync.py -v` — expect FAIL (no `on_result` param). - -- [ ] **Step 3: Implement.** Add `on_result: Callable[[AgentRunResultEvent], None] | None = None` (and an async-callable variant if needed) to `convert_pydantic_ai_to_agentex_events`. In the existing `elif isinstance(event, (FunctionToolCallEvent, FinalResultEvent, AgentRunResultEvent))` branch, when the event is an `AgentRunResultEvent` and `on_result` is set, call it (await if it's a coroutine). Keep yielding nothing for it. No other change. 
- -- [ ] **Step 4: Run** the test — expect PASS, plus run the existing `_pydantic_ai_sync` tests if any to confirm no regression. - -- [ ] **Step 5: Commit** `feat(pydantic-ai): optional on_result callback to expose run result for usage capture`. - ---- - -## Task 2: Normalize pydantic-ai usage to `TurnUsage` - -**Files:** -- Create: `src/agentex/lib/adk/_modules/_pydantic_ai_turn.py` -- Test: `tests/lib/adk/test_pydantic_ai_turn.py` - -- [ ] **Step 1: Verify the real usage shape FIRST.** Run `uv run python -c "from pydantic_ai.usage import RunUsage; import inspect; print([f for f in RunUsage.model_fields])"` (the type/name may be `RunUsage` or `Usage` depending on the installed version). Record the exact field names (commonly: `input_tokens`, `output_tokens`, `total_tokens`, `requests`, and a cache/`details` field). The mapping in Step 3 MUST use the real field names. - -- [ ] **Step 2: Write the failing test.** - -```python -from agentex.lib.adk._modules._pydantic_ai_turn import pydantic_ai_usage_to_turn_usage - - -def test_usage_normalization_maps_fields(): - # Build a usage object matching the installed pydantic-ai RunUsage shape - # (see Task 2 Step 1 for the real fields), then assert the mapping. - usage_obj = ... # construct RunUsage(input_tokens=10, output_tokens=20, requests=2, ...) - tu = pydantic_ai_usage_to_turn_usage(usage_obj, model="openai:gpt-4o") - assert tu.model == "openai:gpt-4o" - assert tu.input_tokens == 10 - assert tu.output_tokens == 20 - assert tu.num_llm_calls == 2 -``` - -- [ ] **Step 3: Implement** `pydantic_ai_usage_to_turn_usage(usage, model) -> TurnUsage` mapping the verified RunUsage fields onto `TurnUsage` (`input_tokens`, `output_tokens`, `total_tokens`, `cached_input_tokens` if available, `num_llm_calls` ← `requests`). Use `getattr(usage, "", None)` defensively so a version field rename degrades to `None` rather than crashing. 
Then implement `PydanticAITurn`: - -```python -class PydanticAITurn: - """A pydantic-ai run as a HarnessTurn: canonical event stream + normalized usage.""" - - def __init__(self, stream, model: str | None = None): - self._stream = stream - self._model = model - self._usage = TurnUsage(model=model) - - @property - async def events(self): - def _capture(result_event): - run_result = getattr(result_event, "result", None) - usage_obj = run_result.usage() if run_result is not None else None - if usage_obj is not None: - self._usage = pydantic_ai_usage_to_turn_usage(usage_obj, self._model) - async for ev in convert_pydantic_ai_to_agentex_events(self._stream, on_result=_capture): - yield ev - - def usage(self) -> TurnUsage: - return self._usage -``` - -(Verify `run_result.usage()` is the correct accessor for the installed version; adjust if it's an attribute.) - -- [ ] **Step 4: Add a `PydanticAITurn` test** that feeds a small stream ending in an `AgentRunResultEvent` whose `result.usage()` returns a known usage, drives `turn.events` to exhaustion, then asserts `turn.usage()` reflects the normalized values and that `events` yielded the expected `StreamTaskMessage*`. Confirm `usage()` BEFORE exhaustion returns the default (documented single-pass contract). - -- [ ] **Step 5: Run** the tests — expect PASS. - -- [ ] **Step 6: Commit** `feat(pydantic-ai): PydanticAITurn HarnessTurn + usage normalization`. - ---- - -## Task 3: Reimplement the auto-send helper on the unified surface - -**Files:** -- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_async.py` -- Test: `tests/lib/adk/test_pydantic_ai_async.py` - -`stream_pydantic_ai_events(stream, task_id, ...)` currently hand-drives `adk.streaming`. Reimplement it to delegate to `UnifiedEmitter.auto_send_turn(PydanticAITurn(stream, model))`, preserving its signature and return value (the accumulated final text). Feature-add: traces by default. 
- -- [ ] **Step 1: Capture current behavior as a characterization test.** Before changing anything, write a test that runs the CURRENT `stream_pydantic_ai_events` over a fixture stream with a fake `adk.streaming` and records the messages produced (text, tool request/response). This is the backward-compat baseline ("equivalent messages before/after" from the design). - -- [ ] **Step 2: Run** it green against the current implementation. Commit the test alone: `test(pydantic-ai): characterize stream_pydantic_ai_events output`. - -- [ ] **Step 3: Reimplement** `stream_pydantic_ai_events` to build a `PydanticAITurn` and call `UnifiedEmitter(task_id=task_id, trace_id=, parent_span_id=, streaming=).auto_send_turn(turn)`, returning `result.final_text`. Resolve `trace_id`/`parent_span_id` the same way the module does today (from the streaming/tracing context vars it already reads). Preserve the exact public signature and return type. - -- [ ] **Step 4: Run** the characterization test — it must still pass (same messages). Adjust the test only if AGX1-373 deliberately changed the tool-message wire shape; in that case assert the post-373 shape and note it. Confirm tracing now occurs by default (assert spans via a fake tracer). - -- [ ] **Step 5: Commit** `refactor(pydantic-ai): reimplement stream_pydantic_ai_events on UnifiedEmitter (default tracing)`. - ---- - -## Task 4: Route sync ACP delivery through the surface + deprecate the bespoke tracing handler - -**Files:** -- Modify: `src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py` -- (Reference) the sync ACP usage pattern in the pydantic-ai docs/templates. - -- [ ] **Step 1: Deprecate the bespoke tracing handler.** Add a `DeprecationWarning` (via `warnings.warn(...)`) and a docstring note to `create_pydantic_ai_tracing_handler` / `AgentexPydanticAITracingHandler` stating the unified surface (`UnifiedEmitter`, which derives spans from the canonical stream) supersedes it. 
Keep the symbols importable and functional (no removal — backward compat). - -- [ ] **Step 2: Confirm the sync path.** The sync tap remains `convert_pydantic_ai_to_agentex_events`. Document (in the module docstring of `_pydantic_ai_sync.py`) the recommended sync ACP usage: - -```python -turn = PydanticAITurn(agent.run_stream_events(...), model=...) -async for event in emitter.yield_turn(turn): - yield event -``` - -No code change beyond the docstring (the sync converter already yields the canonical stream; `yield_turn` adds tracing). Add a test that `emitter.yield_turn(PydanticAITurn(...))` forwards the same events the bare converter would and derives spans. - -- [ ] **Step 3: Run** tests; **Commit** `refactor(pydantic-ai): deprecate bespoke tracing handler; document unified sync path`. - ---- - -## Task 5: pydantic-ai cross-channel conformance fixtures - -**Files:** -- Create: `tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py` - -**Blocked by AGX1-373** (the cross-channel conformance runner). Once 373 is merged into the foundation branch: - -- [ ] **Step 1: Record canonical fixtures.** For 3-4 representative pydantic-ai runs (text-only; single tool; reasoning/thinking; multi-step text+tool), capture the `StreamTaskMessage*` sequence the tap produces (run `convert_pydantic_ai_to_agentex_events` over recorded `AgentStreamEvent` inputs, or hand-author the canonical sequences). Store as `Fixture(name=..., events=[...])`. - -- [ ] **Step 2: Register** each fixture with the conformance runner and let the cross-channel parametrized test (from AGX1-373) assert yield-vs-auto-send equivalence + span equivalence for each. Register/parametrize within THIS module (per the runner's documented per-module registry semantics). - -- [ ] **Step 3: Run** `./scripts/test tests/lib/core/harness/ -v` — all green. **Commit** `test(pydantic-ai): cross-channel conformance fixtures`. 
- ---- - -## Task 6: Three integration test agents (sync / async / temporal) - -**Files:** -- Create: `examples/tutorials/harness-pydantic-ai-sync/` , `…-async/` , `…-temporal/` (each a minimal Agentex agent). -- Modify: `.github/workflows/harness-integration.yml` (enable pydantic-ai `live-matrix` rows). -- Modify: `.github/workflows/agentex-tutorials-test.yml` if it enumerates agents. - -Each agent is the smallest agent that exercises one delivery channel through the unified surface with the pydantic-ai harness. - -- [ ] **Step 1: Scaffold from the existing templates.** Base each agent on the corresponding CLI template: `sync-pydantic-ai`, `default-pydantic-ai` (async), `temporal-pydantic-ai` (under `src/agentex/lib/cli/templates/`). In each, the message handler builds `PydanticAITurn(agent.run_stream_events(params.content.content), model=...)` and: - - sync agent: `async for ev in emitter.yield_turn(turn): yield ev` - - async + temporal agents: `await emitter.auto_send_turn(turn)` (temporal: inside the activity, as the template already structures it). - Use a tiny pydantic-ai agent with ONE trivial tool so the run exercises text + a tool call + tool response. - -- [ ] **Step 2: Write an integration test per agent** that drives it with a fixed prompt and asserts: valid ordered messages (text + tool request + tool response) and a well-formed span tree. Use the repo's existing tutorial-agent test harness pattern (see `agentex-tutorials-test.yml` and how current tutorial agents are tested). - -- [ ] **Step 3: Wire CI.** In `.github/workflows/harness-integration.yml`, replace the `if: false` placeholder `live-matrix` job (or add a real matrix) with the pydantic-ai × {sync, async, temporal} entries, each running its agent's integration test. If `agentex-tutorials-test.yml` enumerates agents, add the three there too. `log`/document any agent-type not covered (none expected for pydantic-ai). 
- -- [ ] **Step 4: Run** the integration tests locally (as far as the env allows) and the conformance + unit suites. **Commit** `test(pydantic-ai): sync/async/temporal integration agents + enable CI live-matrix rows`. - ---- - -## Task 7: Full suite, type check, and backward-compat audit - -- [ ] **Step 1:** `./scripts/test tests/lib/core/harness/ tests/lib/adk/ -v` — all green on 3.12 + 3.13. -- [ ] **Step 2:** `uv run pyright src/agentex/lib/` (or the harness + pydantic modules) — 0 new errors. -- [ ] **Step 3: Backward-compat audit.** Confirm the public signatures are unchanged: `convert_pydantic_ai_to_agentex_events` (only gained an optional kwarg), `stream_pydantic_ai_events` (same signature + return), `create_pydantic_ai_tracing_handler` (still importable, now warns). Grep the repo + templates for callers and confirm none broke. -- [ ] **Step 4:** If any fix was needed, **Commit** `chore(pydantic-ai): type/back-compat fixes`. - ---- - -## Self-Review checklist (run before opening the PR) - -- Every public symbol that existed before still exists with the same signature (additive-only): `convert_pydantic_ai_to_agentex_events`, `stream_pydantic_ai_events`, `create_pydantic_ai_tracing_handler`. -- The auto-send helper returns the same final text as before (characterization test passes, or the post-373 shape is asserted with a note). -- Tracing is now on by default for both channels and is overridable (emitter `tracer=False`). -- Usage normalization uses the REAL pydantic-ai usage field names (verified in Task 2 Step 1), with defensive `getattr`. -- Conformance fixtures register per-module and pass the cross-channel assertion from AGX1-373. -- 3 test agents exist and their CI rows are enabled. -- No `# type: ignore` added without justification. - -## Notes for the PR description - -- Link AGX1-373 (dependency) and AGX1-375 (import path); note AGX1-374 (reasoning/mixed-ordering auto_send tests) is foundation-level and orthogonal. 
-- State the diff size; if test agents pushed it over budget, note the PR 4b split. -- This is the template the langgraph (PR 5) and openai (PR 6) migrations follow. diff --git a/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md b/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md new file mode 100644 index 000000000..2217dcecf --- /dev/null +++ b/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md @@ -0,0 +1,566 @@ +# PR 10 — Post-Merge Harness Cleanup Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Remove the transitional artifacts left behind by the additive harness-surface stack (deprecated tracing handlers, resolved-workaround comments, duplicated test scaffolding, divergent per-harness structures), consolidate the harness source/test/tutorial filesystem onto one convention, retire the duplicate pre-unified tutorials, and bring `adk/docs/harness.md` in line with the final merged surface. + +**Architecture:** The harness-surface stack (#412 foundation, #414 conformance, #415/#416/#417/#420/#421 migrations, #423 facade+docs) was built additively so nothing regressed and each PR stayed reviewable. PR 10 is the single, deliberate cleanup that runs once the whole stack is merged and the deprecation/migration preconditions hold. Work is ordered so non-breaking refactors land first and the breaking removals (deprecated public symbols + the tutorials that import them) land last, behind the version-bump gate. + +**Tech Stack:** Python 3.12/3.13, `rye`/`uv`, `pytest`, `ruff`, `pyright`/`mypy`, Temporal, pydantic. Tutorials use per-project `uv` envs. 
+ +> **Branch / base:** This plan lives on `declan-scale/pr10-harness-cleanup`, **stacked on top of `declan-scale/pr9-harness-cleanup` (PR 9, #423)**, which is itself rebased onto the latest `next`. The migration stack (#412/#414–#421) is **already merged** to `next`; PR 9 (the public adk facade + `adk/docs/harness.md`) is the base of this branch. Because PR 9 is the base, the facade and `harness.md` are already present here, so the facade-reconciliation (C1/C2) and `harness.md` (C3) tasks are directly actionable. When PR 9 merges into `next`, rebase this branch onto `next` (the PR 9 commits drop out as already-merged). + +> **Altitude note (read before executing):** This plan pins **exact file paths, the concrete transformation, and exact verification commands** — all verified against the merged `next` tree as of 2026-06-22. It deliberately does **not** hardcode line numbers (they drift as batches land). Where a step says "resolve at execution," run the named grep against the current tree first, then apply the described change. + +--- + +## Preconditions (do not start the BREAKING batches until ALL hold) + +1. **#423 (PR 9) is the base of this branch** — the facade + `harness.md` are present here, and the whole migration stack (#412/#414–#421) is already merged to `next`. When PR 9 merges to `next`, rebase this branch onto `next` before the breaking batches land. +2. **Deprecation window observed** (or a minor/major version boundary) for the publicly-deprecated symbols below — they were only docstring-deprecated, never runtime-warned, so external code may still import them. +3. **Golden agent migrated** off the bespoke paths (per the adoption plan, #422 → implementation in `agentex-agents`): it no longer constructs the deprecated tracing handlers or any pre-unified converter path. Grep the golden agent + any other internal consumers first. +4. 
**No external consumers** depend on the removed symbols (check downstream usage; add a changelog/release note for the removal). + +**Optional split:** Batches A–D, G, and the stale-doc removals are **non-breaking** (tests, internal helpers, docs, integration coverage) — they only need precondition 1. If the breaking removals (Batches E, F, I) are blocked on the version-bump policy, land the non-breaking batches as an earlier cleanup PR and keep E/F/I for PR 10. + +--- + +## Execution order + +| Batch | Items | Breaking? | Gated on | +|---|---|---|---| +| A — Test scaffolding consolidation | 6, 7 | No | Precond. 1 | +| B — Internal helper / sync-path consolidation | 5, 8, 9 | No | Precond. 1 | +| C — Facade reconciliation + harness.md doc update | 10, 3 | No (additive namespace) | Precond. 1 (PR 9 merged) | +| D — Conformance vestigial cleanup | 4 | No | Precond. 1 | +| G — Integration-test parity | 12 | No | Precond. 1 | +| E — Tutorial standardization + retirement | 11, 13 | Yes (deletes dirs) | Precond. 1–4 (rides with F) | +| F — Deprecated tracing-handler + workaround removal | 1, 2 | Yes (public symbols) | Precond. 1–4 | +| I — Filesystem layout + naming consolidation | NEW | Yes (file moves + import changes) | After E/F (and Precond. 1–4) | +| H — Final docs + changelog + stale-doc removal | — | — | After E/F/I land | + +Run A → B → D → G first (green, non-breaking). C lands once PR 9 is merged. Then E + F together (the old tutorials import the symbols F removes — they MUST land in the same commit range), then I (the final structural sweep, after `_tracing.py` is already gone), then H. + +--- + +## Batch A — Consolidate duplicated test scaffolding (items 6, 7) + +### Task A1: Extract the shared harness test fakes + +Verified copies on `next`: `_FakeTracing` is defined in 7 places and `_FakeSpan` in 6; `_run_yield_turn` in 2. There are also near-variants under `tests/lib/adk/` (`_FakeTracingBackend`). 
+ +**Files:** +- Create: `tests/lib/core/harness/_fakes.py` +- Modify (delete local copy, import from `_fakes`): `tests/lib/core/harness/test_tracer.py`, `tests/lib/core/harness/test_emitter.py`, `tests/lib/core/harness/conformance/runner.py`, `tests/lib/core/harness/test_harness_pydantic_ai_sync.py`, `..._async.py`, `tests/lib/core/harness/test_harness_langgraph_sync.py`, `..._async.py`, `tests/lib/adk/test_pydantic_ai_sync_unified.py`, `tests/lib/adk/test_langgraph_sync_unified.py` + +- [ ] **Step 1: Grep the tree for every definition site** + +Run: `grep -rn "class _FakeTracing\|class _FakeSpan\|class _FakeTracingBackend\|def _run_yield_turn" tests/` +Confirm the full set before changing anything. Note `_FakeTracingBackend` (in `test_langgraph_sync_unified.py`) — decide if it is the same shape (fold it) or genuinely different (leave it, document why). + +- [ ] **Step 2: Create `_fakes.py` from the canonical copy** + +Lift the definitions from `tests/lib/core/harness/test_tracer.py` (the foundation copy) verbatim into `tests/lib/core/harness/_fakes.py`, exported as public names `FakeSpan`, `FakeTracing`, `run_yield_turn` (drop the leading underscore now that they are shared). This is a move, not a rewrite. + +- [ ] **Step 3: Replace each local copy with an import** + +In each file from the Files list, delete the local class/func block and add `from tests.lib.core.harness._fakes import FakeSpan, FakeTracing, run_yield_turn` (import only what that file uses). Update references (`_FakeTracing` → `FakeTracing`, etc.). The `tests/lib/adk/*_sync_unified.py` files import across packages — confirm the import path resolves under the test rootdir. + +- [ ] **Step 4: Verify no copies remain** + +Run: `grep -rn "class _FakeTracing\|class _FakeSpan\|def _run_yield_turn" tests/` +Expected: zero matches (only `_fakes.py`'s `class FakeTracing`/`class FakeSpan`/`def run_yield_turn`, which this grep does not match). 
+ - [ ] **Step 5: Run the harness + adk test suites** + +Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` +Expected: same pass count as pre-change, zero failures. + +- [ ] **Step 6: Lint + commit** + +Run: `uv run ruff check tests/` +```bash +git add tests/ +git commit -m "test(harness): extract shared FakeSpan/FakeTracing/run_yield_turn fakes" +``` + +### Task A2: Parametrize the conformance determinism test once + +Verified on `next`: `def test_span_derivation_is_deterministic` exists in `conformance/test_conformance.py` (shared), `test_langgraph_conformance.py`, and `test_pydantic_ai_conformance.py`. **Additionally**, `test_codex_conformance.py` carries the same determinism assertion (`assert derive_all(x) == derive_all(x)`) under its own test — so grep for the assertion, not just the function name. + +**Files:** +- Modify: `tests/lib/core/harness/conformance/test_conformance.py` (keep the single parametrized test) +- Modify: each `tests/lib/core/harness/conformance/test_<harness>_conformance.py` (delete its determinism copy, keep fixture registration + cross-channel assertions) + +- [ ] **Step 1: Grep for every determinism copy** + +Run: `grep -rn "test_span_derivation_is_deterministic\|derive_all(.*) == derive_all" tests/lib/core/harness/conformance/` +Expected: the shared copy in `test_conformance.py` plus per-harness copies (currently langgraph, pydantic-ai, codex; check openai/claude too). + +- [ ] **Step 2: Make the shared copy parametrized over all fixtures** + +In `test_conformance.py`, ensure `test_span_derivation_is_deterministic` is parametrized by `all_fixtures()` (the registry the conformance runner exposes via `register`) so one test re-derives `derive_all(...)` over every registered fixture and asserts identical output across repeated derivation. It must reference no harness-specific symbol. 
+ - [ ] **Step 3: Delete the per-harness copies** + +Remove the determinism test/assertion from every `test_<harness>_conformance.py`, leaving those modules with only fixture registration + cross-channel assertions. Keep `derive_all` itself in `runner.py` — it is the shared primitive the parametrized test uses (NOT vestigial; see Batch D). + +- [ ] **Step 4: Verify exactly one definition remains** + +Run: `grep -rn "def test_span_derivation_is_deterministic" tests/lib/core/harness/conformance/` +Expected: exactly one match, in `test_conformance.py`. + +- [ ] **Step 5: Run conformance tests + commit** + +Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/conformance/ -q` +```bash +git add tests/lib/core/harness/conformance/ +git commit -m "test(harness): parametrize the conformance determinism test once over all_fixtures()" +``` + +--- + +## Batch B — Consolidate internal helpers + sync paths (items 5, 8, 9) + +### Task B1: Remove leftover hand-rolled streaming branches (item 5) + +**Files (resolve exact branches at execution):** `src/agentex/lib/adk/_modules/_pydantic_ai_async.py`, `_langgraph_async.py`, and any openai/claude/codex async helper. + +- [ ] **Step 1: Confirm async helpers delegate to the emitter** + +Run: `grep -rn "auto_send_turn\|streaming_task_message_context\|adk.streaming" src/agentex/lib/adk/_modules/_*_async.py src/agentex/lib/adk/providers/_modules/` +Expected: `stream_*_events` / `run_agent_streamed_auto_send` call `UnifiedEmitter.auto_send_turn`. Flag any remaining hand-rolled `adk.streaming` loop as dead. + +- [ ] **Step 2: Delete the dead branches** the emitter delegation made unreachable. Do not touch a live delivery route. 
+ +- [ ] **Step 3: Verify + commit** + +Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` +```bash +git add src/agentex/lib/adk/ +git commit -m "refactor(harness): drop dead hand-rolled streaming branches now covered by auto_send_turn" +``` + +### Task B2: Extract a shared usage-normalization primitive (item 8) + +The five `HarnessTurn` impls (`_pydantic_ai_turn.py`, `_langgraph_turn.py`, `providers/_modules/openai_turn.py`, `_claude_code_turn.py`, `_codex_turn.py`) repeat the same shape: wrap a tap's event stream + normalize provider usage into `TurnUsage`. + +**Files:** +- Create: `src/agentex/lib/core/harness/usage.py` (`normalize_usage(...)`) — or a `HarnessTurnBase` mixin in `core/harness/types.py` +- Create: `tests/lib/core/harness/test_usage.py` +- Modify: the five turn modules + +- [ ] **Step 1: Diff the five turn impls** for the common shape. + +Run: `wc -l src/agentex/lib/adk/_modules/_pydantic_ai_turn.py src/agentex/lib/adk/_modules/_langgraph_turn.py src/agentex/lib/adk/providers/_modules/openai_turn.py src/agentex/lib/adk/_modules/_claude_code_turn.py src/agentex/lib/adk/_modules/_codex_turn.py` +Note the existing `claude_code_usage_to_turn_usage` / `codex_usage_to_turn_usage` helpers — these are exactly the per-harness normalizers to converge. + +- [ ] **Step 2: Write the shared primitive (TDD).** Add `test_usage.py` asserting `normalize_usage` maps representative provider usage into the correct `TurnUsage` fields (aligning with `agentex.lib.core.observability.llm_metrics`). Implement `usage.py` to pass. + +Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/test_usage.py -q` → PASS. + +- [ ] **Step 3: Route each turn module through the primitive,** leaving only provider-specific mapping. Do NOT force-fit a harness whose usage genuinely diverges (check codex — it is the largest for a reason; document if you skip it). 
+ +- [ ] **Step 4: Verify + commit** + +Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` +```bash +git add src/agentex/lib/core/harness/usage.py tests/lib/core/harness/test_usage.py src/agentex/lib/adk/ +git commit -m "refactor(harness): extract shared TurnUsage normalization primitive" +``` + +### Task B3: Converge the sync-path structures (item 9 — overlaps Batch I) + +"Sync delivery" was built three ways: openai patches `providers/_modules/sync_provider.py` (+ `openai_turn.py`); pydantic-ai/langgraph use `_*_sync.py`; claude/codex use `_claude_code_sync.py`/`_codex_sync.py`. + +- [ ] **Step 1: Adopt the per-harness `_<harness>_sync.py` convention** (the majority pattern, and the target end-state in Batch I). Document the choice in the commit body. +- [ ] **Step 2: Align openai to it** — this is the structural half of Batch I's openai relocation; do them together (Task I2). +- [ ] **Step 3: Verify + commit** + +Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` +```bash +git add src/agentex/lib/adk/ +git commit -m "refactor(harness): converge the five sync paths on the _<harness>_sync.py convention" +``` + +--- + +## Batch C — Reconcile the facade + update harness.md (items 10, 3) — needs PR 9 merged + +### Task C1: Fold the claude/codex ad-hoc exports into the #423 facade (item 10) + +Verified on `next` (pre-PR-9): `adk/__init__.py` already exports ad-hoc per-harness symbols — `convert_claude_code_to_agentex_events`, `ClaudeCodeTurn`, `claude_code_usage_to_turn_usage`, `convert_codex_to_agentex_events`, `CodexTurn`, `codex_usage_to_turn_usage` (plus the existing pydantic/langgraph taps + the deprecated `create_*_tracing_handler`). PR 9 adds the unified facade block (`UnifiedEmitter`, `SpanTracer`, `HarnessTurn`, `OpenSpan`, `CloseSpan`, `SpanSignal`, `StreamTaskMessage`, `TurnUsage`, `TurnResult`).
+ +**Files:** `src/agentex/lib/adk/__init__.py` + +- [ ] **Step 1: After rebasing onto PR-9'd `next`,** grep the facade region: + +Run: `grep -n "harness\|UnifiedEmitter\|convert_.*_to_agentex_events\|Turn\b\|usage_to_turn_usage" src/agentex/lib/adk/__init__.py` + +- [ ] **Step 2: Deduplicate.** Ensure every public harness symbol is imported once and listed once in `__all__`, organized under the unified facade block from #423. Remove duplicate import lines / `__all__` entries. Preserve the `# ruff: noqa: I001` ordering comment and the circular-import-safe ordering. + +- [ ] **Step 3: Verify the surface imports cleanly** + +Run: `uv run --all-packages python -c "import agentex.lib.adk as adk; assert len(adk.__all__) == len(set(adk.__all__)), 'dupes'; print('ok')"` +Expected: `ok`. + +- [ ] **Step 4: Lint + commit** + +Run: `uv run ruff check src/agentex/lib/adk/__init__.py && uv run pyright src/agentex/lib/adk/__init__.py` +```bash +git add src/agentex/lib/adk/__init__.py +git commit -m "refactor(adk): fold claude/codex exports into the single #423 harness facade" +``` + +### Task C2: (Decision-gated) Introduce the `adk.harness` namespace (item 3) + +> Team decision required; polish, not required. If declined, skip and record it in the PR body; `harness.md` keeps the flat `agentex.lib.adk` paths. + +**Files (if adopted):** Create `src/agentex/lib/adk/harness.py` (re-export the surface + taps); modify `adk/__init__.py` to keep flat re-exports for one release (back-compat). + +- [ ] **Step 1:** Create the namespace re-exporting `UnifiedEmitter`, `SpanTracer`, `HarnessTurn`, `OpenSpan`, `CloseSpan`, `SpanSignal`, `StreamTaskMessage`, `TurnUsage`, `TurnResult`, and each `convert_<harness>_to_agentex_events` tap. +- [ ] **Step 2:** Keep flat `adk.*` re-exports with a comment they're retained for one release, slated to drop in a later major.
+- [ ] **Step 3: Verify both paths** + +Run: `uv run --all-packages python -c "from agentex.lib.adk.harness import UnifiedEmitter; from agentex.lib.adk import UnifiedEmitter; print('ok')"` +- [ ] **Step 4: Commit** (`refactor(adk): add adk.harness namespace, keep flat re-exports for back-compat`). + +### Task C3: Update `adk/docs/harness.md` to the final merged surface (MANDATORY) + +> Explicitly requested: keep `harness.md` up to date and update the docs in PR 10. + +**Files:** `adk/docs/harness.md` (arrives via PR 9) + +- [ ] **Step 1: Complete the taps table.** Replace "Taps for claude-code and codex will be added in subsequent PRs (AGX1-420, AGX1-421)" with the merged reality — list all five shipped harnesses (pydantic-ai, LangGraph, OpenAI Agents, claude-code, codex), each with its `convert_<harness>_to_agentex_events` tap, all exported from `agentex.lib.adk`. Remove the "will be added" sentence. + +- [ ] **Step 2: Fix the sync ACP example.** The current "Sync ACP (pydantic-ai tap)" example builds a `UnifiedEmitter` then yields the tap directly, leaving the emitter unused (Greptile flagged this on #423) under a "pre-unified sync path" caveat. Replace with the canonical post-migration flow: + +```python +import agentex.lib.adk as adk +from agentex.lib.adk import UnifiedEmitter, PydanticAITurn # Turn wrapper implements HarnessTurn + +@acp.on_message_send +async def handle(params): + task_id = params.task.id + async with adk.tracing.span(trace_id=task_id, name="message", ...) as turn_span: + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + turn = PydanticAITurn(pydantic_stream) + async for event in emitter.yield_turn(turn): + yield event +``` + +Delete the "For the pre-unified sync path the tap is still yielded directly..." paragraph.
+ +- [ ] **Step 3: Reconcile import paths with the C2 decision.** If `adk.harness` is adopted, show it as primary in the import block + examples (note flat path retained one release). Else leave flat paths. + +- [ ] **Step 4: Reflect the Batch I module layout** if I lands before H — any path references in the doc (e.g. "implementation lives at `src/agentex/lib/core/harness/`") stay correct, but if examples name `_modules` paths, update to the consolidated `_<harness>_sync.py`/`_<harness>_turn.py` names. + +- [ ] **Step 5: Guard against dangling references** + +Run: `grep -n "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler\|AgentexLangGraphTracingHandler\|AgentexPydanticAITracingHandler" adk/docs/harness.md` +Expected: zero (so Batch F's removals leave no dangling doc reference). + +- [ ] **Step 6: Commit** + +```bash +git add adk/docs/harness.md +git commit -m "docs(harness): update harness.md to the final merged surface (all taps, canonical yield_turn example)" +``` + +--- + +## Batch D — Remove vestigial conformance paths (item 4) + +Note: `derive_all` (in `conformance/runner.py`) is **actively used** by the determinism tests — keep it. Look only for genuinely unreferenced simple/determinism-only runner code. + +- [ ] **Step 1: Find unused runner paths** + +Run: `grep -rn "derive_all\|simple_runner\|determinism_only\|run_cross_channel" tests/lib/core/harness/ src/agentex/lib/core/harness/` +For each hit, confirm whether anything still imports it after the cross-channel runner (#414) became the single entry point. + +- [ ] **Step 2: Remove dead paths** that nothing imports. Keep `derive_all` and `run_cross_channel_conformance` (live).
+ +- [ ] **Step 3: Verify + commit** + +Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/conformance/ -q` +```bash +git add tests/lib/core/harness/ +git commit -m "test(harness): remove vestigial simple-conformance-runner paths" +``` + +--- + +## Batch G — Integration-test coverage parity (item 12) + +Verified: only pydantic-ai + langgraph ship `test_harness_*_{sync,async,temporal}` suites. openai/claude/codex ship only conformance + turn/sync tests. + +**Files:** +- Create (if adding parity): `tests/lib/core/harness/test_harness_openai_{sync,async,temporal}.py`, `..._claude_code_{...}.py`, `..._codex_{...}.py` +- Modify: the harness live-matrix workflow (collapse the two near-identical matrix jobs into one) + +- [ ] **Step 1: Decide parity vs documented difference** (with the team). Either add the missing suites mirroring the pydantic-ai shape (importing the Batch A `_fakes`), or document the intentional gap in `harness.md` / a test README. +- [ ] **Step 2 (if adding): write them against the shared fakes.** + +Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ -q` → green. +- [ ] **Step 3: Collapse the two matrix jobs into one** parametrized matrix (enabled now that fakes are shared). + +Run: `grep -rn "matrix\|harness" .github/workflows/*.yml` +- [ ] **Step 4: Commit** + +```bash +git add tests/lib/core/harness/ .github/workflows/ +git commit -m "test(harness): add integration-suite parity and collapse the live matrix to one job" +``` + +--- + +## Batch E — Tutorial standardization + retirement (items 11, 13) — BREAKING + +> Gated with Batch F: the surviving tutorials must not import the symbols F removes; the old tutorials deleted here are the ones that DO import them. Execute E and F in the same commit range. 
+ +Verified dual tutorial sets on `next`: + +| Framework | Pre-unified (RETIRE) | Unified-surface (KEEP, rename into slot) | +|---|---|---| +| langgraph | `00_sync/030_langgraph`, `10_async/00_base/100_langgraph`, `10_async/10_temporal/130_langgraph` | `harness_langgraph` ×3 | +| pydantic-ai | `00_sync/040_pydantic_ai`, `10_async/00_base/110_pydantic_ai`, `10_async/10_temporal/110_pydantic_ai` | `harness_pydantic_ai` ×3 | +| openai | `00_sync/050_openai_agents_local_sandbox`, `10_async/00_base/120_openai_agents_local_sandbox`, `10_async/10_temporal/120_openai_agents_local_sandbox` | `060_harness_openai`, `130_harness_openai`, `140_harness_openai` | +| claude-code | — (none; already numbered) | `060/130/140_claude_code` — KEEP, no rename | +| codex | — (net-new) | `harness_codex` ×3 → fresh `NNN_codex` numbers | + +`090_claude_agents_sdk_mvp` is the Agents SDK (not the claude-code harness) — KEEP untouched. + +### Task E1: Replace-in-place onto the numbered `NNN_` paradigm + +- [ ] **Step 1: Inventory** — `find examples/tutorials -name manifest.yaml | sort`. Confirm both sets above exist. +- [ ] **Step 2: Confirm the old dirs use the deprecated path** + +Run: `grep -rln "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler\|stream_langgraph_events\|stream_pydantic_ai_events" examples/tutorials/` +Expected: the pre-unified dirs (the ones to retire) show up. + +- [ ] **Step 3: Replace in place, one framework at a time.** For each: `git rm -r` the pre-unified dir, then `git mv` the unified `harness_*` dir into that numbered slot (or copy-then-delete where content must merge). 
Mapping: + - `harness_pydantic_ai` → `00_sync/040_pydantic_ai`, `10_async/00_base/110_pydantic_ai`, `10_async/10_temporal/110_pydantic_ai` + - `harness_langgraph` → `00_sync/030_langgraph`, `10_async/00_base/100_langgraph`, `10_async/10_temporal/130_langgraph` + - `060_harness_openai`/`130_harness_openai`/`140_harness_openai` → drop the `harness_` infix into the retired openai slots (`NNN_openai_*`) + - `harness_codex` ×3 → fresh `NNN_codex` numbers consistent with the sequence + +- [ ] **Step 4: Confirm survivors are clean** + +Run: `grep -rln "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler" examples/tutorials/` +Expected: zero matches. + +- [ ] **Step 5: Standardize per-tutorial scaffolding (item 11).** Add the shared `.dockerignore` to the langgraph + codex tutorials (byte-identical to the pydantic-ai/openai/claude copy). Decide `conftest.py` (present only in codex): promote to shared test setup or remove — apply uniformly. + +- [ ] **Step 6: Fix index/README cross-links** + +Run: `grep -rln "harness_pydantic_ai\|harness_langgraph\|harness_openai\|harness_codex" examples/ docs/ README.md` +Update every reference to the new numbered path. Expected after: zero stale references. + +- [ ] **Step 7: Confirm glob discovery unaffected** + +Run: `grep -n "harness_\|030_langgraph\|040_pydantic_ai\|050_openai" .github/workflows/agentex-tutorials-test.yml` +Expected: no hardcoded references to renamed/removed dirs (discovery is by `manifest.yaml` glob). + +- [ ] **Step 8: Commit** combined with Batch F (Task F3). 
+ +--- + +## Batch F — Remove deprecated tracing handlers + workaround markers (items 1, 2) — BREAKING + +### Task F1: Delete the deprecated bespoke tracing handlers (item 1) + +**Files:** +- Delete: `src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py` (`create_pydantic_ai_tracing_handler`, `AgentexPydanticAITracingHandler`) +- Delete: `src/agentex/lib/adk/_modules/_langgraph_tracing.py` (`create_langgraph_tracing_handler`, `AgentexLangGraphTracingHandler`) +- Modify: `src/agentex/lib/adk/__init__.py` (remove the two imports + two `__all__` entries) +- Delete: tests that exist only to exercise the deprecated path + +> **⚠ openai shim is NOT in this task.** `SyncStreamingModel`/`SyncStreamingProvider` in `providers/_modules/sync_provider.py` are **load-bearing** — referenced by the live CLI template `src/agentex/lib/cli/templates/sync-openai-agents/project/acp.py.j2`. They are the supported sync-openai delivery path, not a deprecated tracing shim. Do NOT delete them here. Their relocation/renaming is handled in Batch I (Task I2) and only after the template is updated. + +- [ ] **Step 1: Prove zero live references** + +Run: `grep -rn "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler\|AgentexLangGraphTracingHandler\|AgentexPydanticAITracingHandler" src/ tests/ examples/` +Expected after Batch E: matches only in the modules being deleted, their dedicated tests, and `adk/__init__.py`. If anything else matches (esp. the golden agent), STOP — precondition 3 unmet. + +- [ ] **Step 2: Delete the modules + exports.** `git rm` both `_*_tracing.py`. In `adk/__init__.py` remove the two `from ..._*_tracing import create_*_tracing_handler` lines and the two `"create_*_tracing_handler"` `__all__` entries. Delete the dedicated deprecated-path tests. Keep any genuinely-shared helper they used if still referenced (grep first). 
+ +- [ ] **Step 3: Verify** + +Run: `grep -rn "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler" src/ tests/ examples/` → zero. +Run: `uv run --all-packages python -c "import agentex.lib.adk as adk; print('ok')"` → `ok`. + +### Task F2: Remove resolved-workaround markers + stale docstrings (item 2) + +Verified on `next`: many `AGX1-377`/`AGX1-378` references exist across `_langgraph_async.py`, `_langgraph_sync.py`, `_langgraph_turn.py`, `_pydantic_ai_turn.py`, `core/harness/auto_send.py`, `core/services/adk/providers/openai.py`, the conformance runner, and many test docstrings. **Most describe the LANDED fix / current contract** (e.g. "AGX1-377 fix: auto_send now delivers streamed tool-request messages", "AGX1-378 restored: created_at is now threaded through", "LangGraph emits tool requests as Full events"). + +**Files (resolve at execution):** `src/agentex/lib/core/harness/auto_send.py`, the per-harness turn/async/sync modules, `src/agentex/lib/core/services/adk/providers/openai.py`, the conformance runner, and test docstrings. + +- [ ] **Step 1: Find the breadcrumbs** + +Run: `grep -rn "AGX1-377\|AGX1-378\|workaround\|coalescing\|created_at limitation" src/ tests/` + +- [ ] **Step 2: Trim the historical framing, keep the current contract.** For each hit: if it documents *why the code currently behaves this way* (e.g. LangGraph Full-event tool requests, `created_at` threading) keep the explanation but strip the now-meaningless ticket-number / "workaround"/"note:" framing. Delete only comments describing removed transitional state. **No code-behavior change in this task** — comments/docstrings only. + +- [ ] **Step 3: Verify** + +Run: `grep -rn "AGX1-377\|AGX1-378" src/ tests/` +Expected: zero (or only deliberately-kept current-contract notes, justified in the commit body). 
+ +### Task F3: Verify the breaking batch + commit E+F together + +- [ ] **Step 1:** `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` → green. +- [ ] **Step 2:** `uv run ruff check src/agentex/lib/adk/ src/agentex/lib/core/harness/ && uv run pyright src/agentex/lib/adk/__init__.py` → clean. +- [ ] **Step 3: Commit the breaking set** + +```bash +git add -A +git commit -m "refactor(harness)!: remove deprecated tracing handlers, retire pre-unified tutorials, drop resolved-workaround markers + +BREAKING CHANGE: removes the docstring-deprecated create_langgraph_tracing_handler / +create_pydantic_ai_tracing_handler and their handler classes from the public adk surface. +Use UnifiedEmitter + the convert_<harness>_to_agentex_events taps instead." +``` + +--- + +## Batch I — Filesystem layout + naming consolidation (NEW) — BREAKING + +The harness modules landed in different spots with different names. Target end-state (per the directive: **every provider has just a `sync.py` and a `turn.py`, all under `adk/_modules/`, openai pulled out of `providers/_modules/`**): + +| Harness | Final source files (all under `src/agentex/lib/adk/_modules/`) | +|---|---| +| pydantic-ai | `_pydantic_ai_sync.py`, `_pydantic_ai_turn.py` | +| langgraph | `_langgraph_sync.py`, `_langgraph_turn.py` | +| claude-code | `_claude_code_sync.py`, `_claude_code_turn.py` (already correct) | +| codex | `_codex_sync.py`, `_codex_turn.py` (already correct) | +| openai | `_openai_sync.py`, `_openai_turn.py` (MOVED from `providers/_modules/`) | + +Removed/folded by this batch (or already by F): `_pydantic_ai_async.py`, `_langgraph_async.py`, `_langgraph_messages.py`, `_pydantic_ai_tracing.py` (F), `_langgraph_tracing.py` (F), and the `providers/_modules/openai_turn.py` + `sync_provider.py` (relocated/renamed).
+ +### Task I1: Collapse pydantic-ai / langgraph to `sync.py` + `turn.py` + +**Files:** +- Modify/remove: `src/agentex/lib/adk/_modules/_pydantic_ai_async.py`, `_langgraph_async.py`, `_langgraph_messages.py` +- Modify: `_pydantic_ai_sync.py`, `_pydantic_ai_turn.py`, `_langgraph_sync.py`, `_langgraph_turn.py`, `adk/__init__.py` + +> **Caveat — the async helpers are public.** `stream_pydantic_ai_events`, `stream_langgraph_events`, `run_agent_streamed_auto_send`, and `emit_langgraph_messages` are exported from `adk/__init__.py` and may be imported by consumers/tutorials. After Batch E migrates the tutorials, confirm no consumer needs them: + +Run: `grep -rn "stream_pydantic_ai_events\|stream_langgraph_events\|run_agent_streamed_auto_send\|emit_langgraph_messages" src/ tests/ examples/` + +- [ ] **Step 1:** If a helper is still wanted, fold it into `_<harness>_sync.py` or `_<harness>_turn.py` and keep a thin re-export from `adk/__init__.py` for one release; otherwise remove it (changelog the public-symbol removal — adds to Batch H). Decide per-symbol based on the grep. +- [ ] **Step 2:** `git rm` `_*_async.py` / `_langgraph_messages.py` once their content is folded and references updated. Update `adk/__init__.py` imports + `__all__`. +- [ ] **Step 3: Verify** + +Run: `uv run --all-packages --all-extras pytest tests/lib/adk/ tests/lib/core/harness/ -q` → green. +Run: `uv run --all-packages python -c "import agentex.lib.adk; print('ok')"` → `ok`. + +### Task I2: Move openai out of `providers/_modules/` into `_modules/` + +**Files:** +- `git mv src/agentex/lib/adk/providers/_modules/openai_turn.py src/agentex/lib/adk/_modules/_openai_turn.py` +- Create `src/agentex/lib/adk/_modules/_openai_sync.py` from the sync-delivery pieces of `providers/_modules/sync_provider.py` (and the harness-tap `convert_openai_to_agentex_events`), aligning naming with the other four.
+- Decide placement of `providers/_modules/openai.py` (the ~745-line Temporal **activities** provider): if it is a provider-activity module rather than a harness tap, it may stay under `providers/`; the directive is about the harness surface. Confirm with the grep below before moving it. +- Update importers: `adk/__init__.py`, `src/agentex/lib/cli/templates/sync-openai-agents/project/acp.py.j2` (imports `SyncStreamingProvider, convert_openai_to_agentex_events` from `agentex.lib.adk.providers._modules.sync_provider`), and any test. + +- [ ] **Step 1: Inventory every openai import path** + +Run: `grep -rn "providers._modules.openai\|providers/_modules/openai\|sync_provider\|openai_turn\|SyncStreamingProvider\|convert_openai_to_agentex_events" src/ tests/ examples/` + +- [ ] **Step 2: Move + rename** the harness-surface modules into `_modules/_openai_sync.py` / `_openai_turn.py`. Keep `SyncStreamingProvider`/`SyncStreamingModel` (they are the supported sync path) — relocate them into `_openai_sync.py` (or keep a re-export shim at the old path for one release so the CLI template keeps working until updated). +- [ ] **Step 3: Update the CLI template** `acp.py.j2` import to the new path. Update `adk/__init__.py` and tests. +- [ ] **Step 4: Move the openai tests** `tests/lib/adk/providers/test_openai_turn.py` → `tests/lib/adk/test_openai_turn.py` (and `test_openai_activities.py` per the openai.py decision) so openai tests sit alongside the other four harnesses. +- [ ] **Step 5: Verify** + +Run: `grep -rn "providers._modules.openai_turn\|providers._modules.sync_provider" src/ tests/ examples/` → zero (or only the deliberate one-release shim). +Run: `uv run --all-packages --all-extras pytest tests/lib/adk/ tests/lib/core/harness/ -q` → green. +Run: `uv run --all-packages python -c "import agentex.lib.adk; print('ok')"` → `ok`. 
+ +### Task I3: Normalize test naming (`_sync.py` vs `_sync_unified.py`) + +Verified duplicate-ish test files: `tests/lib/adk/test_langgraph_sync.py` + `test_langgraph_sync_unified.py`, and `test_pydantic_ai_sync.py` + `test_pydantic_ai_sync_unified.py`. + +- [ ] **Step 1: Diff each pair** to see whether `_unified` is the post-migration replacement of the pre-unified `_sync` test or genuinely separate coverage. +- [ ] **Step 2:** Merge into one `test_<harness>_sync.py` per harness (folding still-relevant cases), or rename consistently. Remove the redundant file. +- [ ] **Step 3: Verify + commit I1–I3 together** + +Run: `uv run --all-packages --all-extras pytest tests/lib/adk/ tests/lib/core/harness/ -q` → green. +Run: `uv run ruff check src/ tests/ && uv run pyright src/agentex/lib/adk/__init__.py` → clean. +```bash +git add -A +git commit -m "refactor(harness)!: consolidate harness modules to _<harness>_sync.py + _<harness>_turn.py under _modules/ (openai moved out of providers/_modules)" +``` + +--- + +## Batch H — Final docs, changelog, and stale-plan-doc removal + +### Task H1: Remove the stale unified-harness plan doc(s) + +The pre-unified planning docs for the now-merged stack are obsolete. + +**Files:** `git rm docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md` (and any sibling `*-unified-harness-*` plan doc that lands). + +- [ ] **Step 1:** `ls docs/superpowers/plans/` and remove the unified-harness-surface plan doc(s). Keep this PR-10 plan until PR 10 itself merges. +- [ ] **Step 2: Commit** (`docs: remove stale unified-harness-surface planning doc (stack merged)`). + +### Task H2: Final docs consistency pass + changelog + +- [ ] **Step 1:** Re-read `adk/docs/harness.md` end-to-end against the post-E/F/I tree; confirm every symbol, tap, example, and module path matches reality.
+- [ ] **Step 2: Re-grep for any stale reference** + +Run: `grep -rln "harness_pydantic_ai\|harness_langgraph\|harness_openai\|harness_codex\|create_.*_tracing_handler\|providers._modules.openai_turn\|sync_provider" examples/ docs/ adk/docs/ src/agentex/lib/cli/templates/ README.md` +Expected: zero (or only deliberate one-release shims, noted in the changelog). + +- [ ] **Step 3: Add the changelog / release note** documenting the breaking removals: `create_langgraph_tracing_handler` / `create_pydantic_ai_tracing_handler` (+ classes), any removed `stream_*_events`/`emit_langgraph_messages` public helper, the openai module relocation (new import path), and the `adk.harness` namespace if adopted. +- [ ] **Step 4: Commit** (`docs(harness): final docs consistency pass + changelog for the harness-cleanup removals`). + +--- + +## Verification (whole PR) + +- Grep the whole repo (and confirm with the golden agent / known consumers) for each removed symbol — zero references before deletion (Task F1 Step 1, I1/I2 inventories). +- After Batch A: the shared `_fakes` module is the only definition of the fakes; the determinism test exists once — grep confirms no per-file/per-harness copies. +- After Batch B/I: the five harnesses each have exactly `_<harness>_sync.py` + `_<harness>_turn.py` under `adk/_modules/`; openai no longer lives under `providers/_modules/`; the turn modules use the shared usage normalizer. `ls src/agentex/lib/adk/_modules/_*.py` shows the 10 expected files (+ any deliberately-kept shim). +- After Batch E: exactly one tutorial agent per framework per tier; none import deprecated symbols; tutorial CI job + index/README links resolve. +- `adk/docs/harness.md` documents all five taps, uses the canonical `yield_turn` example with no unused variable, and references no deprecated symbol or old module path. +- The `sync-openai-agents` CLI template imports the new openai path and renders/runs. +- Full `./scripts/test` on Python 3.12 AND 3.13.
**Run the two versions separately or in shorter scoped batches** — the dual-version `./scripts/test` in one shot has tripped a 600s no-output watchdog; prefer scoped runs or background with periodic output. +- `./scripts/lint` clean (whole-repo ruff + pyright). +- Changelog / release note present (Task H2). + +## Risk + +Removing publicly-exported (deprecated) symbols and relocating public module paths are breaking changes — gate Batches E/F/I on the version-bump policy and on confirming the golden agent + any external consumers are migrated. The openai relocation touches a live CLI template; keep a one-release re-export shim if any external code may import the old path. Everything here is recoverable from history; sequence it as the final, deliberate cleanup of the harness-surface workstream. Batches A–D and G are non-breaking and can ship earlier if E/F/I are blocked. + +--- + +## Appendix — scope-item → batch mapping (auditable) + +| Scope item | Batch/Task | +|---|---| +| 1 — delete deprecated tracing handlers | F1 | +| 2 — remove resolved-workaround markers | F2 | +| 3 — adk.harness namespace (optional) | C2 | +| 4 — vestigial conformance runner | D | +| 5 — dead sync/async branches | B1 / I1 | +| 6 — shared test fakes | A1 | +| 7 — parametrize determinism test | A2 | +| 8 — shared usage normalization | B2 | +| 9 — converge sync paths | B3 / I | +| 10 — reconcile adk/__init__.py edits | C1 | +| 11 — tutorial consistency pass | E1 | +| 12 — integration-test parity | G | +| 13 — retire duplicate tutorials | E1 | +| NEW — filesystem layout + naming (sync.py/turn.py, openai→_modules) | I | +| NEW — remove stale unified-harness plan doc | H1 | + +Cross-cutting facts to preserve: +- Items 1 and 13 are coupled — the pre-unified tutorials import the symbols item 1 removes; retire them in the same commit range (Batches E+F). +- Item 11's renames ARE item 13's retirement — one operation, not two. 
+- Settled tutorial decision: **replace in place on the numbered `NNN_` paradigm**; codex takes fresh `NNN_codex` numbers; `090_claude_agents_sdk_mvp` (Agents SDK, not the claude-code harness) stays. +- The openai `SyncStreamingModel`/`SyncStreamingProvider` are load-bearing (CLI template) — relocate in Batch I with a shim, do NOT delete in Batch F. +- Non-breaking (A–D, G, H1) vs breaking (E, F, I) — split if the version-bump policy blocks the breaking set. From 577514611581c7e236d2996744c4f4b61640e512 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 19:02:18 -0400 Subject: [PATCH 02/12] docs(harness): refresh harness.md to the merged surface (all five taps, canonical yield_turn example) Completes the taps table with claude-code/codex/openai, names the per-harness HarnessTurn wrappers, and replaces the pre-unified sync example (which left UnifiedEmitter unused) with the canonical emitter.yield_turn(turn) flow. Co-Authored-By: Claude Opus 4.8 (1M context) --- adk/docs/harness.md | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/adk/docs/harness.md b/adk/docs/harness.md index 6a9d8947a..d81835a03 100644 --- a/adk/docs/harness.md +++ b/adk/docs/harness.md @@ -39,14 +39,17 @@ Every harness tap produces a sequence of these. Everything downstream (delivery, ## Per-harness taps: `convert__to_agentex_events` -A tap is an async generator that translates the harness's native event stream into `StreamTaskMessage*` events. The currently shipped taps are: +A tap is an async generator that translates the harness's native event stream into `StreamTaskMessage*` events. 
The shipped taps are: | Harness | Tap function | Exported from | |---|---|---| | pydantic-ai | `convert_pydantic_ai_to_agentex_events` | `agentex.lib.adk` | | LangGraph | `convert_langgraph_to_agentex_events` | `agentex.lib.adk` | +| claude-code | `convert_claude_code_to_agentex_events` | `agentex.lib.adk` | +| codex | `convert_codex_to_agentex_events` | `agentex.lib.adk` | +| OpenAI Agents | `convert_openai_to_agentex_events` | `agentex.lib.adk.providers._modules.sync_provider` | -Taps for claude-code and codex will be added in subsequent PRs (AGX1-420, AGX1-421) and exported from `agentex.lib.adk` in the same way. +Each harness also provides a `HarnessTurn` wrapper that pairs its tap's event stream with usage extraction: `PydanticAITurn`, `LangGraphTurn`, `ClaudeCodeTurn`, `CodexTurn`, and `OpenAITurn`. --- @@ -157,11 +160,13 @@ Spans are derived from the canonical stream by `SpanDeriver` (pure, no `adk` dep ## Usage examples by channel -### Sync ACP (pydantic-ai tap) +### Sync ACP (`yield_turn`) + +Build the harness's `HarnessTurn` wrapper and iterate `emitter.yield_turn(turn)` — the emitter forwards each event to the caller and traces spans as a side effect: ```python import agentex.lib.adk as adk -from agentex.lib.adk import UnifiedEmitter, convert_pydantic_ai_to_agentex_events +from agentex.lib.adk import UnifiedEmitter, ClaudeCodeTurn @acp.on_message_send async def handle(params): @@ -172,13 +177,12 @@ async def handle(params): trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, ) - tap = convert_pydantic_ai_to_agentex_events(pydantic_stream) - # wrap tap in a HarnessTurn then yield_turn, or yield directly: - async for event in tap: + turn = ClaudeCodeTurn(claude_code_stream) # any HarnessTurn + async for event in emitter.yield_turn(turn): yield event ``` -For the pre-unified sync path the tap is still yielded directly; `UnifiedEmitter.yield_turn` is the forward-looking integration point when a `HarnessTurn` wrapper is available. 
+Every harness follows the same shape — swap `ClaudeCodeTurn` for `PydanticAITurn`, `LangGraphTurn`, `CodexTurn`, or `OpenAITurn` and feed it that harness's native stream. ### Async Temporal (auto-send) From 006028e61369cbc504fb10f7fc556bd92fb63f88 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 19:10:36 -0400 Subject: [PATCH 03/12] test(harness): extract shared FakeSpan/FakeTracing + parametrize determinism test once Adds tests/lib/core/harness/_fakes.py with a single superset FakeSpan/FakeTracing (started/ended/ended_spans plus started_names/started_pairs/ended_outputs views) and migrates every consumer off its local copy. Keeps the conformance determinism test once (parametrized over all_fixtures) and drops the per-harness copies. run_yield_turn and test_langgraph_sync_unified's _FakeTracingBackend left in place (genuinely divergent). Co-Authored-By: Claude Opus 4.8 (1M context) --- .../lib/adk/test_pydantic_ai_sync_unified.py | 28 ++------- tests/lib/core/harness/_fakes.py | 63 +++++++++++++++++++ tests/lib/core/harness/conformance/runner.py | 30 +-------- .../conformance/test_codex_conformance.py | 13 +--- .../conformance/test_langgraph_conformance.py | 13 +--- .../test_pydantic_ai_conformance.py | 12 ---- tests/lib/core/harness/test_auto_send.py | 20 ++---- tests/lib/core/harness/test_emitter.py | 11 +--- .../harness/test_harness_langgraph_async.py | 29 +-------- .../harness/test_harness_langgraph_sync.py | 31 +-------- .../harness/test_harness_pydantic_ai_async.py | 38 +---------- .../harness/test_harness_pydantic_ai_sync.py | 48 +++----------- tests/lib/core/harness/test_tracer.py | 35 +++-------- tests/lib/core/harness/test_yield_delivery.py | 27 +++----- 14 files changed, 110 insertions(+), 288 deletions(-) create mode 100644 tests/lib/core/harness/_fakes.py diff --git a/tests/lib/adk/test_pydantic_ai_sync_unified.py b/tests/lib/adk/test_pydantic_ai_sync_unified.py index f920418de..2e58ef5b2 100644 --- 
a/tests/lib/adk/test_pydantic_ai_sync_unified.py +++ b/tests/lib/adk/test_pydantic_ai_sync_unified.py @@ -25,6 +25,7 @@ ) from agentex.lib.core.harness import UnifiedEmitter +from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn @@ -37,25 +38,6 @@ async def _collect(stream: AsyncIterator[Any]) -> list[Any]: return [e async for e in stream] -class _FakeSpan: - def __init__(self, name: str): - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None, Any]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append((name, parent_id, input)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id, span): - self.ended.append((span.name, span.output)) - - def _make_result_event(usage: RunUsage | None = None) -> AgentRunResultEvent: result = AgentRunResult(output="done", _output_tool_name=None) if usage is not None: @@ -129,7 +111,7 @@ async def test_tool_span_opened_and_closed(self): ), ] - fake = _FakeTracing() + fake = FakeTracing() turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o") emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) @@ -152,7 +134,7 @@ async def test_reasoning_span_opened_and_closed(self): PartEndEvent(index=0, part=ThinkingPart(content="let me think")), ] - fake = _FakeTracing() + fake = FakeTracing() turn = PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o") emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) @@ -177,7 +159,7 @@ async def test_no_trace_id_means_no_spans(self): ), ] - fake = _FakeTracing() + fake = FakeTracing() turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake) @@ 
-199,7 +181,7 @@ async def test_tracer_false_suppresses_spans_even_with_trace_id(self): ), ] - fake = _FakeTracing() + fake = FakeTracing() turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake) diff --git a/tests/lib/core/harness/_fakes.py b/tests/lib/core/harness/_fakes.py new file mode 100644 index 000000000..f9fd34a45 --- /dev/null +++ b/tests/lib/core/harness/_fakes.py @@ -0,0 +1,63 @@ +"""Shared test doubles for the unified harness test suites. + +A single superset implementation of the in-memory tracing backend used across +the harness tests. Three recording shapes were previously duplicated: + +- Shape-1 (richest): ``started`` = ``(name, parent_id, input)`` 3-tuples, + ``ended`` = ``(name, output)`` 2-tuples, plus an ``ended_spans`` list of the + closed ``FakeSpan`` objects (which carry ``.name``, ``.output``, ``.data``). +- Shape-2: ``started`` = ``(name, parent_id)`` 2-tuples, ``ended`` = + ``(name, output)``. +- Shape-3: ``started`` = bare names, ``ended`` = bare outputs. + +``FakeTracing`` records the richest (shape-1) form and exposes read-only +convenience properties (``started_names``, ``started_pairs``, +``ended_outputs``) so shape-2 and shape-3 assertions stay clean. 
+""" + +from __future__ import annotations + +from typing import Any + + +class FakeSpan: + def __init__(self, name: str) -> None: + self.name = name + self.output: Any = None + self.data: Any = None + + +class FakeTracing: + def __init__(self) -> None: + self.started: list[tuple[str, Any, Any]] = [] + self.ended: list[tuple[str, Any]] = [] + self.ended_spans: list[FakeSpan] = [] + + async def start_span( + self, + *, + trace_id: str, + name: str, + input: Any = None, + parent_id: Any = None, + data: Any = None, + task_id: Any = None, + ) -> FakeSpan: + self.started.append((name, parent_id, input)) + return FakeSpan(name) + + async def end_span(self, *, trace_id: str, span: FakeSpan) -> None: + self.ended.append((span.name, span.output)) + self.ended_spans.append(span) + + @property + def started_names(self) -> list[str]: + return [name for (name, _parent, _input) in self.started] + + @property + def started_pairs(self) -> list[tuple[str, Any]]: + return [(name, parent) for (name, parent, _input) in self.started] + + @property + def ended_outputs(self) -> list[Any]: + return [output for (_name, output) in self.ended] diff --git a/tests/lib/core/harness/conformance/runner.py b/tests/lib/core/harness/conformance/runner.py index 84e84fa51..a88c73e05 100644 --- a/tests/lib/core/harness/conformance/runner.py +++ b/tests/lib/core/harness/conformance/runner.py @@ -64,12 +64,12 @@ from __future__ import annotations import json -import types as _types from typing import Any, NamedTuple, override from dataclasses import dataclass from agentex.types.text_delta import TextDelta from agentex.types.task_message import TaskMessage +from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import SpanSignal, StreamTaskMessage from agentex.lib.core.harness.tracer import SpanTracer from agentex.types.task_message_update import ( @@ -296,30 +296,6 @@ def streaming_task_message_context( return _FakeCtx(self.sink, ctype, initial_content) -class 
_FakeTracing: - """Minimal tracing backend: records started/ended span names + outputs.""" - - def __init__(self) -> None: - self.started: list[str] = [] - self.ended: list[Any] = [] - - async def start_span( - self, - *, - trace_id: str, - name: str, - input: Any = None, - parent_id: Any = None, - data: Any = None, - task_id: Any = None, - ) -> Any: - self.started.append(name) - return _types.SimpleNamespace() - - async def end_span(self, *, trace_id: str, span: Any) -> None: - self.ended.append(getattr(span, "output", None)) - - class _RecordingTracer(SpanTracer): """SpanTracer that records every SpanSignal it actually receives. @@ -486,7 +462,7 @@ async def run_cross_channel_conformance( from agentex.lib.core.harness.yield_delivery import yield_events # --- yield channel --- - tracer_yield = _RecordingTracer(tracing=_FakeTracing()) + tracer_yield = _RecordingTracer(tracing=FakeTracing()) yield_out = [e async for e in yield_events(_gen(fixture.events), tracer=tracer_yield)] # Span signals the yield channel actually emitted to its tracer @@ -496,7 +472,7 @@ async def run_cross_channel_conformance( yield_deliveries = _yield_text_reasoning_seq(_yield_logical_deliveries(yield_out)) # --- auto_send channel --- - tracer_auto = _RecordingTracer(tracing=_FakeTracing()) + tracer_auto = _RecordingTracer(tracing=FakeTracing()) fake_streaming = _FakeStreaming() await auto_send( _gen(fixture.events), diff --git a/tests/lib/core/harness/conformance/test_codex_conformance.py b/tests/lib/core/harness/conformance/test_codex_conformance.py index b00ed2970..b3db4f56e 100644 --- a/tests/lib/core/harness/conformance/test_codex_conformance.py +++ b/tests/lib/core/harness/conformance/test_codex_conformance.py @@ -19,7 +19,7 @@ from agentex.lib.core.harness.types import StreamTaskMessage from agentex.lib.adk._modules._codex_sync import convert_codex_to_agentex_events -from .runner import Fixture, register, derive_all +from .runner import Fixture, register async def _aiter(items: 
list[Any]) -> AsyncIterator[Any]: @@ -208,17 +208,6 @@ def _build(events: list[Any]) -> list[StreamTaskMessage]: _LOCAL_FIXTURES = [_CODEX_TEXT, _CODEX_TOOL, _CODEX_REASONING, _CODEX_MULTI] -@pytest.mark.parametrize("fixture", _LOCAL_FIXTURES, ids=lambda f: f.name) -def test_codex_span_derivation_is_deterministic(fixture: Fixture) -> None: - """Span derivation over codex events is deterministic (cross-channel guarantee). - - Deriving twice over the same events yields identical signals. This is the - invariant that makes ``yield`` and ``auto_send`` delivery equivalent: both - observe the same event stream, so their tracing side effects are identical. - """ - assert derive_all(fixture.events) == derive_all(fixture.events) - - @pytest.mark.parametrize("fixture", _LOCAL_FIXTURES, ids=lambda f: f.name) def test_codex_events_are_non_empty(fixture: Fixture) -> None: """Every codex fixture yields at least one StreamTaskMessage*.""" diff --git a/tests/lib/core/harness/conformance/test_langgraph_conformance.py b/tests/lib/core/harness/conformance/test_langgraph_conformance.py index 721d6aac5..a8d43aef6 100644 --- a/tests/lib/core/harness/conformance/test_langgraph_conformance.py +++ b/tests/lib/core/harness/conformance/test_langgraph_conformance.py @@ -32,7 +32,7 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.types.reasoning_content_delta import ReasoningContentDelta -from .runner import Fixture, register, derive_all, run_cross_channel_conformance +from .runner import Fixture, register, run_cross_channel_conformance # --------------------------------------------------------------------------- # Fixtures @@ -216,14 +216,3 @@ async def test_cross_channel_equivalence(fixture: Fixture) -> None: assert yield_spans == auto_spans, ( f"[{fixture.name}] span signals differ:\n yield: {yield_spans}\n auto_send: {auto_spans}" ) - - -# --------------------------------------------------------------------------- -# Backward-compatible determinism guard 
-# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name) -def test_span_derivation_is_deterministic(fixture: Fixture) -> None: - """Span derivation over the same event list is idempotent.""" - assert derive_all(fixture.events) == derive_all(fixture.events) diff --git a/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py index ca8234fda..feac188e4 100644 --- a/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py +++ b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py @@ -39,7 +39,6 @@ from .runner import ( Fixture, register, - derive_all, run_cross_channel_conformance, ) @@ -181,14 +180,3 @@ async def test_cross_channel_equivalence(fixture: Fixture) -> None: assert yield_spans == auto_spans, ( f"[{fixture.name}] span signals differ:\n yield: {yield_spans}\n auto_send: {auto_spans}" ) - - -# --------------------------------------------------------------------------- -# Backward-compatible determinism guard -# --------------------------------------------------------------------------- - - -@pytest.mark.parametrize("fixture", _FIXTURES, ids=lambda f: f.name) -def test_span_derivation_is_deterministic(fixture: Fixture) -> None: - """Span derivation over the same event list is idempotent.""" - assert derive_all(fixture.events) == derive_all(fixture.events) diff --git a/tests/lib/core/harness/test_auto_send.py b/tests/lib/core/harness/test_auto_send.py index 1948e9196..b599f2503 100644 --- a/tests/lib/core/harness/test_auto_send.py +++ b/tests/lib/core/harness/test_auto_send.py @@ -9,13 +9,13 @@ This mirrors _langgraph_async.py lines 62-78 and 100-127. 
""" -import types as _types from datetime import datetime import pytest from agentex.types.task_message import TaskMessage from agentex.types.text_content import TextContent +from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.tracer import SpanTracer from agentex.types.task_message_delta import TextDelta from agentex.types.tool_request_delta import ToolRequestDelta @@ -181,21 +181,9 @@ async def test_auto_send_posts_full_tool_messages(): # --------------------------------------------------------------------------- -class _RecordTracing: - def __init__(self): - self.started, self.ended = [], [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append(name) - return _types.SimpleNamespace() - - async def end_span(self, *, trace_id, span): - self.ended.append(getattr(span, "output", None)) - - @pytest.mark.asyncio async def test_auto_send_derives_tool_spans_via_tracer(): - fake_tracing = _RecordTracing() + fake_tracing = FakeTracing() tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake_tracing) streaming = _FakeStreaming() @@ -228,8 +216,8 @@ async def test_auto_send_derives_tool_spans_via_tracer(): result = await auto_send(_gen(events), task_id="task1", tracer=tracer, streaming=streaming) assert result.final_text == "" - assert fake_tracing.started == ["Bash"] - assert fake_tracing.ended == ["ok"] + assert fake_tracing.started_names == ["Bash"] + assert fake_tracing.ended_outputs == ["ok"] # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_emitter.py b/tests/lib/core/harness/test_emitter.py index df155ec44..081ccff5a 100644 --- a/tests/lib/core/harness/test_emitter.py +++ b/tests/lib/core/harness/test_emitter.py @@ -2,6 +2,7 @@ from agentex.types.task_message import TaskMessage from agentex.types.text_content import TextContent +from tests.lib.core.harness._fakes import FakeTracing 
from agentex.lib.core.harness.types import TurnUsage from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.task_message_delta import TextDelta @@ -12,14 +13,6 @@ ) -class _FakeTracing: - async def start_span(self, **kw): - return None - - async def end_span(self, **kw): - pass - - class _FakeCtx: """Minimal StreamingTaskMessageContext fake (see test_auto_send.py).""" @@ -84,7 +77,7 @@ async def test_emitter_yield_mode_passes_through(): async def test_emitter_tracing_default_on_when_trace_id_present(): # Inject a fake tracing backend so the test env doesn't need temporalio. # This exercises the default-on path (tracer=None) when trace_id is truthy. - emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id="p", tracing=_FakeTracing()) + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id="p", tracing=FakeTracing()) assert emitter.tracer is not None diff --git a/tests/lib/core/harness/test_harness_langgraph_async.py b/tests/lib/core/harness/test_harness_langgraph_async.py index 39bf5bc66..32369fa52 100644 --- a/tests/lib/core/harness/test_harness_langgraph_async.py +++ b/tests/lib/core/harness/test_harness_langgraph_async.py @@ -39,6 +39,7 @@ from agentex.types.task_message import TaskMessage from agentex.types.text_content import TextContent +from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import TurnResult from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -102,30 +103,6 @@ def streaming_task_message_context(self, task_id: str, initial_content: Any, **k return ctx -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> 
None: - self.started: list[tuple[str, Any]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span(self, *, trace_id: str, name: str, **kw: Any) -> _FakeSpan: - self.started.append((name, kw.get("parent_id"))) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -142,9 +119,9 @@ async def _gen(): async def _run_auto_send_turn( stream_events: list[tuple[str, Any]], trace_id: str | None = None, -) -> tuple[TurnResult, _FakeStreaming, _FakeTracing | None]: +) -> tuple[TurnResult, _FakeStreaming, FakeTracing | None]: fake_streaming = _FakeStreaming() - fake_tracing = _FakeTracing() if trace_id else None + fake_tracing = FakeTracing() if trace_id else None tracer: SpanTracer | bool = False if trace_id and fake_tracing is not None: diff --git a/tests/lib/core/harness/test_harness_langgraph_sync.py b/tests/lib/core/harness/test_harness_langgraph_sync.py index 9f67dd2b6..89c4d406b 100644 --- a/tests/lib/core/harness/test_harness_langgraph_sync.py +++ b/tests/lib/core/harness/test_harness_langgraph_sync.py @@ -36,6 +36,7 @@ import pytest +from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.task_message_update import ( @@ -62,32 +63,6 @@ def _real_langchain_core(): sys.modules.update(saved) -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, Any]] 
= [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, *, trace_id: str, name: str, input: Any = None, parent_id: Any = None, **kw: Any - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -103,8 +78,8 @@ async def _gen(): async def _run_yield_turn( stream_events: list[tuple[str, Any]], trace_id: str | None = None -) -> tuple[list[Any], _FakeTracing | None]: - fake_tracing = _FakeTracing() if trace_id else None +) -> tuple[list[Any], FakeTracing | None]: + fake_tracing = FakeTracing() if trace_id else None tracer: SpanTracer | bool | None = None if trace_id and fake_tracing is not None: tracer = SpanTracer(trace_id=trace_id, parent_span_id=None, task_id="task1", tracing=fake_tracing) diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_async.py b/tests/lib/core/harness/test_harness_pydantic_ai_async.py index 8bda7d020..e9b73e687 100644 --- a/tests/lib/core/harness/test_harness_pydantic_ai_async.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_async.py @@ -44,6 +44,7 @@ from pydantic_ai.models.test import TestModel from agentex.types.task_message import TaskMessage +from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import TurnResult from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -120,39 +121,6 @@ def streaming_task_message_context( return _FakeCtx(self.sink, ctype, initial_content) -# --------------------------------------------------------------------------- -# Fake tracing backend -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def 
__init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, - *, - trace_id: str, - name: str, - input: Any = None, - parent_id: Any = None, - data: Any = None, - task_id: Any = None, - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -163,7 +131,7 @@ async def _run_auto_send_turn( user_msg: str = "What is the weather in Paris?", trace_id: str | None = None, parent_span_id: str | None = None, - fake_tracing: _FakeTracing | None = None, + fake_tracing: FakeTracing | None = None, ) -> tuple[TurnResult, _FakeStreaming]: """Drive the async (auto_send) path and return the TurnResult + fake streaming state.""" fake_streaming = _FakeStreaming() @@ -314,7 +282,7 @@ async def test_tool_span_derived_on_async_path(self) -> None: on the async/auto_send path when auto_send delivers the streamed Start+ToolRequestDelta+Done sequence.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent", diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py index 1557d0dd1..ea7de6c28 100644 --- a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py @@ -38,6 +38,7 @@ from pydantic_ai.models.test import TestModel from agentex.types.text_delta import TextDelta +from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types 
import OpenSpan, CloseSpan from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -74,39 +75,6 @@ def get_weather(city: str) -> str: return agent -# --------------------------------------------------------------------------- -# Fake tracing backend (no network calls) -# --------------------------------------------------------------------------- - - -class _FakeSpan: - def __init__(self, name: str) -> None: - self.name = name - self.output: Any = None - - -class _FakeTracing: - def __init__(self) -> None: - self.started: list[tuple[str, str | None]] = [] - self.ended: list[tuple[str, Any]] = [] - - async def start_span( - self, - *, - trace_id: str, - name: str, - input: Any = None, - parent_id: Any = None, - data: Any = None, - task_id: Any = None, - ) -> _FakeSpan: - self.started.append((name, parent_id)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id: str, span: _FakeSpan) -> None: - self.ended.append((span.name, span.output)) - - # --------------------------------------------------------------------------- # Helpers # --------------------------------------------------------------------------- @@ -117,7 +85,7 @@ async def _run_yield_turn( user_msg: str = "What is the weather in Paris?", trace_id: str | None = None, parent_span_id: str | None = None, - fake_tracing: _FakeTracing | None = None, + fake_tracing: FakeTracing | None = None, ) -> list[Any]: """Drive the sync (yield) path and collect all yielded events.""" tracer: SpanTracer | bool | None = None @@ -245,7 +213,7 @@ class TestSyncYieldSpanDerivation: async def test_tool_span_opened_and_closed(self) -> None: """One tool span is opened and closed per tool call.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent-span", @@ -266,14 +234,14 @@ async def test_tool_span_opened_and_closed(self) -> None: assert len(fake_tracing.started) 
== 1, "Expected exactly one tool span opened" assert len(fake_tracing.ended) == 1, "Expected exactly one tool span closed" - span_name, parent_id = fake_tracing.started[0] + span_name, parent_id, _ = fake_tracing.started[0] assert span_name == "get_weather" assert parent_id == "parent-span" async def test_tool_span_output_is_tool_result(self) -> None: """The closed tool span's output equals the tool's return value.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = SpanTracer( trace_id="trace1", parent_span_id="parent-span", @@ -299,7 +267,7 @@ async def test_tool_span_output_is_tool_result(self) -> None: async def test_no_trace_id_means_no_spans(self) -> None: """With trace_id=None, no spans are derived (emitter disables tracing).""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() async with agent.run_stream_events("What is the weather in Paris?") as stream: turn = PydanticAITurn(stream, model="test") @@ -317,7 +285,7 @@ async def test_no_trace_id_means_no_spans(self) -> None: async def test_tracer_false_suppresses_spans(self) -> None: """tracer=False disables span derivation regardless of trace_id.""" agent = _make_agent() - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() async with agent.run_stream_events("What is the weather in Paris?") as stream: turn = PydanticAITurn(stream, model="test") @@ -345,7 +313,7 @@ async def handle(self, signal: Any) -> None: received_signals.append(signal) await super().handle(signal) - fake_tracing = _FakeTracing() + fake_tracing = FakeTracing() tracer = _RecordingTracer( trace_id="trace1", parent_span_id="parent", diff --git a/tests/lib/core/harness/test_tracer.py b/tests/lib/core/harness/test_tracer.py index ed40cf595..46023ffb5 100644 --- a/tests/lib/core/harness/test_tracer.py +++ b/tests/lib/core/harness/test_tracer.py @@ -2,35 +2,14 @@ import pytest +from tests.lib.core.harness._fakes import FakeTracing from 
agentex.lib.core.harness.types import OpenSpan, CloseSpan from agentex.lib.core.harness.tracer import SpanTracer -class _FakeSpan: - def __init__(self, name): - self.name = name - self.output = None - self.data = None - - -class _FakeTracing: - def __init__(self): - self.started = [] - self.ended = [] - self.ended_spans = [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append((name, parent_id, input)) - return _FakeSpan(name) - - async def end_span(self, *, trace_id, span): - self.ended.append((span.name, span.output)) - self.ended_spans.append(span) - - @pytest.mark.asyncio async def test_open_then_close_starts_and_ends_span(): - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={"cmd": "ls"})) await tracer.handle(CloseSpan(key="call_1", output="files", is_complete=True)) @@ -41,7 +20,7 @@ async def test_open_then_close_starts_and_ends_span(): @pytest.mark.asyncio async def test_close_records_is_error_on_span_data(): """A CloseSpan carrying is_error records the status on span.data (AGX1-371).""" - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) await tracer.handle(OpenSpan(key="call_err", kind="tool", name="Bash", input={})) await tracer.handle(CloseSpan(key="call_err", output="boom", is_complete=True, is_error=True)) @@ -51,7 +30,7 @@ async def test_close_records_is_error_on_span_data(): @pytest.mark.asyncio async def test_close_without_status_leaves_span_data_untouched(): """is_error=None (no status reported) must not write to span.data.""" - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) await tracer.handle(OpenSpan(key="call_1", kind="tool", name="Bash", input={})) await tracer.handle(CloseSpan(key="call_1", 
output="files", is_complete=True)) @@ -60,7 +39,7 @@ async def test_close_without_status_leaves_span_data_untouched(): @pytest.mark.asyncio async def test_no_trace_id_is_noop(): - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="", parent_span_id=None, tracing=fake) await tracer.handle(OpenSpan(key="k", kind="tool", name="X")) await tracer.handle(CloseSpan(key="k")) @@ -69,7 +48,7 @@ async def test_no_trace_id_is_noop(): @pytest.mark.asyncio async def test_tracing_failure_is_swallowed(): - class _Boom(_FakeTracing): + class _Boom(FakeTracing): @override async def start_span(self, **kw): raise RuntimeError("backend down") @@ -83,7 +62,7 @@ async def start_span(self, **kw): @pytest.mark.asyncio async def test_duplicate_open_replaces_silently(): - fake = _FakeTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t1", parent_span_id="p1", tracing=fake) await tracer.handle(OpenSpan(key="k", kind="tool", name="A")) await tracer.handle(OpenSpan(key="k", kind="tool", name="B")) diff --git a/tests/lib/core/harness/test_yield_delivery.py b/tests/lib/core/harness/test_yield_delivery.py index f3f491d84..eaa064177 100644 --- a/tests/lib/core/harness/test_yield_delivery.py +++ b/tests/lib/core/harness/test_yield_delivery.py @@ -1,7 +1,6 @@ -import types as _types - import pytest +from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.tracer import SpanTracer from agentex.types.task_message_update import ( StreamTaskMessageDone, @@ -13,18 +12,6 @@ from agentex.lib.core.harness.yield_delivery import yield_events -class _RecordTracing: - def __init__(self): - self.started, self.ended = [], [] - - async def start_span(self, *, trace_id, name, input=None, parent_id=None, data=None, task_id=None): - self.started.append(name) - return _types.SimpleNamespace() # supports arbitrary attribute assignment (span.output = ...) 
- - async def end_span(self, *, trace_id, span): - self.ended.append(getattr(span, "output", None)) - - async def _gen(events): for e in events: yield e @@ -32,7 +19,7 @@ async def _gen(events): @pytest.mark.asyncio async def test_yield_passes_events_through_and_traces(): - fake = _RecordTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake) events = [ StreamTaskMessageStart( @@ -53,8 +40,8 @@ async def test_yield_passes_events_through_and_traces(): ] out = [e async for e in yield_events(_gen(events), tracer=tracer)] assert out == events # passthrough unchanged - assert fake.started == ["Bash"] # span derived + opened - assert fake.ended == ["ok"] # span closed with response + assert fake.started_names == ["Bash"] # span derived + opened + assert fake.ended_outputs == ["ok"] # span closed with response @pytest.mark.asyncio @@ -68,7 +55,7 @@ async def test_yield_without_tracer_is_pure_passthrough(): @pytest.mark.asyncio async def test_flush_runs_on_early_close(): - fake = _RecordTracing() + fake = FakeTracing() tracer = SpanTracer(trace_id="t", parent_span_id="p", tracing=fake) events = [ StreamTaskMessageStart( @@ -85,5 +72,5 @@ async def test_flush_runs_on_early_close(): first = await gen.__anext__() # Start second = await gen.__anext__() # Done -> tool span opens here await gen.aclose() # triggers the finally -> flush() - assert fake.started == ["Bash"] - assert fake.ended == [None] # flush closed the unpaired span (incomplete, no output) + assert fake.started_names == ["Bash"] + assert fake.ended_outputs == [None] # flush closed the unpaired span (incomplete, no output) From a4547dc15f7d913495a3bfd999d27944049243f7 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 19:25:55 -0400 Subject: [PATCH 04/12] refactor(tutorials)!: retire duplicate pre-unified framework tutorials onto the numbered paradigm Replaces the pre-unified langgraph/pydantic-ai/openai tutorials (which imported the deprecated 
create_*_tracing_handler) with their unified-surface harness_* counterparts, moved into the numbered NNN_ slots. codex takes fresh numbers (070/140/150). Fixes the 060/130/140 numbering collision between harness_openai and claude_code by folding openai into the old openai slots (050/120, renamed _openai_agents). Adds the shared .dockerignore to langgraph/codex tutorials. 090_claude_agents_sdk_mvp untouched. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../tutorials/00_sync/030_langgraph/README.md | 49 ++-- .../tutorials/00_sync/030_langgraph/graph.png | Bin 16357 -> 0 bytes .../00_sync/030_langgraph/manifest.yaml | 4 +- .../00_sync/030_langgraph/project/acp.py | 65 +++--- .../00_sync/030_langgraph/project/graph.py | 18 +- .../00_sync/030_langgraph/project/tools.py | 10 +- .../00_sync/030_langgraph/pyproject.toml | 2 +- .../00_sync/030_langgraph/tests/test_agent.py | 31 +-- .../00_sync/040_pydantic_ai/README.md | 72 +++--- .../00_sync/040_pydantic_ai/manifest.yaml | 4 +- .../00_sync/040_pydantic_ai/project/acp.py | 42 ++-- .../00_sync/040_pydantic_ai/project/agent.py | 8 +- .../00_sync/040_pydantic_ai/project/tools.py | 6 +- .../00_sync/040_pydantic_ai/pyproject.toml | 2 +- .../040_pydantic_ai/tests/test_agent.py | 20 +- .../.dockerignore | 0 .../Dockerfile | 12 +- .../README.md | 4 +- .../manifest.yaml | 10 +- .../project/__init__.py | 0 .../project/acp.py | 0 .../project/agent.py | 0 .../project/tools.py | 0 .../pyproject.toml | 2 +- .../tests/test_agent.py | 0 .../Dockerfile | 50 ---- .../050_openai_agents_local_sandbox/README.md | 113 ---------- .../manifest.yaml | 61 ----- .../project/acp.py | 77 ------- .../project/agent.py | 92 -------- .../project/tools.py | 29 --- .../pyproject.toml | 36 --- .../tests/test_agent.py | 148 ------------ .../00_sync/060_harness_openai/Dockerfile | 50 ---- .../.dockerignore | 0 .../{harness_codex => 070_codex}/Dockerfile | 12 +- .../{harness_codex => 070_codex}/README.md | 4 +- .../{harness_codex => 070_codex}/conftest.py | 
0 .../manifest.yaml | 10 +- .../project/__init__.py | 0 .../project/acp.py | 0 .../pyproject.toml | 2 +- .../tests/test_agent.py | 2 +- .../00_sync/harness_langgraph/README.md | 55 ----- .../00_sync/harness_langgraph/manifest.yaml | 58 ----- .../00_sync/harness_langgraph/project/acp.py | 107 --------- .../harness_langgraph/project/graph.py | 67 ------ .../harness_langgraph/project/tools.py | 24 -- .../00_sync/harness_langgraph/pyproject.toml | 37 --- .../harness_langgraph/tests/test_agent.py | 144 ------------ .../00_sync/harness_pydantic_ai/Dockerfile | 50 ---- .../00_sync/harness_pydantic_ai/README.md | 54 ----- .../00_sync/harness_pydantic_ai/manifest.yaml | 58 ----- .../harness_pydantic_ai/project/acp.py | 92 -------- .../harness_pydantic_ai/project/agent.py | 39 ---- .../harness_pydantic_ai/project/tools.py | 20 -- .../harness_pydantic_ai/pyproject.toml | 36 --- .../harness_pydantic_ai/tests/test_agent.py | 138 ------------ .../10_async/00_base/100_langgraph/README.md | 58 ++--- .../10_async/00_base/100_langgraph/graph.png | Bin 16357 -> 0 bytes .../00_base/100_langgraph/manifest.yaml | 4 +- .../00_base/100_langgraph/project/acp.py | 53 +++-- .../00_base/100_langgraph/project/graph.py | 11 +- .../00_base/100_langgraph/project/tools.py | 10 +- .../00_base/100_langgraph/pyproject.toml | 2 +- .../00_base/100_langgraph/tests/test_agent.py | 35 +-- .../00_base/110_pydantic_ai/README.md | 87 ++++--- .../00_base/110_pydantic_ai/manifest.yaml | 6 +- .../00_base/110_pydantic_ai/project/acp.py | 57 +++-- .../00_base/110_pydantic_ai/project/agent.py | 8 +- .../00_base/110_pydantic_ai/project/tools.py | 6 +- .../00_base/110_pydantic_ai/pyproject.toml | 2 +- .../110_pydantic_ai/tests/test_agent.py | 26 +-- .../00_base/120_openai_agents}/.dockerignore | 0 .../Dockerfile | 12 +- .../README.md | 2 +- .../manifest.yaml | 10 +- .../120_openai_agents}/project/__init__.py | 0 .../project/acp.py | 0 .../project/agent.py | 0 .../project/tools.py | 0 .../pyproject.toml | 2 +- 
.../tests/test_agent.py | 0 .../Dockerfile | 50 ---- .../120_openai_agents_local_sandbox/README.md | 119 ---------- .../manifest.yaml | 61 ----- .../project/acp.py | 149 ------------ .../project/agent.py | 95 -------- .../project/tools.py | 29 --- .../pyproject.toml | 36 --- .../tests/test_agent.py | 122 ---------- .../00_base/130_harness_openai/Dockerfile | 50 ---- .../130_harness_openai/project/__init__.py | 0 .../.dockerignore | 0 .../{harness_codex => 140_codex}/Dockerfile | 12 +- .../{harness_codex => 140_codex}/README.md | 4 +- .../{harness_codex => 140_codex}/conftest.py | 0 .../manifest.yaml | 10 +- .../00_base/140_codex}/project/__init__.py | 0 .../project/acp.py | 0 .../pyproject.toml | 2 +- .../tests/test_agent.py | 2 +- .../00_base/harness_codex/project/__init__.py | 0 .../00_base/harness_langgraph/README.md | 57 ----- .../00_base/harness_langgraph/manifest.yaml | 58 ----- .../harness_langgraph/project/__init__.py | 0 .../00_base/harness_langgraph/project/acp.py | 109 --------- .../harness_langgraph/project/graph.py | 67 ------ .../harness_langgraph/project/tools.py | 24 -- .../00_base/harness_langgraph/pyproject.toml | 37 --- .../harness_langgraph/tests/test_agent.py | 100 -------- .../00_base/harness_pydantic_ai/Dockerfile | 50 ---- .../00_base/harness_pydantic_ai/README.md | 54 ----- .../00_base/harness_pydantic_ai/manifest.yaml | 58 ----- .../harness_pydantic_ai/project/__init__.py | 0 .../harness_pydantic_ai/project/acp.py | 159 ------------- .../harness_pydantic_ai/project/agent.py | 39 ---- .../harness_pydantic_ai/project/tools.py | 20 -- .../harness_pydantic_ai/pyproject.toml | 36 --- .../harness_pydantic_ai/tests/test_agent.py | 118 ---------- .../10_temporal/110_pydantic_ai/README.md | 194 ++++------------ .../10_temporal/110_pydantic_ai/manifest.yaml | 6 +- .../110_pydantic_ai/project/acp.py | 6 +- .../110_pydantic_ai/project/agent.py | 71 +++--- .../110_pydantic_ai/project/run_worker.py | 20 +- .../110_pydantic_ai/project/tools.py | 5 +- 
.../110_pydantic_ai/project/workflow.py | 49 ++-- .../110_pydantic_ai/pyproject.toml | 2 +- .../110_pydantic_ai/tests/test_agent.py | 38 +--- .../120_openai_agents}/.dockerignore | 0 .../Dockerfile | 12 +- .../README.md | 2 +- .../environments.yaml | 0 .../manifest.yaml | 14 +- .../120_openai_agents}/project/__init__.py | 0 .../project/acp.py | 0 .../project/activities.py | 6 +- .../project/agent.py | 0 .../project/run_worker.py | 4 +- .../project/tools.py | 0 .../project/workflow.py | 6 +- .../pyproject.toml | 2 +- .../tests/test_agent.py | 0 .../.dockerignore | 43 ---- .../Dockerfile | 62 ----- .../120_openai_agents_local_sandbox/README.md | 130 ----------- .../manifest.yaml | 111 --------- .../project/__init__.py | 0 .../project/acp.py | 83 ------- .../project/run_worker.py | 80 ------- .../project/workflow.py | 213 ------------------ .../pyproject.toml | 36 --- .../tests/test_agent.py | 144 ------------ .../10_temporal/130_langgraph/.dockerignore | 2 +- .../10_temporal/130_langgraph/.env.example | 13 -- .../10_temporal/130_langgraph/README.md | 79 +++---- .../10_temporal/130_langgraph/dev.ipynb | 126 ----------- .../130_langgraph/environments.yaml | 64 ------ .../10_temporal/130_langgraph/manifest.yaml | 93 +------- .../10_temporal/130_langgraph/project/acp.py | 26 +-- .../130_langgraph/project/graph.py | 43 +--- .../130_langgraph/project/run_worker.py | 12 +- .../130_langgraph/project/tools.py | 41 +++- .../130_langgraph/project/workflow.py | 9 +- .../10_temporal/130_langgraph/pyproject.toml | 6 +- .../130_langgraph/tests/test_agent.py | 33 +-- .../tests/test_graph_temporal.py | 105 --------- .../140_harness_openai/.dockerignore | 43 ---- .../10_temporal/140_harness_openai/Dockerfile | 43 ---- .../140_harness_openai/environments.yaml | 64 ------ .../140_harness_openai/project/__init__.py | 0 .../150_codex}/.dockerignore | 0 .../{harness_codex => 150_codex}/Dockerfile | 12 +- .../{harness_codex => 150_codex}/README.md | 4 +- .../{harness_codex => 
150_codex}/conftest.py | 6 +- .../manifest.yaml | 14 +- .../150_codex}/project/__init__.py | 0 .../project/acp.py | 0 .../project/activities.py | 0 .../project/run_worker.py | 0 .../project/workflow.py | 0 .../pyproject.toml | 2 +- .../tests/test_agent.py | 2 +- .../harness_codex/project/__init__.py | 0 .../10_temporal/harness_langgraph/README.md | 53 ----- .../harness_langgraph/manifest.yaml | 51 ----- .../harness_langgraph/project/__init__.py | 0 .../harness_langgraph/project/acp.py | 34 --- .../harness_langgraph/project/graph.py | 85 ------- .../harness_langgraph/project/run_worker.py | 46 ---- .../harness_langgraph/project/tools.py | 37 --- .../harness_langgraph/project/workflow.py | 80 ------- .../harness_langgraph/pyproject.toml | 40 ---- .../harness_langgraph/tests/test_agent.py | 106 --------- .../harness_pydantic_ai/.dockerignore | 43 ---- .../harness_pydantic_ai/Dockerfile | 43 ---- .../10_temporal/harness_pydantic_ai/README.md | 61 ----- .../harness_pydantic_ai/manifest.yaml | 62 ----- .../harness_pydantic_ai/project/__init__.py | 0 .../harness_pydantic_ai/project/acp.py | 35 --- .../harness_pydantic_ai/project/agent.py | 111 --------- .../harness_pydantic_ai/project/run_worker.py | 48 ---- .../harness_pydantic_ai/project/tools.py | 24 -- .../harness_pydantic_ai/project/workflow.py | 137 ----------- .../harness_pydantic_ai/pyproject.toml | 38 ---- .../harness_pydantic_ai/tests/test_agent.py | 114 ---------- 206 files changed, 678 insertions(+), 6895 deletions(-) delete mode 100644 examples/tutorials/00_sync/030_langgraph/graph.png rename examples/tutorials/00_sync/{050_openai_agents_local_sandbox => 050_openai_agents}/.dockerignore (100%) rename examples/tutorials/00_sync/{harness_langgraph => 050_openai_agents}/Dockerfile (73%) rename examples/tutorials/00_sync/{060_harness_openai => 050_openai_agents}/README.md (85%) rename examples/tutorials/00_sync/{060_harness_openai => 050_openai_agents}/manifest.yaml (84%) rename 
examples/tutorials/00_sync/{050_openai_agents_local_sandbox => 050_openai_agents}/project/__init__.py (100%) rename examples/tutorials/00_sync/{060_harness_openai => 050_openai_agents}/project/acp.py (100%) rename examples/tutorials/00_sync/{060_harness_openai => 050_openai_agents}/project/agent.py (100%) rename examples/tutorials/00_sync/{060_harness_openai => 050_openai_agents}/project/tools.py (100%) rename examples/tutorials/00_sync/{060_harness_openai => 050_openai_agents}/pyproject.toml (95%) rename examples/tutorials/00_sync/{060_harness_openai => 050_openai_agents}/tests/test_agent.py (100%) delete mode 100644 examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile delete mode 100644 examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md delete mode 100644 examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml delete mode 100644 examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py delete mode 100644 examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py delete mode 100644 examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py delete mode 100644 examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml delete mode 100644 examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py delete mode 100644 examples/tutorials/00_sync/060_harness_openai/Dockerfile rename examples/tutorials/00_sync/{060_harness_openai => 070_codex}/.dockerignore (100%) rename examples/tutorials/00_sync/{harness_codex => 070_codex}/Dockerfile (74%) rename examples/tutorials/00_sync/{harness_codex => 070_codex}/README.md (95%) rename examples/tutorials/00_sync/{harness_codex => 070_codex}/conftest.py (100%) rename examples/tutorials/00_sync/{harness_codex => 070_codex}/manifest.yaml (86%) rename examples/tutorials/00_sync/{060_harness_openai => 070_codex}/project/__init__.py (100%) rename examples/tutorials/00_sync/{harness_codex => 
070_codex}/project/acp.py (100%) rename examples/tutorials/00_sync/{harness_codex => 070_codex}/pyproject.toml (96%) rename examples/tutorials/00_sync/{harness_codex => 070_codex}/tests/test_agent.py (99%) delete mode 100644 examples/tutorials/00_sync/harness_langgraph/README.md delete mode 100644 examples/tutorials/00_sync/harness_langgraph/manifest.yaml delete mode 100644 examples/tutorials/00_sync/harness_langgraph/project/acp.py delete mode 100644 examples/tutorials/00_sync/harness_langgraph/project/graph.py delete mode 100644 examples/tutorials/00_sync/harness_langgraph/project/tools.py delete mode 100644 examples/tutorials/00_sync/harness_langgraph/pyproject.toml delete mode 100644 examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py delete mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile delete mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/README.md delete mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml delete mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py delete mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py delete mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py delete mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml delete mode 100644 examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py delete mode 100644 examples/tutorials/10_async/00_base/100_langgraph/graph.png rename examples/tutorials/{00_sync/harness_pydantic_ai => 10_async/00_base/120_openai_agents}/.dockerignore (100%) rename examples/tutorials/10_async/00_base/{harness_langgraph => 120_openai_agents}/Dockerfile (70%) rename examples/tutorials/10_async/00_base/{130_harness_openai => 120_openai_agents}/README.md (92%) rename examples/tutorials/10_async/00_base/{130_harness_openai => 120_openai_agents}/manifest.yaml (82%) rename examples/tutorials/{00_sync/harness_codex => 
10_async/00_base/120_openai_agents}/project/__init__.py (100%) rename examples/tutorials/10_async/00_base/{130_harness_openai => 120_openai_agents}/project/acp.py (100%) rename examples/tutorials/10_async/00_base/{130_harness_openai => 120_openai_agents}/project/agent.py (100%) rename examples/tutorials/10_async/00_base/{130_harness_openai => 120_openai_agents}/project/tools.py (100%) rename examples/tutorials/10_async/00_base/{130_harness_openai => 120_openai_agents}/pyproject.toml (95%) rename examples/tutorials/10_async/00_base/{130_harness_openai => 120_openai_agents}/tests/test_agent.py (100%) delete mode 100644 examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile delete mode 100644 examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md delete mode 100644 examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml delete mode 100644 examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py delete mode 100644 examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py delete mode 100644 examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py delete mode 100644 examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml delete mode 100644 examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py delete mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile delete mode 100644 examples/tutorials/10_async/00_base/130_harness_openai/project/__init__.py rename examples/tutorials/10_async/00_base/{120_openai_agents_local_sandbox => 140_codex}/.dockerignore (100%) rename examples/tutorials/10_async/00_base/{harness_codex => 140_codex}/Dockerfile (64%) rename examples/tutorials/10_async/00_base/{harness_codex => 140_codex}/README.md (94%) rename examples/tutorials/10_async/00_base/{harness_codex => 140_codex}/conftest.py (100%) rename 
examples/tutorials/10_async/00_base/{harness_codex => 140_codex}/manifest.yaml (84%) rename examples/tutorials/{00_sync/harness_langgraph => 10_async/00_base/140_codex}/project/__init__.py (100%) rename examples/tutorials/10_async/00_base/{harness_codex => 140_codex}/project/acp.py (100%) rename examples/tutorials/10_async/00_base/{harness_codex => 140_codex}/pyproject.toml (96%) rename examples/tutorials/10_async/00_base/{harness_codex => 140_codex}/tests/test_agent.py (99%) delete mode 100644 examples/tutorials/10_async/00_base/harness_codex/project/__init__.py delete mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/README.md delete mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml delete mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/project/__init__.py delete mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py delete mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py delete mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py delete mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml delete mode 100644 examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py delete mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile delete mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md delete mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml delete mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/project/__init__.py delete mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py delete mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py delete mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py delete mode 100644 
examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml delete mode 100644 examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py rename examples/tutorials/10_async/{00_base/130_harness_openai => 10_temporal/120_openai_agents}/.dockerignore (100%) rename examples/tutorials/10_async/10_temporal/{harness_langgraph => 120_openai_agents}/Dockerfile (65%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/README.md (94%) rename examples/tutorials/10_async/10_temporal/{120_openai_agents_local_sandbox => 120_openai_agents}/environments.yaml (100%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/manifest.yaml (78%) rename examples/tutorials/{00_sync/harness_pydantic_ai => 10_async/10_temporal/120_openai_agents}/project/__init__.py (100%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/project/acp.py (100%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/project/activities.py (92%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/project/agent.py (100%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/project/run_worker.py (91%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/project/tools.py (100%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/project/workflow.py (97%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/pyproject.toml (95%) rename examples/tutorials/10_async/10_temporal/{140_harness_openai => 120_openai_agents}/tests/test_agent.py (100%) delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile delete mode 100644 
examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/__init__.py delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml delete mode 100644 examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py delete mode 100644 examples/tutorials/10_async/10_temporal/130_langgraph/.env.example delete mode 100644 examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb delete mode 100644 examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml delete mode 100644 examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py delete mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore delete mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile delete mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml delete mode 100644 examples/tutorials/10_async/10_temporal/140_harness_openai/project/__init__.py rename examples/tutorials/10_async/{00_base/harness_pydantic_ai => 10_temporal/150_codex}/.dockerignore (100%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/Dockerfile (66%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/README.md (95%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/conftest.py (72%) rename 
examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/manifest.yaml (80%) rename examples/tutorials/10_async/{00_base/120_openai_agents_local_sandbox => 10_temporal/150_codex}/project/__init__.py (100%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/project/acp.py (100%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/project/activities.py (100%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/project/run_worker.py (100%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/project/workflow.py (100%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/pyproject.toml (96%) rename examples/tutorials/10_async/10_temporal/{harness_codex => 150_codex}/tests/test_agent.py (99%) delete mode 100644 examples/tutorials/10_async/10_temporal/harness_codex/project/__init__.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/README.md delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/__init__.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/run_worker.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml delete mode 100644 examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore delete mode 100644 
examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/__init__.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml delete mode 100644 examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py diff --git a/examples/tutorials/00_sync/030_langgraph/README.md b/examples/tutorials/00_sync/030_langgraph/README.md index e5b1db0f7..5a68792cc 100644 --- a/examples/tutorials/00_sync/030_langgraph/README.md +++ b/examples/tutorials/00_sync/030_langgraph/README.md @@ -1,43 +1,50 @@ -# Tutorial 030: Sync LangGraph Agent +# Tutorial: Sync LangGraph Agent -This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx with: -- Tool calling (ReAct pattern) -- Streaming token output -- Multi-turn conversation memory via AgentEx checkpointer -- Tracing integration +This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx +using the **unified harness surface**: -## Graph Structure +```python +turn = LangGraphTurn(stream, model=None) +emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...) 
+async for event in emitter.yield_turn(turn): + yield event +``` -![Graph](graph.png) +The `LangGraphTurn` + `UnifiedEmitter` path replaces calling the lower-level +``convert_langgraph_to_agentex_events`` helper directly. ## Key Concepts -### Sync ACP -The sync ACP model uses HTTP request/response for communication. The `@acp.on_message_send` handler receives a message and yields streaming events back to the client. +### Unified Harness + +`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw +LangGraph `astream()` generator and exposes `events` (an async generator of +`TaskMessageUpdate`) and `usage()` (token counts captured from the final +`AIMessage`). + +`UnifiedEmitter.yield_turn(turn)` iterates the turn's events and yields them +to the sync ACP handler unchanged. The same `LangGraphTurn` object can also be +passed to `UnifiedEmitter.auto_send_turn` in the async/temporal channels. -### LangGraph Integration -- **StateGraph**: Defines the agent's state machine with `AgentState` (message history) -- **ToolNode**: Automatically executes tool calls from the LLM -- **tools_condition**: Routes between tool execution and final response -- **Checkpointer**: Uses AgentEx's HTTP checkpointer for cross-request memory +### AGX1-377 Note -### Streaming -The agent streams tokens as they're generated using `convert_langgraph_to_agentex_events()`, which converts LangGraph's stream events into AgentEx `TaskMessageUpdate` events. +LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates" +node outputs). The `SpanDeriver` does not open tool spans from Full events +today; that gap is tracked in AGX1-373. 
## Files | File | Description | |------|-------------| -| `project/acp.py` | ACP server and message handler | -| `project/graph.py` | LangGraph state graph definition | +| `project/acp.py` | ACP server using unified harness (LangGraphTurn + yield_turn) | +| `project/graph.py` | LangGraph state graph (weather example) | | `project/tools.py` | Tool definitions (weather example) | | `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | +| `manifest.yaml` | Agent configuration (name: s030-langgraph) | ## Running Locally ```bash -# From this directory agentex agents run ``` diff --git a/examples/tutorials/00_sync/030_langgraph/graph.png b/examples/tutorials/00_sync/030_langgraph/graph.png deleted file mode 100644 index 16d22a1e7ec819b0f0520a1347c729ca6adcbe5a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16357 zcmZ|0byQT{8#a6>K?zZ$TT)U&Is|DDq`O4v?gj}_x;vz#TRH_9dQeH}?(XK>^ZdSl z-gm8cEt~=8%sG4Cd+$50>$<q6=PPPd_RFJHMn3{X~?yQHF`q&lv!Tyw2*5%7UPDbQM&zU?ssLi-SI7x33#%w4C ztE)Mib?Wn=v2{F|YtrQh`H{qwe?G)V`nWc@zSHXK_mVdB%Oi@8^J5{Q zPKt5Q;cWL|9WQK%ItW)B;ShA`{C<*2-l2zYKZrp^!ZnDeXCZ42KQ)a&1r41>72*pkBN=;$n>$OY+x zgMxa&z3Yn9{LWXt7ZjA>knw%1(_#^S|30TxK8cMDc4zQ-_19OtrM=naSJ3Z#c|tP@ zi6Grr_`PbS)dZW1gF}^;h6a79L8}fr6!-wUdfQn52kMEUBve&V;r{sXqjC7LPZ)cf77GUQ zoX@#_dzo(i)}X0V(I4{_lm}Rq>nkhC*XR3okFl{`{4Q6M3`#UBc&w*>$nw7bg)!Zq zz>EzYY!+m#S=H*?t!MdJ=H})O%cnyJf92T|6jHfmSy)-|MK4$4M$~&SA3X{}O!D*R z&rN1Ssp{V}N~+#>KkBNV_uZ~_-c(s$juJJv12!c=OcfaD!)Bp!ZfQzN%3rVj*7bU$ zpUCqR!k&D{C@2Y6E#sHR>(><3uuwVu<^ z=p!b9(ow2i<2_NXU(&uDDL80sYxt|7XflgF4J&g2<)WqDE{+)_xg_^8maI~V3=ex|11NuZnCTYRXH#O7)t@Nr@mpM&`6?fJY{ znRX5ArSHwf-(n42pVie>*+Op~Ef<9{4H|lS7%+4ZPufsvX{o{&a5U0yvADsm^*FmF zbUIp*eFAx&?RfmW;N;{~<1&Mvuro0|{h?!X`h3qUs=PJ%v%6AnZHP{0&=0CCzq>|F zFo>(K44I6#ch~#JB6kffB-rTw z#p}uHj~+duEz=k%fhQ#;P4F3NX)z-E|1>JpOtMkuCrnLUc5HlUGsS(Mlk?md7ET;I z5BBU_{0r4kw$<8ipXTr!ce8;z!>o2#mhv$-FT76FwXNE67T9xC7iwB=$x&W&)x>h$ 
z{Z~`43{DT@FYVdN+CS#pLhS5HKc??(zXXSbG($%7Z9ZeO6=1F3D=WwRF4q@KlFJvL zuCtkyj-pUc8Cu&SFan;n>ByItNvEXc+qZ>)+gzLJI+xsP<_4y{n$?vR8Q_#Udt+!= z4-XF&Wn&jyn~5PtIZ0oQA~iB7G%T!So4K#hut z<9zufPoxfbhM)^uQpwJ=UHhx=-@jXa#v*<{0EZ_+RJ61!0Vr0Ur&}MDIhwRrg+)|nES+j(_>gGl7FNZt+ z%gF^H_9rDi{_56TYcn2H-n=JpJymOU8b-)|)(V^#c@85dyz$*-H?8dZ`WeTt z7ROb&!#;*AEMm?NUv!n98Fzig?#zu6DR$nQNx8nhrg`;htkPnHK4Z)>q5W!$8K3EC zc~svAPALfq2)etTzrW&u?Kn6%$bo~pa=JZHLcngY<92_0uA9WF`+c^?EXuMsntJ@I zg&~PqM8sEFM`xlm*V)8l*>-wA5Rd68!XvC;RX}_M44{E{#Z#; z>zfdqy~@g$=U{s3%$nug2v0bXEB#oyCz3qF!oni;*DncaF|kh#e)qmZlzunaf*wbC zmI%j>PU-#I`>dv~21Kj|Cr8J;ygb_3v)yTzaE_Dx`T0L3n(Wu$uKBFGbsIc(bEP#} zES;)K!s-I<2S$RBgp^cMgj8;|%{0$!BCD>m*V)dWWCePU&Y*|3u;f3s^uj zM+*x^6NY}b`3Cj28OfQMY%?ws`l~!?Z)Id4lCvRfgBp@)* zDUz;NZ18nPVlfc&a)YQ}_I^fxfNl9)wTNj1C!*i1Y;(q!@q+2iA zbmSBTW4tC)4+pZG+S_M1w{G8M;4bkeq#09|?Vo~h766X3O2V_SurRdTKU&LXAOp&; z>waRXs-x2{p&SHUtr=zgKkGr$8V*rW(S})D*~E|FCxK-kHiz*k)NRev+|tsbr&Ff! z1XT?D5TJ^gh94Gl{?K6+^`Z`O>Z=Z4T6+I;V&c3d`J=X!tnAl?{r!rUunvA{W8-Z5 zGiRc-A=n4)@+bF=ry8vcIgF30S;DwX2i-luu5g8ENy9w^VOqM-V)`);P6X&0M^tAE6e*ozis8%6j)~b$WDk^aXVANA&T~rU2tY5O+xVtGQz9N_uk&#mb7oR6X}u$@)JL5 zvtk4kb2c_fDm%-@u8f*AdS zY3zY}DuIm?4^lfDeRFejS621vhK7bZfsArte+(!lfC|ebyxRdzgNKJ#n|_=N;c&qs z9pO>-@Ya&|D8Caqc>a9g)E_1y^{bjtOr=Mpabb>)dY1LmWvGV-~|URFHH^nwygy(X`suZOYvM)}#94!2Fge(#aUw0S3Q_AweUMEX;lEg**v&HiDK;~P5gY|qSYV&O5 zb_M01ZyyN>3E%I}sZH*l8bnlSXBROA;o8H|Lb|fK(uUeV8d_GSQNqjA#52&^(h?dj zWm2VBko(Icg>U`cvwS9tHAlD`vis1g<185|&x)>6_bLc7E?cx8KI|N-VGI$}3O#I; zYMixA7}*@vz&UuL&XiD7OzNYT)duah1jINXb_p8 zE0NVid60qK>-)Pm#l!ovX`v|Sf017P`t^%iL?jz_o1ag2v0YKtJ2WIFBSZ8Q@491R zV!y?+cyiZuXF4e~5dEB*iRpoZ!{Nc%nZ+;7Nq0mdD#V8*0c48_N3Lyw?_L)i37hb1e>pGLa+;AlW8J1?){ip0=RwB>YM=+`f2 zfeN3&jJC6CXHd`-w=qh??;`veJ*U zc3iI~!^6WL$m8UBTy2%8H>@VMHJpw&ADoQDl|=1N9y0jec@TfbCJp|IXZS`|_6Y%- zH)VWgK}rho#=jc&oSYmaXhNSCdibz4CcOnEIC%Z7 zi{$n=HRM0rT))^G!{WR(ddk#Z{8_RuE}Gyu2gji&{Bkzo;jtV1vN}FKJ_P!GEbl1m z=*VQc=)8fe>r=XQJu?%>=OaJ};ZkO4Ew?o^*vcr*eY&}sIqt`LWEt^7b*>X+MxP5Z 
zn+QSK)H9-_lx+r%X9+uS!3N6l&8TATs2v9IOBL-zhWz||Z6R_}i?$O_6sW_rrgLC` z6ndwkQe_NmEa>Z0_%-t{|dzxN3P3tj1c_^~2- zT{&Lpp@k`kL0bhHC6p`aqVGI5RN)@yD=RC<*ZmkpfmCXP8bYo+_7Tf5G33#}6u?3< z3N1KLrJLO&=$n4?83raMI+XW)uh?PU(9Z@1LPZau=HfyrOy7T~=DgcqjYUamD0I0( z8=shHv<#cKzjCI~seNR7GBNcVv9T3?_4Ld?m;FlvT{*zpQ4h==9UXOqZRzqfVrh*R zIpy9cm8l;LkTiWzfJUX|ZX>3&|Q4 z41Vn2e31pqJW?wSa+iDm{yoe3%@f#UZu;>mB=S~K@lI5!M^Y!UdC!x`!=vD!kKq`C z;uzY$lG+k|J)hGJKcCME?G9TF%1@Ro`lw2l;$n6lWlb{b!v`+QK@aeEXQEV? zS!aKds%(k9M=(~3$=6c*ps(mS_S-oDU0{@)*@5z=+%Vt3P(E8h?I5e*VD-0Ndp>eo z#zz{xY;Es5(>FxRO=p7iZ0O@Qk;HH*&~Q8W@54FHHbJ2-FHBBo;r{lcWkTnDbPzm9@u3+q60zP3KiF^Lva*!Sh37rR9rsIk) zaesQ9gH)@9d9-4_#pLNaU9G>Xe#`Ijuq4AfjD6T0oswdNku&<1e_J$Rd$?qtz7&2V>e zl(h7=E5P|d7Hl8imOQ(wm!T`7=R7ThzJfpQZuMjtlhT{+rdz5aIeNf|GiC1ysxjI6 z3}wd>@Bw|0US?z%uA>DBOS8|Nsq0b*JJ}Tfdu!dF6}8_f+_*~*6s3aPI7!ZI*itW6 zPj-+VxBV?YSAQH3twT#sUsvIs$dA*(4jCwc!!R=9@I;)=Fpze5xPXGVhsbpPxB{nh zjJ(~e4UTeyRsp$GbhA^!)gR?{W@-J}P!&GJi0gAkH?d~%$B4Qa?+Kc{>NVB_trhh2 z^dkNI_km7#29AzL50rW&Ff6n*xntK$Hhbo=%u7;c_9GYQ>m`ERzLB`K{}o#ukxMw- zotCB%B7fY-!}y4h7;lFlEMFdcSyV+uh0!5)sxl}jh!mt*^P=usQ3IJ)GD7Rsx|xXG z9nJCD^QuX?%mtfNrHh1vN67xNl!h2eRTxPAp%>7x!r_zDa^AU9_v4hXDBCgU6a(r) znt_ZWau~NeZzQlX%Hb(U8AIcuqq18n+Nn+l6qN{hwt}$x|a}IX( z`%9bd7}OJ=8yfg>HA={FjQC8f`PN&oI&|Qd=r3NppeZRQ|SeENA zEXZZEc;3K(7G$A)v*6#`-TB9kB8$SKqF|Xi=!XJ*ToJH0sy`|;p-Deb3u+e~P=?7W z^+=Pz=@^EG=Y>+ajT8z)dPWVJaK4Io$`@Hp_Nd}S`7LFfQF;hah|3%&B_|W*F!ocy zCK;hFi0nZnY}a*IPF}vbYveEn<|Ny9EE@M%c!jcbcM&hoc9JfzJhmJK1^MwFV+x8t zh}Yp zPPsSu_oHS;@OVkcr*d%uPF$+ZcLHmxA zm37wvm?HJFd~Z;{!bs(}fq8=bhnuc%aHrGMUZQv4Wkv?$E@|sGyGE!Ksi7cYAuphO zqw$Y`+L&2ceQthdzxkLM8N)4nDwl~_K;~KH_dP`*DgLHyoAMNCe)!`Y^b$B(`>ZRl^Aoce~kJ|(8 zu_t52*plJ5F*g4qpCh=3IHG`)^&Vi$G6PQt3C%!J@(>ghOX=qy!%tBvvz*!_mE%_f zs1&AJz1+$RmT8CPCF2S(Jk-)64OnyA&sdEAJZK>6Kh2B`#C5!6(oGO1HfiNs7lwkI z;7KTaaDKmv!b0;C0N3qgM#lXRB7B=k(rRxwtq2pC2!8u^HaI>$!(Mab_w?Dv<%3!ZRZ~@68M)T~0f)S)LjgT*2a1fark;!DM zm*CNn<5sEJLWl9e(UHi$dkKckzm|t7yrP1lrs^4JxZ*-=)YRE{>n(yNrGoz3L?*&W 
zc}{S*7ooAgh`dDk`90bTmRraQqNY1^^ziaGnS@pwmN>T*GTR@o{iQzCQZWCq7%)+) zcPUK3;*Jh|8`(6j(T>UTd&E1-O>(@}s&BRmlBMD}GnK!dgDG6tf#^~G8rSE`>S5jC zQ6irEEnX(?mjG5eQssFXB_mfXDC@R2(>lIFJT}x*&o|+L2gzj)>1m83<>l>k=)3*7 zni|pvU^WjG)i>RbIXf`^eHhU;oPYQ!s&P~fV>41qe0q8drvLQFp}~uPKgZX|s}5Ue zxSjSa9h(zaH68sj`nYnvG=3PU=-W4QfK;ANo`9N#g{;PuHK5%Os;~n9djK zvhDk`$)(-sbc>S1xN`uJzChn953n-5_Kd@c9GEHG7xuLoeSMK?VRi@V9Q0LpE^Tk} zw6z)zgr5--Dj!)(EiEr&$I^BqfB6^=URKkaiA{J>CQ+#HUsvZaW_`^`PEAjrdCpts zXKHY5h;*~H$=91-#O1+cODAE4dl?KTB*+w#mF2K)Fs4ODMn-}-I5LWd(a~zl?+hw< z)hJ8kGSxY`xS|tWAgI!IwqGZ%z`)KQO#J2+<#(HpbgAw+W}WpVd6Y0uwxcCM2h+7M z)&rA>CU$W5pR+AvlI+&V3iPT^O{_hE_a{;6aV+<_K3nMc)LN-CBqP&+11hz#c1$(b z*89u#2P&>XUbboRyc{rMWRVNoG+UTQ+s zb1o%ijPdJ2HutwiM$913wVpg+wr#sq3<|DoEv3?^xO*s&K|v!O2|HRfTpmoh5r~^l z;lfwt>gu9lFKJ^E$VIJByEag!)_iDEitc}(s_?xPga=zt{MuzkK7__gFNrC=4@^&o zjG8 zK_*!9-p3F72L|4$slD?NMHHB1CwsG$+vB$ZGc%#Q&4l0 zAx<_p#vKUjM2U_Y4JM;o>A5Za;sCXywCma8mgtW+pJ&weJuPX9h*2M$!E@1DzI&7C z`bq}B;)L^~ieZ?92N#WP?SbItg)ezY1<=ymS>fC3$6|_bhm(Cj6i~1pjA>FX7i1+a z4U6&`ZTwC<98r>wiE|=T65)SXKkLeScjH9!UCHmEvGL-&k}-hq(jkhY>wU_QKe_iv zAzhKfBGnkn;l7`rKjJ?CQqppLEW%ih~NshS}~LnFA*wEX}XYI1!={jw5YF>~;&gSo9?pJ$#+H#Sc> zysFY^I7Q)m1LmyTnIGjk86m#p6Q=t#T;)S!xgjD|q{72|#?a@XIgi)%28c#DU_U~0 z%!y5OAPtxn?fOhF(eYBc7Y8gn;XX8>uL}1ywMi+_>uf3v$@F7!N##%?6KTG6kMv`` zf{_&IuQ7T9TMFSM5#Pp0*xN9alh|fC{O&X_mZNHu`H%4xQaCB@ujlD2J;Ph?F4yx9 zPEX&g!)JdIbphv7<$d`SfvjAm5HK|#?zQ`o5wIkG)o-rr>?ttz)NMtUJTI#L@h`lp zINP={yb4z~mXpZDA^rF^yh>50tS;Tj;FY^2rAg`5h?v1@fjt9s`tKv;FPGnsitChp z4TcFxs8a}b5n(Q+QyPVLYuuJ-l*se1|83l7Fd0-*em0U)-DUetH?V~=sULQS83jR< zaR@DdW$n_KCIjGXgkOKu6!G8M8^a-g9~1wB0v%^ZayZk+0h~?fb-k^yQT=oY-dfFY zrs(gUO=pA3IRJv$UmX0D@_4Hm`lhjV>)ek=Qke>)lb5hbNjV-dZZpc|bV_90J|b0G zSrxu@PLt9vPPV8E0_R~IiI^RX7y%hhwyMv%4K95iuD3*UBzE)ja>4rf*H^ut@5n9lV||7lCM~O9u+bXelnqH$oi|z$LnCH=Drh~ zHyjn3ah;w=Nk+#a3dLt`gOyZ>go*e(pnl~-`0jGB#kMh5{u7lMATjdg!TmdC>t6wJN#YKveMd&(@B^ zTi-@D7?HH(UuVY9SU7){ikjLeFCE<~jdJt{rS8*dJJIJtLQi?;Z~T5+*4j5R>4b&m 
zKo9_wq}qM)^=r}4E{XW;8AWbS{LBGf5=%=>Pmj~s$j|4x^HPmTJ%U}iWy7q_k|crv zxJKi8FJF$7W^~^+VSfHg%FvvfK&Q3g%;BFnS4?ZbB3Yk))Y2_TJ8}Rs+rNL zsZIDD%+IQd%#K@;rz;8F+81YCYY?QoW6qAyY{O1+qHZTNF|rHLRU@9k5qnl-bV3lmxbR-Mm#vtCQ56K2M9EyNdFB~hRH*S`yUot4!Xh~ZMn z7H?&tW0OPz_#0s`jWV4`X$p_2<-Qs@OUo?+Nb^VPL+U(>BChE{&$ZVdN;IZ+c}2&{ z;lZdMw55D~*}Q#jvnM7Rv25dFvut|m@Ef;bicutcn_A_+r0<1(R9<6$Qj$17IMl+T zqw9W2(psEll9-tIx`Ow0zRKcGOh0sRI+3+7z)tx7X8_%o$jCAg105aY1UdEAZ?IKH zX69hV(3eEvnirvL!__Gv>8{$_*9cf(oP?THD}!~|kL`=_6`+}ltW9~HUBa6{gvide zA)#rLa=A)YcNI(^KW&KD`_)(?r5^DaJ~-I1(xiN$qQ?Zl9&G@Uuh{N?L`Aw@wulA0}DMmYaeebi5|hQ(2* zApdfkzwPySn1m~y8Wu*c)I3(Zb0KoPyZcszmC0+Wf3`BAX~k?^y1-o^F|pL72kw4< zs{}L&(OFqxzkk=geg98L9Ha3mVFt_jOiN_@n%OkgO~61GuM)pD!D}N?FM&v_VBp~Z z5G$7+6itwAz%^9sY3!uH&|-^Jp$55AJE5y=xkp&tj@iNt3=)xX%5p5^r^HJtIiob_}Y0 zusH1la+%OV^>JjouX9V^@U2jytt+MMi&=hG&udu(EFms4VE^yGxtU3PbunnYaJT6C zJZc}ApG4!N*_T)~GX41Qzgy`<4k%_qFI_#=>hfPV?llu@@BKwK>yjLU{_^1KoLjR@ zI`6O41^&-oiL0Tp43N;F@5(3See>DR?22n2_N_=Bi^V0bC4%SQUR2fRFgiRH$FQ&` zoQ*9N`L6iU7P1kEi7c&+Z@TI}JsshdXiO?*oQT{N&0urXxeTh}n(|xzN(7Ab!&`Ia zNN9SpKIgO99^>u#-^~!c>-q$}+F*NDIyPF?0oK`*-J&bN@QbcmJ=keY!=zLsBA~71^{^ssqJZoL`R{amnl^qR)u_52u4kr@x0Z5j zAq0Q$@r6FT!aZ2VOFQxLwC0J-yM^C-PvZTYzoHf1?|B~ScRdBN~p^dtzCdc^!ytRnA+o9{iCLmcH2S2a~sRcnOE@u>3_ z^%GV+>Y{ow{tCJzrt$yQe5UR9a_;kh&}$V;*9Ze=5A}m+birW}DsY+Wxzz1mZ&`K=4sR zN=TDz14pDqMMZR(kza9D#7>-{py;X9wY9yzp`lw^Zf@>aj=JZ3d|FoAB&uPdq2*JN zlHda6fw*W>U=(A$F-u86-PutHI2R6rM?04NC)|unPQC!tkrb4oSa59PNiWwsh8U75 z;;Annb|<*zKkK^s^Zog(U9bR+8t;y1^HR7{6rwm`6lUS%F3qzwXFLDmmNt`R26*hm zHE`pCj3iyrij$juKU@(N<=sl%Li$f1Weni`XQ+!=5Ds(bk%gRGzb7z_&Sww2YTGBH zL}g>LRjxH9A$bHlBpEAM7$aJbl=#DON_1BpnFPudXO^9r&_6I3W;;k!c)x$*a2_zrY(6Ja?QN-!-e_~j0u4G`wx7RAgQmOH>vQ(|ScpCcnHVz!w0Hz4{W5Q(G} zgMQQz1)YqCxG{vacg~8fPtfue(M{g`z%jXgnMjjJaR|-N$|OKq||Uw>-Z5u@m*7;Kr!4lFA1lkq?;9N2xJY z#__c}CPPCEY|f1W@pr@kOWOE3S>Zc6vB_f&*l7B3a9>}ap_c&tUvBTC{+QA7KokMs zU`j|!BJr+=%G-Sfot6M1y$k*>2?nV4#5b_<=F*QDkY?aS*OQyhY;sCUqDz|%y<@!B 
zm$_{^3Etkng|lHAyUr1rc%er-TIcg|Y)En|7yrneNSfRJGqjbKw|9%!vu*|=lP)9~ zNF;ZBqjov%ToJ8bQS&Mqy))odQ%SuJv!?&p{g6l(>Wjs@7dDCpwV|X4$tT9UKly^u z9A_9NJ8bYv7N{gHF0S57fbFmQ-SHCjz{b}Y&xG%Fi!~AZ&6^=JXd>j?ss)_gG)Q_# z2#pBIQ#?F%0Hdom9*8qThEDMI{98%gL`inaIj)8B=yEX|()N-waX>0o-i4*_Fqqaw z9jIX?g@LILO}6=5kBW(jMeuYiuO=Br0zFb&k8S<@3ye;Y2&oz_dipg#yOu*(5v+J* zBp@I_FT9GN)oG}pg3@5p#r}6-%%MS&=;QoP=|V;h^cA8tH-Cy^&OHSZ<;4(Zghf>J zo=Z|$N-v`6gLWbrP6&L{*%D3c-Kb^l+uT2s#8$rgwl2ouyBIh)6d?GLAgCewtnuQv zyrTc$10z*x+UJ*j)gwLR^@z=~pBb6oCN%QrErx}wgtGJY>6N3Q;XIdTYny`dv#K9I zxWAQ_vH-rF_@j2B5IIdFUzzenLr+9ms=^YgKLzgtk8LdL?1JTnXJErnCn?0zYN|hf zMTC_P8C7X{19a;aC{pSNR{sJ7-m%RCk2Ay*t#G13F>EyKp-Rc2q`cr zV!z6h!DxJ*!`S2^kf9CAc>eVOphV##!^!A=e~2&nCtsy#2k6IWWi6}1f_Mq%#I{}t z+hQG^JCoJQws7=Tl>R<_?!O8qBvQC4i(&CHoI?^A1jt~%R#e!vDHbrVr*oBzZRzXj zky-Pk-GYBVcU?<_$hT`v&yI}Sd-Piv$CnR6$sRx(Nd~o zOQW041du%t4FNgR-BwA3cXf5O8hh(WtZFQ+*_cZJRWG)>8_=Mg6=eBxwj6X~(g6i7 zC**zDO$Y;4JpExY5=v5>xhDQjjkkW&yu~PRbF!*yvcBSsE=tfPR9a1qo5%VB9|*rg zXvY-Di;9|~)6xRdY@5iT15gHbzUS!Lb^SR#O}Mxiv+;UI}E=s+UkczdFv6V!8R48u{PcMwlnE$kJj zWs3%GQF#T&?bSsG-=I4v#$3cyifh>k1csz)D(_!Mu%Dn%Z%S_+*9^{whcTj1b{-w zU^)5c!TG`r0RL%3+R&R^-wjI0U^L9RpP1}IpV`|kh=2afuARdh z)xgY9K1#hkP@eyqC7n2W-R%J(;%Fn#y;|thTG9alcpH$$M|8h`0@yIhErxAOkhe_hvH)nT`(@yZ&as2^obdfk-z4X!M^_Qto46V<#y8DK-&w+Z$b5 zUH$s==TFBd3SqLlySx6?am`>37*JiQ=Bub1HrvgMr~`R%9gqWBPn2hjHaeNfmW*}U z)?0x^v|sx5kPRc?Z-t?#mzGvC3q}A-==;`YKOj5|URn9z;_KUn_<|Wi=Ci654eH#i z_u&aBTE_L$>$~&qqV-#P-N%Y_g(9mL*XUG?g)SKrgUN?a30SWv04Pe>B2}i*H9d_! 
zHz$HX70Psi7eHax<99FkxT1ygIN-cduLb|492Y{YT7=o=-+Tkp8g0fGFP`kU3ZnSi zwcn0!CukbH*cvH_LV?zXv-Vp}he!f}>R){V=xKjlbz-u|k87*}a&8O+77|FS&N_t< z61u-R?#|B6J_2^j11eBq;C&PdnTDsQbv;0DK-hZ|6a6GrLmNFdLXnFFl=-1F-fiex zap@-%^eK9!OuZwZ0M^Jtp<>f-QhVjR@$TFtN|=72ngw8>PatsK?SLv(T|9wlVEiTQ zSy*=`rTfAs64{TSQJ|JLER_$ItdAQ+B&qy;!?}s$>({TM&j<+M{H{A%nT#a?uRdS- zeJhj)rQ6DCM(55yyG5X^Mlm_nwJLDizr(s++cL9s;~H6N_%$pw(h$Z~+Pm3cgw?XOPG7+GS!w!b7LEL4Jg^ey++S z#1zQI?*Mt==MC_E-AcgR$d^~}Fl&^)v<3oUUdI)g0Yn4Bca@@O&_44B10xz~0ihm8 z$a)`O5gDQTg8^NZ8@3w=n+_)D0EMx3zRD)jg?a6iW0?k~q%tZ*0@?GC2}mmcdSx{( z0|9xP(txn%Nk^uzSA#}Lgo1*?O^j1!H<2m|<8peE6MTTkq<~pdts1!!$Yn6sXP7Vo zx;CAs#w=@v0V(jH1FeIGnf+lv2lNB|W%uy#KAWg$`#paAN4gN_FBO`JUds`LmNz%c zQL?XmaWFA^TF)c9Pd3>B67Ac;S|48IW(~MU(A%OBpODZwJzX$vQGh*LyY^k9`pl_r`66j>m&|ZN4jAn#Nb_(#>7Talq z5ETQ1w2CDQc^PyTUznH{14~>dA0`^p`)T9J$ zp&p2--G(!T2Y~jy6Z9rM?yYY5!tvcK9PLHu(h$)x0cti7OY9Pv>>_O-ZP3Yb+QSEs zf$+ZAa&mGfKc(8G&^Q_u_G@%_WAo8EN_bUa{|XZq?936q%(N_s^bK5z6x~Bq z;`TJ=<7Ep$4HZTVWH6c4Ug!OKI}B7w3k>)_TP|2dujj7X zG7(1`6asdarxS+Qk410nf6uzj>-&O!2>kr--|uwhIk%{Q;=YiZBtkBhHi0qpsfhy6 zwMxjwN+ih90fF@se3SxIHQLwO&m^lXXC3ON_4n5+av00e-qCz=gbzX0H&}~l18_dd zQYHResoAf#vz~e5%rMmgMW9-(fWKf~l~g43r*D5L3gUte zmDt6#UAba`>bn#*H70!NZYtucqZvu*634r9I(6A%Am)Zqo_2PMKY>m>h1>sLy^B%c zgMcz{Jk?sCjSvXV*9<|+*I+V7?n|J8Pl=7qxjS#aCx4f9RPrU~OF^b*@y_=&+iEt0 zmPs#!n^i2R>B^5+M!y6)KTgOCB|(z`v_2GCzHo$N@~?;BOQy7&4QJ9t;pk2EXt2%C zN1qa7N!@H4+ARkYQoI4IWSBv_pPvBe$cs*P*Uy;HmeXfr6skaaWAkjtQH_{7@2<0} zYZkHxWye<80EjfB5aURgm3-ms2VF2^!aoDX^~Qy zh7>|%#`ef=cnj{o#1@yX3r%5b@Kbs)6wOhgMFF^L2d&c zu3^nTN#!#H-MVhiqnbBdf`iQt7DM(SXs^}HI%8t0|D$BImTd;%E2L3GUDdo>H_cL_ zQN}r#DLj1+Tq+kj=qEDtJ`Cn%%9Ph(cQyoV8asfKjhFy~SXS54GFN^Uj7`cj$YVYE za=Xc8`=p}%uIU~`byh_1Ccwvki0~L7Q9ZmGodB_R9pJq#`>p4h`xl4H@}NZYnOjH& z9bNU_S0|fyAo;=}<9HtgN`Hwv(4*vnXk-yN=s;Nop&x*=#irM1yQx6Nv54qjeBcjG zlsV{5w2lJZQOF%2queSRvy?~ZRt4(kz-E*MRQDMvXv;zj0Ca#NdMIX++oPnM)e={O zK~ILZ>)u!K0yke$(+W+^4G@A5Lf#2J9;1$T64E{6p0}cRhm5X!H+Vqb`YJL@C?z5o 
zb6(nbk$7jS7M~{#-)-))!^25txSQWCc_4n|^y^q0qNqgXfEDXTB4YH(<_P9GJY)NM zOw-UqlftR}Y|2U!2?>di?8#L28}xIK%Gur>_E1Iua?}zuZG4=pG_G@4h9w|%AgrlA zqHWuHs&O3nSVYTU{tCDp^XYoKEYIV$p{A~o$GZO+kn#)>CF$R$s+(8aFy0t-U8q=Rs2yiLg0JD;qJL4B|FPY9e;k)+WY7MM|oEA3bDh0-7N&udbNP zqS~Ar^$>Z~&CN}`(}X~ooeyZSQpo*dAMt$;|5#2=E*T_I>p0voABlnBKcKz6y^{!K zoh&#wSbS_WwfN#A=>6zwJk?<#pvORV79h7PN;CH3zWn`Ta?Y3Cg?z*h zB^7Ol7gtx~o*9gxe9fnN|6)s@sM=Iz=~nzSGCv%+X|(pfJiJ{Ub|zmjyha^hcFzBb z2Sx5Pv)5{0dd$M#KEWn>9u#fWQFENDs>jnfXlqd!VMP+Zv&P8o+L)O!araoMM}-oR zRIZRvHy>t&vr4{o#96(w>9$pVmiUr8s{0OPRpPy`mcF`84GX2KteIPuDM_yNPU__e zooTE{fUg_=_x03HhVP~9l^z12HTl+z_83fu_)0wvFU)_iQV(ev7#I-3JsJv@&1IiS z_F-dU${#wDMQjdi{%d{^{%~_~wc^N`u~PSgwO(x8;R5rnfES|r&b4`H0Ryc==g5=W zn|w0I@12Ss+}3Tl_vy3Op1Ysj*!*!2)N^bt(0zqJY9kMbhh&EDpLJf5@T}BFMR_@Q zZDJc{vLLN^QTXv*CVi_qt!S|8HS=tA zVY)ishDFGUG5;hvU!u)yNt#2Nr0J$40&#h?<#E<<`e28tGx^T`#tcxu=dY5_C|fFN z`+es4oTB81D_m=wb0P%N0st=8nD;N9k3BFBSNcZ(&4wXRgaw9?u(o8P@R6{mPk~F3 z?B%rFk-nsBiB~?oQu0ObddykL(+hQgumSMCZ0+p=iDWNHKJS&5~@X_Nn&#`6r; zgjy?j@w$y*K}Im`N@TW2$f^zD-tc%K*y@bgdXLSyb2 TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: - """Handle incoming messages from Agentex, streaming tokens and tool calls.""" + """Handle incoming messages, streaming tokens and tool calls via unified harness.""" graph = await get_graph() - thread_id = params.task.id + task_id = params.task.id user_message = params.content.content - logger.info(f"Processing message for thread {thread_id}") + logger.info(f"Processing message for task {task_id}") async with adk.tracing.span( - trace_id=thread_id, + trace_id=task_id, + task_id=task_id, name="message", input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - callback = create_langgraph_tracing_handler( - trace_id=thread_id, - parent_span_id=turn_span.id if turn_span else None, - ) - stream = graph.astream( {"messages": [{"role": "user", "content": user_message}]}, - config={ - "configurable": {"thread_id": 
thread_id}, - "callbacks": [callback], - }, + config={"configurable": {"thread_id": task_id}}, stream_mode=["messages", "updates"], ) + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + final_text = "" - async for event in convert_langgraph_to_agentex_events(stream): - # Accumulate text deltas for span output + async for event in emitter.yield_turn(turn): + # Accumulate text deltas so the span's final_output is the assistant + # text (matching the async tutorial), not the usage metrics. delta = getattr(event, "delta", None) if isinstance(delta, TextDelta) and delta.text_delta: final_text += delta.text_delta yield event if turn_span: - turn_span.output = {"final_output": final_text} + turn_span.output = {"final_output": final_text, "usage": turn.usage().model_dump()} diff --git a/examples/tutorials/00_sync/030_langgraph/project/graph.py b/examples/tutorials/00_sync/030_langgraph/project/graph.py index 53728cd58..6709719e5 100644 --- a/examples/tutorials/00_sync/030_langgraph/project/graph.py +++ b/examples/tutorials/00_sync/030_langgraph/project/graph.py @@ -1,8 +1,7 @@ -""" -LangGraph graph definition. +"""LangGraph graph definition for the 030_langgraph sync agent. -Defines the state, nodes, edges, and compiles the graph. -The compiled graph is the boundary between this module and the API layer. +The graph definition itself is not affected by the harness migration; only +``acp.py`` changes. """ from __future__ import annotations @@ -35,15 +34,12 @@ class AgentState(TypedDict): """State schema for the agent graph.""" + messages: Annotated[list[Any], add_messages] async def create_graph(): - """Create and compile the agent graph with checkpointer. - - Returns: - A compiled LangGraph StateGraph ready for invocation. 
- """ + """Create and compile the agent graph with checkpointer.""" llm = ChatOpenAI( model=MODEL_NAME, reasoning={"effort": "high", "summary": "auto"}, @@ -56,9 +52,7 @@ def agent_node(state: AgentState) -> dict[str, Any]: """Process the current state and generate a response.""" messages = state["messages"] if not messages or not isinstance(messages[0], SystemMessage): - system_content = SYSTEM_PROMPT.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ) + system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) messages = [SystemMessage(content=system_content)] + messages response = llm_with_tools.invoke(messages) return {"messages": [response]} diff --git a/examples/tutorials/00_sync/030_langgraph/project/tools.py b/examples/tutorials/00_sync/030_langgraph/project/tools.py index 1b402a906..b3e5dba34 100644 --- a/examples/tutorials/00_sync/030_langgraph/project/tools.py +++ b/examples/tutorials/00_sync/030_langgraph/project/tools.py @@ -1,9 +1,4 @@ -""" -Tool definitions for the LangGraph agent. - -Add your custom tools here. Each tool should be a function decorated with @tool -or created using the Tool class. -""" +"""Tool definitions for the 030_langgraph sync agent.""" from langchain_core.tools import Tool @@ -17,16 +12,13 @@ def get_weather(city: str) -> str: Returns: A string describing the weather conditions. """ - # TODO: Replace with actual weather API call return f"The weather in {city} is sunny and 72°F" -# Define tools weather_tool = Tool( name="get_weather", func=get_weather, description="Get the current weather for a city. 
Input should be a city name.", ) -# Export all tools as a list TOOLS = [weather_tool] diff --git a/examples/tutorials/00_sync/030_langgraph/pyproject.toml b/examples/tutorials/00_sync/030_langgraph/pyproject.toml index fc9f99971..33bea16b5 100644 --- a/examples/tutorials/00_sync/030_langgraph/pyproject.toml +++ b/examples/tutorials/00_sync/030_langgraph/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "s030-langgraph" version = "0.1.0" -description = "A sync LangGraph agent with tool calling and streaming" +description = "A sync LangGraph agent using the unified harness surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py b/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py index 36fcf418f..dabd83e76 100644 --- a/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py +++ b/examples/tutorials/00_sync/030_langgraph/tests/test_agent.py @@ -1,14 +1,8 @@ """ -Tests for the sync LangGraph agent. +Tests for the sync harness LangGraph agent. -This test suite validates: -- Non-streaming message sending with tool-calling LangGraph agent -- Streaming message sending with token-by-token output - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v +Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn) +end-to-end against a live AgentEx server. 
Configuration: - AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) @@ -25,26 +19,22 @@ from agentex.types.agent_rpc_params import ParamsCreateTaskRequest, ParamsSendMessageRequest from agentex.lib.sdk.fastacp.base.base_acp_server import uuid -# Configuration from environment variables AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") AGENT_NAME = os.environ.get("AGENT_NAME", "s030-langgraph") @pytest.fixture def client(): - """Create an AgentEx client instance for testing.""" return Agentex(base_url=AGENTEX_API_BASE_URL) @pytest.fixture def agent_name(): - """Return the agent name for testing.""" return AGENT_NAME @pytest.fixture def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" agents = client.agents.list() for agent in agents: if agent.name == agent_name: @@ -53,10 +43,7 @@ def agent_id(client, agent_name): class TestNonStreamingMessages: - """Test non-streaming message sending with LangGraph agent.""" - def test_send_simple_message(self, client: Agentex, agent_name: str): - """Test sending a simple message and receiving a response.""" response = client.agents.send_message( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -72,7 +59,6 @@ def test_send_simple_message(self, client: Agentex, agent_name: str): assert len(result) >= 1 def test_tool_calling(self, client: Agentex, agent_name: str): - """Test that the agent can use tools (e.g., weather tool).""" response = client.agents.send_message( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -88,12 +74,10 @@ def test_tool_calling(self, client: Agentex, agent_name: str): assert len(result) >= 1 def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id: str): - """Test multi-turn conversation with memory via LangGraph checkpointer.""" task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = 
task_response.result assert task is not None - # First message response1 = client.agents.send_message( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -107,7 +91,6 @@ def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id ) assert response1.result is not None - # Second message - agent should remember the name response2 = client.agents.send_message( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -126,10 +109,7 @@ def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id class TestStreamingMessages: - """Test streaming message sending with LangGraph agent.""" - def test_stream_simple_message(self, client: Agentex, agent_name: str): - """Test streaming a simple message response.""" stream = client.agents.send_message_stream( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -140,14 +120,11 @@ def test_stream_simple_message(self, client: Agentex, agent_name: str): ) ), ) - aggregated_content, chunks = collect_streaming_response(stream) - assert aggregated_content is not None assert len(chunks) > 1, "No chunks received in streaming response." def test_stream_tool_calling(self, client: Agentex, agent_name: str): - """Test streaming with tool calls.""" stream = client.agents.send_message_stream( agent_name=agent_name, params=ParamsSendMessageRequest( @@ -158,9 +135,7 @@ def test_stream_tool_calling(self, client: Agentex, agent_name: str): ) ), ) - aggregated_content, chunks = collect_streaming_response(stream) - assert aggregated_content is not None assert len(chunks) > 0, "No chunks received in streaming response." 
diff --git a/examples/tutorials/00_sync/040_pydantic_ai/README.md b/examples/tutorials/00_sync/040_pydantic_ai/README.md index 02c3b57c7..ef52c7c77 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/README.md +++ b/examples/tutorials/00_sync/040_pydantic_ai/README.md @@ -1,46 +1,52 @@ -# Tutorial 040: Sync Pydantic AI Agent +# Sync Pydantic AI Agent -This tutorial demonstrates how to build a **synchronous** Pydantic AI agent on AgentEx with: -- Tool calling (Pydantic AI handles the tool loop internally) -- Streaming token output (including token-by-token tool-call argument streaming) +A minimal **synchronous** Pydantic AI agent that drives the **unified harness +surface** (`UnifiedEmitter.yield_turn` + `PydanticAITurn`) on the sync +(HTTP-yield) channel. -## Key Concepts +## Why this agent exists -### Sync ACP -The sync ACP model uses HTTP request/response for communication. The `@acp.on_message_send` handler receives a message and yields streaming events back to the client. +This agent is the sync coverage for the unified surface: it shows an agent +author wiring the sync channel through `UnifiedEmitter.yield_turn` and getting +automatic span derivation (tool spans nested under the per-turn span) for free, +exactly like the async/temporal channels. -### Pydantic AI Integration -- **Agent**: A single `pydantic_ai.Agent` that owns the model and tools. No graph required — Pydantic AI runs its own tool-call loop until the model is done. -- **`@agent.tool_plain`**: Registers a Python function as a tool. Pydantic AI infers the schema from type hints and docstring. -- **`agent.run_stream_events(...)`**: Yields `AgentStreamEvent`s (PartStartEvent / PartDeltaEvent / PartEndEvent / FunctionToolResultEvent) as the model produces them. 
+## How it wires the unified surface -### Streaming -The agent streams tokens and tool-call arguments as they're generated using `convert_pydantic_ai_to_agentex_events()`, which adapts Pydantic AI's stream into AgentEx `TaskMessageUpdate` events. Notably, **tool-call arguments stream as `ToolRequestDelta` tokens** rather than arriving as a single complete payload — a richer experience than what OpenAI Agents SDK currently exposes. +In `project/acp.py`: -## Files +```python +emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, +) +async with agent.run_stream_events(user_message) as stream: + turn = PydanticAITurn(stream, model=MODEL_NAME) # coalesce off: stream tool-call arg tokens + async for ev in emitter.yield_turn(turn): + yield ev +``` -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server and message handler | -| `project/agent.py` | Pydantic AI agent + tool registration | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | +- `coalesce_tool_requests=False` (the default) preserves token-by-token + tool-call argument streaming on the sync channel. +- The `UnifiedEmitter` is constructed from the ACP/streaming context + (`task_id` + `trace_id` + `parent_span_id`) so tool spans nest under the + per-turn `AGENT_WORKFLOW` span automatically. -## Running Locally +## Files -```bash -# From this directory -agentex agents run -``` +- `project/acp.py` — sync ACP handler using `emitter.yield_turn(...)`. +- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool. +- `project/tools.py` — `get_weather(city)` returning a constant. +- `tests/test_agent.py` — live integration test (requires a running agent). 
-## Running Tests +## Tools -```bash -pytest tests/test_agent.py -v -``` +- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string so a + run deterministically exercises text + a tool call + a tool response. -## Notes +## Offline coverage -- Multi-turn conversation memory is not wired in this tutorial. Pydantic AI does not ship a checkpointer like LangGraph; to add memory, load prior messages via `adk.messages.list(task_id=...)` and pass them to `agent.run_stream_events(..., message_history=...)`. -- Reasoning/thinking tokens are not exercised here because `gpt-4o-mini` does not emit `ThinkingPart`s. Swap to a reasoning-capable model (e.g. `openai:o1-mini` via Pydantic AI's appropriate provider) if you want to test that branch end-to-end. +Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake +streaming/tracing, no network) live in the SDK repo under +`tests/lib/core/harness/` (the pydantic-ai sync suite). diff --git a/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml b/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml index 68d3b4a00..9563de39c 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml +++ b/examples/tutorials/00_sync/040_pydantic_ai/manifest.yaml @@ -17,7 +17,7 @@ local_development: agent: acp_type: sync name: s040-pydantic-ai - description: A sync Pydantic AI agent with tool calling and streaming + description: A sync Pydantic AI harness test agent using the unified emitter surface temporal: enabled: false @@ -47,7 +47,7 @@ deployment: global: agent: name: "s040-pydantic-ai" - description: "A sync Pydantic AI agent with tool calling and streaming" + description: "A sync Pydantic AI harness test agent using the unified emitter surface" replicaCount: 1 resources: requests: diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py b/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py index 0c096893f..f23cd7960 100644 --- 
a/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py +++ b/examples/tutorials/00_sync/040_pydantic_ai/project/acp.py @@ -1,7 +1,17 @@ -"""ACP (Agent Communication Protocol) handler for Agentex. - -This is the API layer — it owns the agent lifecycle and streams tokens -and tool calls from the Pydantic AI agent to the Agentex frontend. +"""ACP handler for the sync harness Pydantic AI test agent. + +This agent exercises the UNIFIED HARNESS SURFACE on the sync (HTTP-yield) +channel — ``UnifiedEmitter.yield_turn(PydanticAITurn(...))`` — rather than the +bare ``convert_pydantic_ai_to_agentex_events`` converter that this tutorial +previously called directly. The unified surface gives the sync channel the +same tracing (span derivation) the async/temporal channels get for free. + +Flow: +1. Open a per-turn AGENT_WORKFLOW span via ``adk.tracing.span``. +2. Construct a ``UnifiedEmitter`` from the ACP/streaming context (task_id + + trace_id + parent_span_id) so tool spans nest under the turn span. +3. Wrap ``agent.run_stream_events(...)`` in a ``PydanticAITurn`` and forward + events with ``emitter.yield_turn(turn)`` — yielding each to the client. 
""" from __future__ import annotations @@ -14,17 +24,15 @@ load_dotenv() import agentex.lib.adk as adk -from project.agent import create_agent -from agentex.lib.adk import ( - create_pydantic_ai_tracing_handler, - convert_pydantic_ai_to_agentex_events, -) +from project.agent import MODEL_NAME, create_agent from agentex.lib.types.acp import SendMessageParams +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.types.task_message_update import TaskMessageUpdate from agentex.types.task_message_content import TaskMessageContent +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -54,7 +62,7 @@ def get_agent(): async def handle_message_send( params: SendMessageParams, ) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: - """Handle incoming messages from Agentex, streaming tokens and tool calls.""" + """Handle incoming messages, streaming events through the unified surface.""" agent = get_agent() task_id = params.task.id @@ -68,11 +76,17 @@ async def handle_message_send( input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - tracing_handler = create_pydantic_ai_tracing_handler( + # Construct the UnifiedEmitter from the ACP/streaming context so tracing + # is automatic: tool spans nest under this turn's span. 
+ emitter = UnifiedEmitter( + task_id=task_id, trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, - task_id=task_id, ) + async with agent.run_stream_events(user_message) as stream: - async for event in convert_pydantic_ai_to_agentex_events(stream, tracing_handler=tracing_handler): - yield event + # PydanticAITurn preserves token-by-token tool-call argument + # streaming (Start+Delta+Done) on the sync/HTTP channel. + turn = PydanticAITurn(stream, model=MODEL_NAME) + async for ev in emitter.yield_turn(turn): + yield ev diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py b/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py index 2c0f6f10c..72fd74173 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py +++ b/examples/tutorials/00_sync/040_pydantic_ai/project/agent.py @@ -1,4 +1,4 @@ -"""Pydantic AI agent definition. +"""Pydantic AI agent definition for the sync harness test agent. The Agent is the boundary between this module and the API layer (acp.py). Pydantic AI handles its own tool-call loop internally — no graph required. @@ -12,6 +12,8 @@ from project.tools import get_weather +__all__ = ["create_agent", "MODEL_NAME"] + MODEL_NAME = "openai:gpt-4o-mini" SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
@@ -29,9 +31,7 @@ def create_agent() -> Agent: """Build and return the Pydantic AI agent with tools registered.""" agent = Agent( MODEL_NAME, - system_prompt=SYSTEM_PROMPT.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ), + system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), ) agent.tool_plain(get_weather) diff --git a/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py b/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py index bab87942a..d649c75f1 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py +++ b/examples/tutorials/00_sync/040_pydantic_ai/project/tools.py @@ -1,8 +1,8 @@ -"""Tool definitions for the Pydantic AI agent. +"""Tool definitions for the sync harness Pydantic AI agent. Pydantic AI tools are registered directly on the Agent via decorators -(see project.agent). This module hosts the bare functions so they're -easy to unit-test in isolation. +(see project.agent). This module hosts the bare function so it is easy to +unit-test in isolation. 
""" from __future__ import annotations diff --git a/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml b/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml index 3e645fa15..748a9f3cb 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml +++ b/examples/tutorials/00_sync/040_pydantic_ai/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "s040-pydantic-ai" version = "0.1.0" -description = "A sync Pydantic AI agent with tool calling and streaming" +description = "A sync Pydantic AI harness test agent using the unified emitter surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py b/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py index d3deed1c7..4aad12a56 100644 --- a/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py +++ b/examples/tutorials/00_sync/040_pydantic_ai/tests/test_agent.py @@ -1,8 +1,10 @@ -"""Tests for the sync Pydantic AI agent. +"""Live tests for the sync Pydantic AI agent. -This test suite validates: -- Non-streaming message sending with tool-calling Pydantic AI agent -- Streaming message sending with token-by-token output +These tests require a running agent (server + deployed agent) and exercise the +unified-surface sync handler end-to-end over the wire. + +Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives +in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai sync suite). To run these tests: 1. 
Make sure the agent is running (via docker-compose or `agentex agents run`) @@ -50,7 +52,7 @@ def agent_id(client, agent_name): class TestNonStreamingMessages: - """Test non-streaming message sending with Pydantic AI agent.""" + """Test non-streaming message sending with the unified-surface sync agent.""" def test_send_simple_message(self, client: Agentex, agent_name: str): """Test sending a simple message and receiving a response.""" @@ -86,7 +88,7 @@ def test_tool_calling(self, client: Agentex, agent_name: str): class TestStreamingMessages: - """Test streaming message sending with Pydantic AI agent.""" + """Test streaming message sending through the unified yield_turn path.""" def test_stream_simple_message(self, client: Agentex, agent_name: str): """Test streaming a simple message response.""" @@ -107,10 +109,10 @@ def test_stream_simple_message(self, client: Agentex, agent_name: str): assert len(chunks) > 1, "No chunks received in streaming response." def test_stream_tool_calling(self, client: Agentex, agent_name: str): - """Test streaming with tool calls. + """Test streaming with tool calls through the unified surface. - This exercises the headline Pydantic AI converter feature: - tool-call argument tokens streaming through as ToolRequestDelta. + Exercises token-by-token tool-call argument streaming (coalesce off), + which the unified yield_turn path preserves on the sync channel. 
""" stream = client.agents.send_message_stream( agent_name=agent_name, diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/.dockerignore b/examples/tutorials/00_sync/050_openai_agents/.dockerignore similarity index 100% rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/.dockerignore rename to examples/tutorials/00_sync/050_openai_agents/.dockerignore diff --git a/examples/tutorials/00_sync/harness_langgraph/Dockerfile b/examples/tutorials/00_sync/050_openai_agents/Dockerfile similarity index 73% rename from examples/tutorials/00_sync/harness_langgraph/Dockerfile rename to examples/tutorials/00_sync/050_openai_agents/Dockerfile index 9d492198f..c9ccd6f54 100644 --- a/examples/tutorials/00_sync/harness_langgraph/Dockerfile +++ b/examples/tutorials/00_sync/050_openai_agents/Dockerfile @@ -23,16 +23,16 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 # Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml -COPY 00_sync/harness_langgraph/README.md /app/harness_langgraph/README.md +COPY 00_sync/050_openai_agents/pyproject.toml /app/050_openai_agents/pyproject.toml +COPY 00_sync/050_openai_agents/README.md /app/050_openai_agents/README.md -WORKDIR /app/harness_langgraph +WORKDIR /app/050_openai_agents # Copy the project code -COPY 00_sync/harness_langgraph/project /app/harness_langgraph/project +COPY 00_sync/050_openai_agents/project /app/050_openai_agents/project # Copy the test files -COPY 00_sync/harness_langgraph/tests /app/harness_langgraph/tests +COPY 00_sync/050_openai_agents/tests /app/050_openai_agents/tests # Copy shared test utilities COPY test_utils /app/test_utils @@ -44,7 +44,7 @@ RUN uv pip install --system .[dev] ENV PYTHONPATH=/app # Set test environment variables -ENV AGENT_NAME=s-harness-langgraph +ENV AGENT_NAME=s050-openai-agents # Run the agent using uvicorn CMD ["uvicorn", 
"project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/060_harness_openai/README.md b/examples/tutorials/00_sync/050_openai_agents/README.md similarity index 85% rename from examples/tutorials/00_sync/060_harness_openai/README.md rename to examples/tutorials/00_sync/050_openai_agents/README.md index e22e9aa8b..98cec3f9a 100644 --- a/examples/tutorials/00_sync/060_harness_openai/README.md +++ b/examples/tutorials/00_sync/050_openai_agents/README.md @@ -9,8 +9,8 @@ The OpenAI Agents SDK produces native streaming events. This tutorial wraps a `Runner.run_streamed` result in an `OpenAITurn` — the provider -> canonical `StreamTaskMessage*` adapter — and forwards the canonical stream to the frontend via `UnifiedEmitter.yield_turn`. The same `OpenAITurn` flows unchanged through -`auto_send_turn` in the async (`130_harness_openai`) and temporal -(`140_harness_openai`) variants; only the delivery method differs. +`auto_send_turn` in the async (`10_async/00_base/120_openai_agents`) and temporal +(`10_async/10_temporal/120_openai_agents`) variants; only the delivery method differs. 
```python result = Runner.run_streamed(starting_agent=agent, input=user_message) diff --git a/examples/tutorials/00_sync/060_harness_openai/manifest.yaml b/examples/tutorials/00_sync/050_openai_agents/manifest.yaml similarity index 84% rename from examples/tutorials/00_sync/060_harness_openai/manifest.yaml rename to examples/tutorials/00_sync/050_openai_agents/manifest.yaml index 4967c1f8d..bdb47e8d8 100644 --- a/examples/tutorials/00_sync/060_harness_openai/manifest.yaml +++ b/examples/tutorials/00_sync/050_openai_agents/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../ include_paths: - - 00_sync/060_harness_openai + - 00_sync/050_openai_agents - test_utils - dockerfile: 00_sync/060_harness_openai/Dockerfile - dockerignore: 00_sync/060_harness_openai/.dockerignore + dockerfile: 00_sync/050_openai_agents/Dockerfile + dockerignore: 00_sync/050_openai_agents/.dockerignore local_development: agent: @@ -16,7 +16,7 @@ local_development: agent: acp_type: sync - name: s060-harness-openai + name: s050-openai-agents description: A sync OpenAI Agents SDK agent on the unified harness surface temporal: @@ -46,7 +46,7 @@ deployment: global: agent: - name: "s060-harness-openai" + name: "s050-openai-agents" description: "A sync OpenAI Agents SDK agent on the unified harness surface" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/__init__.py b/examples/tutorials/00_sync/050_openai_agents/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/__init__.py rename to examples/tutorials/00_sync/050_openai_agents/project/__init__.py diff --git a/examples/tutorials/00_sync/060_harness_openai/project/acp.py b/examples/tutorials/00_sync/050_openai_agents/project/acp.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/project/acp.py rename to examples/tutorials/00_sync/050_openai_agents/project/acp.py diff --git 
a/examples/tutorials/00_sync/060_harness_openai/project/agent.py b/examples/tutorials/00_sync/050_openai_agents/project/agent.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/project/agent.py rename to examples/tutorials/00_sync/050_openai_agents/project/agent.py diff --git a/examples/tutorials/00_sync/060_harness_openai/project/tools.py b/examples/tutorials/00_sync/050_openai_agents/project/tools.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/project/tools.py rename to examples/tutorials/00_sync/050_openai_agents/project/tools.py diff --git a/examples/tutorials/00_sync/060_harness_openai/pyproject.toml b/examples/tutorials/00_sync/050_openai_agents/pyproject.toml similarity index 95% rename from examples/tutorials/00_sync/060_harness_openai/pyproject.toml rename to examples/tutorials/00_sync/050_openai_agents/pyproject.toml index 39cceb8f2..48d2481dd 100644 --- a/examples/tutorials/00_sync/060_harness_openai/pyproject.toml +++ b/examples/tutorials/00_sync/050_openai_agents/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "s060-harness-openai" +name = "s050-openai-agents" version = "0.1.0" description = "A sync OpenAI Agents SDK agent on the unified harness surface" readme = "README.md" diff --git a/examples/tutorials/00_sync/060_harness_openai/tests/test_agent.py b/examples/tutorials/00_sync/050_openai_agents/tests/test_agent.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/tests/test_agent.py rename to examples/tutorials/00_sync/050_openai_agents/tests/test_agent.py diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile deleted file mode 100644 index 8e0ec22df..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# 
syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/050_openai_agents_local_sandbox/pyproject.toml /app/050_openai_agents_local_sandbox/pyproject.toml -COPY 00_sync/050_openai_agents_local_sandbox/README.md /app/050_openai_agents_local_sandbox/README.md - -WORKDIR /app/050_openai_agents_local_sandbox - -# Copy the project code -COPY 00_sync/050_openai_agents_local_sandbox/project /app/050_openai_agents_local_sandbox/project - -# Copy the test files -COPY 00_sync/050_openai_agents_local_sandbox/tests /app/050_openai_agents_local_sandbox/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=s050-openai-agents-local-sandbox - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md deleted file mode 100644 index 9c2c81d7d..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/README.md +++ /dev/null @@ -1,113 +0,0 @@ -# Tutorial 050: Sync OpenAI Agents SDK with a Local Sandbox - -This tutorial demonstrates how to build a **synchronous** agent on AgentEx using the -[OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents) and its 
-**sandbox** runtime, running with the **local** (`unix_local`) backend. - -The agent is a "local sandbox assistant": it answers questions by actually running -real shell commands (e.g. `python3 --version`, `ls /tmp`, `python3 -c "..."`) -instead of guessing. - -## Key Concepts - -### Sync ACP -The sync ACP model uses HTTP request/response for communication. The -`@acp.on_message_send` handler receives a message, runs the agent, and returns the -agent's final answer as a `TextContent`. - -### OpenAI Agents SDK Sandbox -The OpenAI Agents SDK ships `agents.sandbox`, which lets you give an agent -**capabilities** (instead of hand-written tools) that the runtime turns into real -tools backed by a sandbox: - -- **`SandboxAgent`**: an `Agent` that is granted sandbox capabilities. -- **Capabilities** (`from agents.sandbox.capabilities import Shell, Filesystem, Memory`): - each capability expands into a set of real tools. This tutorial uses `Shell`, which - lets the model run real shell commands. -- **`SandboxRunConfig`** + a sandbox **client**: tells the runtime *where* the tools - actually execute. - -### The LOCAL sandbox (`UnixLocalSandboxClient`) -This tutorial uses the local backend -(`from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient, UnixLocalSandboxClientOptions`), -`backend_id="unix_local"`. The local sandbox runs shell commands **ON THE HOST** — -the agent's own container/process. There is **no Docker, no Temporal, and no remote -sandbox infrastructure** involved. This makes it the simplest way to give an agent a -real shell. 
- -The sandbox is wired up through the SDK's `RunConfig`: - -```python -from agents import Runner, set_tracing_disabled -from agents.run_config import RunConfig -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.sandbox.capabilities import Shell -from agents.sandbox.sandboxes.unix_local import ( - UnixLocalSandboxClient, - UnixLocalSandboxClientOptions, -) - -set_tracing_disabled(True) # avoid api.openai.com tracing 401 behind a gateway - -agent = SandboxAgent( - name="Local Sandbox Assistant", - instructions="...use the shell tools to actually run commands...", - capabilities=[Shell()], -) -run_config = RunConfig( - sandbox=SandboxRunConfig( - client=UnixLocalSandboxClient(), - options=UnixLocalSandboxClientOptions(), - ) -) -result = await Runner.run(agent, input="what's the python version?", run_config=run_config) -print(result.final_output) -``` - -`Runner.run` drives the full tool-call loop internally: the model issues shell -commands, the local sandbox runs them on the host, the output is fed back, and the -loop continues until the model produces a final answer. - -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server and message handler (runs the sandbox agent) | -| `project/agent.py` | `SandboxAgent` + `RunConfig(sandbox=...)` wiring + `run_agent` | -| `project/tools.py` | Sandbox capability factory (`Shell`) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | - -## Running Locally - -```bash -# From this directory -agentex agents run -``` - -Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM -gateway) in your environment or in a `.env` file in `project/` so the agent can call -the model. 
- -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` - -## Notes - -- **No infra required.** Because this uses the `unix_local` backend, the shell tools - run directly in the agent's process — no Docker daemon, no Temporal, no remote - sandbox. Swap the client for a remote/containerized backend to isolate execution. -- **Tracing.** `set_tracing_disabled(True)` turns off the OpenAI Agents SDK's native - tracer (which would otherwise try to ship traces to `api.openai.com`). The manifest - also sets `OPENAI_AGENTS_DISABLE_TRACING=1`. AgentEx/SGP tracing still runs via the - tracing manager configured in `acp.py` when SGP credentials are present. -- **Capabilities are the tools.** To let the agent do more, add capabilities in - `project/tools.py` (e.g. `Filesystem()`, `Memory()`). - -## Further Reading - -- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents -- The next evolution of the Agents SDK: https://openai.com/index/the-next-evolution-of-the-agents-sdk/ diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml deleted file mode 100644 index 8ae5b98a1..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/manifest.yaml +++ /dev/null @@ -1,61 +0,0 @@ -build: - context: - root: ../../ - include_paths: - - 00_sync/050_openai_agents_local_sandbox - - test_utils - dockerfile: 00_sync/050_openai_agents_local_sandbox/Dockerfile - dockerignore: 00_sync/050_openai_agents_local_sandbox/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: sync - name: s050-openai-agents-local-sandbox - description: A sync OpenAI Agents SDK agent using a local (unix_local) sandbox - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: 
REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - - env: - OPENAI_AGENTS_DISABLE_TRACING: "1" - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "s050-openai-agents-local-sandbox" - description: "A sync OpenAI Agents SDK agent using a local (unix_local) sandbox" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py deleted file mode 100644 index 005d679bf..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/acp.py +++ /dev/null @@ -1,77 +0,0 @@ -"""ACP (Agent Communication Protocol) handler for Agentex. - -This is the API layer — it owns the agent lifecycle and runs the OpenAI Agents -SDK *sandbox* agent for each incoming message, returning the agent's final -answer to the Agentex frontend. - -The agent uses the LOCAL sandbox backend (``UnixLocalSandboxClient``), which runs -shell commands on the host (this process/container). The OpenAI Agents SDK runs -its tool-call loop internally via ``Runner.run`` and returns the final output, so -this sync handler returns a single ``TextContent`` rather than streaming tokens. 
-""" - -from __future__ import annotations - -import os - -from dotenv import load_dotenv - -load_dotenv() - -from agentex.lib import adk -from project.agent import run_agent -from agentex.lib.types.acp import SendMessageParams -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.types.task_message_content import TaskMessageContent -from agentex.lib.core.tracing.tracing_processor_manager import ( - add_tracing_processor_config, -) - -logger = make_logger(__name__) - -# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client -# compatibility, so the same example works behind the Scale LiteLLM gateway. -_litellm_key = os.environ.get("LITELLM_API_KEY") -if _litellm_key and not os.environ.get("OPENAI_API_KEY"): - os.environ["OPENAI_API_KEY"] = _litellm_key - -SGP_API_KEY = os.environ.get("SGP_API_KEY", "") -SGP_ACCOUNT_ID = os.environ.get("SGP_ACCOUNT_ID", "") -SGP_CLIENT_BASE_URL = os.environ.get("SGP_CLIENT_BASE_URL", "") - -if SGP_API_KEY and SGP_ACCOUNT_ID: - add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=SGP_API_KEY, - sgp_account_id=SGP_ACCOUNT_ID, - sgp_base_url=SGP_CLIENT_BASE_URL, - ) - ) - -acp = FastACP.create(acp_type="sync") - - -@acp.on_message_send -async def handle_message_send( - params: SendMessageParams, -) -> TaskMessageContent: - """Handle incoming messages by running the local-sandbox agent.""" - task_id = params.task.id - user_message = params.content.content - logger.info(f"Processing message for task {task_id}") - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name="message", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - final_output = await run_agent(user_message) - if turn_span: - turn_span.output = {"final_output": final_output} - - return 
TextContent(author="agent", content=final_output) diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py deleted file mode 100644 index d674d14c9..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/agent.py +++ /dev/null @@ -1,92 +0,0 @@ -"""OpenAI Agents SDK local-sandbox agent definition. - -This mirrors the Pydantic AI tutorial (040): the agent is the boundary between -this module and the API layer (acp.py). The difference is the runtime — here we -use the OpenAI Agents SDK ``SandboxAgent`` together with the **local** sandbox -backend (``UnixLocalSandboxClient``). - -The local sandbox runs shell commands ON THE HOST — the agent's own -container/process. There is no Docker, no Temporal, and no remote sandbox -infrastructure. The OpenAI Agents SDK runs its own tool-call loop internally: -when the model decides to run a shell command, the sandbox executes it locally -and feeds the output back to the model until it produces a final answer. -""" - -from __future__ import annotations - -from datetime import datetime - -from agents import Runner, set_tracing_disabled -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.run_config import RunConfig -from agents.sandbox.sandboxes.unix_local import ( - UnixLocalSandboxClient, - UnixLocalSandboxClientOptions, -) - -from project.tools import get_capabilities - -# Disable the openai-agents SDK's native tracer so it doesn't ship traces to -# api.openai.com using OPENAI_API_KEY (which may be a gateway/proxy key and would -# 401). Agentex tracing still runs via the tracing manager configured in acp.py. -set_tracing_disabled(True) - -MODEL_NAME = "gpt-4o-mini" -INSTRUCTIONS = """You are a local sandbox assistant. - -Current date and time: {timestamp} - -You have access to shell tools that run real commands on the local machine. 
- -Guidelines: -- ALWAYS use the shell tools to actually run commands — never guess or make up - output. If the user asks for the Python version, run `python3 --version`. If - they ask to list files, run `ls`. If they ask you to compute something, use - `python3 -c "..."`. -- Run the minimal command(s) needed to answer the question. -- Report the real command output back to the user, concisely. -""" - - -def create_agent() -> SandboxAgent: - """Build and return the OpenAI Agents SDK sandbox agent. - - The agent is granted shell capabilities (see ``project.tools``). The actual - sandbox backend (where the shell commands run) is supplied at run time via - the ``RunConfig`` returned by ``create_run_config``. - """ - return SandboxAgent( - name="Local Sandbox Assistant", - model=MODEL_NAME, - instructions=INSTRUCTIONS.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ), - capabilities=get_capabilities(), - ) - - -def create_run_config() -> RunConfig: - """Build the RunConfig that points the agent at the LOCAL sandbox backend. - - ``UnixLocalSandboxClient`` (backend_id="unix_local") runs shell commands on - the host — the agent's own process — so no Docker or remote infra is needed. - """ - return RunConfig( - sandbox=SandboxRunConfig( - client=UnixLocalSandboxClient(), - options=UnixLocalSandboxClientOptions(), - ) - ) - - -async def run_agent(user_message: str) -> str: - """Run the sandbox agent on a single user message and return the final text. - - The OpenAI Agents SDK handles the full tool-call loop internally: the model - issues shell commands, the local sandbox runs them on the host, and the - output is fed back until the model produces a final answer. 
- """ - agent = create_agent() - run_config = create_run_config() - result = await Runner.run(agent, input=user_message, run_config=run_config, max_turns=10) - return result.final_output diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py deleted file mode 100644 index 0ad8f25ac..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/project/tools.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Sandbox capabilities for the OpenAI Agents SDK local-sandbox agent. - -Unlike the Pydantic AI tutorial (040), this agent does not register hand-written -Python functions as tools. Instead it is given *capabilities* — the OpenAI Agents -SDK sandbox runtime turns each capability into a real set of tools (run a shell -command, read a file, etc.) backed by an actual sandbox backend. - -Here we use the ``Shell`` capability, which lets the model run real shell commands. -With the local (``unix_local``) backend those commands execute ON THE HOST — the -agent's own process/container — so there is no Docker, Temporal, or remote infra -involved. This module hosts the capability factory so the agent wiring in -``project.agent`` stays readable and the capability set is easy to extend -(e.g. add ``Filesystem()`` or ``Memory()``). -""" - -from __future__ import annotations - -from agents.sandbox.capabilities import Shell - - -def get_capabilities() -> list: - """Return the sandbox capabilities the agent is allowed to use. - - Returns: - A list of OpenAI Agents SDK sandbox capabilities. We grant ``Shell`` so - the agent can run real shell commands on the local machine. Add - ``Filesystem()`` or ``Memory()`` here to expand what the agent can do. 
- """ - return [Shell()] diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml deleted file mode 100644 index 472a6bef7..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "s050-openai-agents-local-sandbox" -version = "0.1.0" -description = "A sync OpenAI Agents SDK agent using a local (unix_local) sandbox" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "openai-agents>=0.14.3,<0.15", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py b/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py deleted file mode 100644 index 52ed1bf2f..000000000 --- a/examples/tutorials/00_sync/050_openai_agents_local_sandbox/tests/test_agent.py +++ /dev/null @@ -1,148 +0,0 @@ -"""Tests for the sync OpenAI Agents SDK local-sandbox agent. - -This test suite validates: -- Sending a message that requires the agent to actually run a shell command in - the LOCAL sandbox (unix_local backend) and receiving a non-empty response. - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. 
Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: s050-openai-agents-local-sandbox) -""" - -import os - -import pytest -from test_utils.sync import validate_text_in_string - -from agentex import Agentex -from agentex.types import TextContentParam -from agentex.types.agent_rpc_params import ParamsSendMessageRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "s050-openai-agents-local-sandbox") - - -@pytest.fixture -def client(): - """Create an AgentEx client instance for testing.""" - return Agentex(base_url=AGENTEX_API_BASE_URL) - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest.fixture -def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -def _response_text(result) -> str: - """Flatten a send_message result into a single string for assertions. - - Result items may be a bare string, a ``TextContent`` (``.content`` is the - string), or a ``TaskMessage`` wrapping a ``TextContent`` (``.content`` is the - ``TextContent``, whose ``.content`` is the string). Dig through ``.content`` - until we reach a string. 
- """ - - def _text_of(obj, _depth: int = 0) -> str: - if isinstance(obj, str): - return obj - if _depth > 5: - return "" - inner = getattr(obj, "content", None) - if inner is None: - return "" - return _text_of(inner, _depth + 1) - - parts = [t for t in (_text_of(item) for item in result) if t] - return "\n".join(parts) - - -class TestLocalSandboxMessages: - """Test the local-sandbox OpenAI Agents SDK agent.""" - - def test_send_simple_message(self, client: Agentex, agent_name: str): - """Test sending a simple message and receiving a response.""" - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Hello! What can you help me with?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - def test_shell_python_version(self, client: Agentex, agent_name: str): - """Test that the agent uses its shell to run a real command. - - We ask it to print the Python version. The agent should run - `python3 --version` in the local sandbox and report the real output, - which always starts with "Python 3". - """ - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content=( - "Use your shell to print the Python version on this " - "machine, then tell me what it is." - ), - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - text = _response_text(result) - assert text, "Expected a non-empty response from the sandbox agent." - # The sandbox runs on Python 3.12, so the real output contains "Python 3". 
- validate_text_in_string("Python 3", text) - - def test_shell_compute(self, client: Agentex, agent_name: str): - """Test that the agent uses python3 in the sandbox to compute a value.""" - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content=( - "Use python3 in your shell to compute 21 * 2 and tell me " - "the result." - ), - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - text = _response_text(result) - assert text, "Expected a non-empty response from the sandbox agent." - validate_text_in_string("42", text) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/00_sync/060_harness_openai/Dockerfile b/examples/tutorials/00_sync/060_harness_openai/Dockerfile deleted file mode 100644 index 1bd4f4860..000000000 --- a/examples/tutorials/00_sync/060_harness_openai/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/060_harness_openai/pyproject.toml /app/060_harness_openai/pyproject.toml -COPY 00_sync/060_harness_openai/README.md /app/060_harness_openai/README.md - -WORKDIR /app/060_harness_openai - -# Copy the project code -COPY 00_sync/060_harness_openai/project /app/060_harness_openai/project - -# Copy the test files -COPY 00_sync/060_harness_openai/tests /app/060_harness_openai/tests - -# Copy shared test utilities -COPY test_utils 
/app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=s060-harness-openai - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/060_harness_openai/.dockerignore b/examples/tutorials/00_sync/070_codex/.dockerignore similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/.dockerignore rename to examples/tutorials/00_sync/070_codex/.dockerignore diff --git a/examples/tutorials/00_sync/harness_codex/Dockerfile b/examples/tutorials/00_sync/070_codex/Dockerfile similarity index 74% rename from examples/tutorials/00_sync/harness_codex/Dockerfile rename to examples/tutorials/00_sync/070_codex/Dockerfile index 72713b95d..75abf677d 100644 --- a/examples/tutorials/00_sync/harness_codex/Dockerfile +++ b/examples/tutorials/00_sync/070_codex/Dockerfile @@ -23,16 +23,16 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 # Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml -COPY 00_sync/harness_codex/README.md /app/harness_codex/README.md +COPY 00_sync/070_codex/pyproject.toml /app/070_codex/pyproject.toml +COPY 00_sync/070_codex/README.md /app/070_codex/README.md -WORKDIR /app/harness_codex +WORKDIR /app/070_codex # Copy the project code -COPY 00_sync/harness_codex/project /app/harness_codex/project +COPY 00_sync/070_codex/project /app/070_codex/project # Copy the test files -COPY 00_sync/harness_codex/tests /app/harness_codex/tests +COPY 00_sync/070_codex/tests /app/070_codex/tests # Copy shared test utilities COPY test_utils /app/test_utils @@ -44,7 +44,7 @@ RUN uv pip install --system .[dev] ENV PYTHONPATH=/app # Set test environment variables -ENV AGENT_NAME=s-harness-codex +ENV 
AGENT_NAME=s070-codex # Run the agent using uvicorn CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/harness_codex/README.md b/examples/tutorials/00_sync/070_codex/README.md similarity index 95% rename from examples/tutorials/00_sync/harness_codex/README.md rename to examples/tutorials/00_sync/070_codex/README.md index 5f3396cfa..3abb2766f 100644 --- a/examples/tutorials/00_sync/harness_codex/README.md +++ b/examples/tutorials/00_sync/070_codex/README.md @@ -1,4 +1,4 @@ -# harness_codex (sync) +# 070_codex (sync) Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap, `CodexTurn`, and `UnifiedEmitter` for a **sync** (HTTP-yield) ACP agent. @@ -27,7 +27,7 @@ The offline tests inject a fake subprocess and never invoke the real CLI: ```bash cd /path/to/scale-agentex-python -uv run --all-packages --all-extras pytest examples/tutorials/00_sync/harness_codex/tests/test_agent.py -q +uv run --all-packages --all-extras pytest examples/tutorials/00_sync/070_codex/tests/test_agent.py -q ``` ## Running live integration tests diff --git a/examples/tutorials/00_sync/harness_codex/conftest.py b/examples/tutorials/00_sync/070_codex/conftest.py similarity index 100% rename from examples/tutorials/00_sync/harness_codex/conftest.py rename to examples/tutorials/00_sync/070_codex/conftest.py diff --git a/examples/tutorials/00_sync/harness_codex/manifest.yaml b/examples/tutorials/00_sync/070_codex/manifest.yaml similarity index 86% rename from examples/tutorials/00_sync/harness_codex/manifest.yaml rename to examples/tutorials/00_sync/070_codex/manifest.yaml index 52943f8f2..87dad2847 100644 --- a/examples/tutorials/00_sync/harness_codex/manifest.yaml +++ b/examples/tutorials/00_sync/070_codex/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../ include_paths: - - 00_sync/harness_codex + - 00_sync/070_codex - test_utils - dockerfile: 00_sync/harness_codex/Dockerfile - dockerignore: 
00_sync/harness_codex/.dockerignore + dockerfile: 00_sync/070_codex/Dockerfile + dockerignore: 00_sync/070_codex/.dockerignore local_development: agent: @@ -16,7 +16,7 @@ local_development: agent: acp_type: sync - name: s-harness-codex + name: s070-codex description: Sync tutorial agent driving the unified harness surface via local codex CLI subprocess temporal: @@ -46,7 +46,7 @@ deployment: global: agent: - name: "s-harness-codex" + name: "s070-codex" description: "Sync tutorial agent driving the unified harness surface via local codex CLI subprocess" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/060_harness_openai/project/__init__.py b/examples/tutorials/00_sync/070_codex/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/060_harness_openai/project/__init__.py rename to examples/tutorials/00_sync/070_codex/project/__init__.py diff --git a/examples/tutorials/00_sync/harness_codex/project/acp.py b/examples/tutorials/00_sync/070_codex/project/acp.py similarity index 100% rename from examples/tutorials/00_sync/harness_codex/project/acp.py rename to examples/tutorials/00_sync/070_codex/project/acp.py diff --git a/examples/tutorials/00_sync/harness_codex/pyproject.toml b/examples/tutorials/00_sync/070_codex/pyproject.toml similarity index 96% rename from examples/tutorials/00_sync/harness_codex/pyproject.toml rename to examples/tutorials/00_sync/070_codex/pyproject.toml index ca7d8ac18..88bbb9cca 100644 --- a/examples/tutorials/00_sync/harness_codex/pyproject.toml +++ b/examples/tutorials/00_sync/070_codex/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "s-harness-codex" +name = "s070-codex" version = "0.1.0" description = "Sync tutorial agent driving the unified harness surface via local codex CLI subprocess" readme = "README.md" diff --git a/examples/tutorials/00_sync/harness_codex/tests/test_agent.py 
b/examples/tutorials/00_sync/070_codex/tests/test_agent.py similarity index 99% rename from examples/tutorials/00_sync/harness_codex/tests/test_agent.py rename to examples/tutorials/00_sync/070_codex/tests/test_agent.py index b2d5b6498..94aa2aaf2 100644 --- a/examples/tutorials/00_sync/harness_codex/tests/test_agent.py +++ b/examples/tutorials/00_sync/070_codex/tests/test_agent.py @@ -145,7 +145,7 @@ async def test_on_result_callback_receives_session_id(self): LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1" AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-codex") +AGENT_NAME = os.environ.get("AGENT_NAME", "s070-codex") @pytest.mark.skipif(not LIVE, reason="Set CODEX_LIVE_TESTS=1 and ensure codex CLI + OPENAI_API_KEY are available") diff --git a/examples/tutorials/00_sync/harness_langgraph/README.md b/examples/tutorials/00_sync/harness_langgraph/README.md deleted file mode 100644 index 86367f162..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/README.md +++ /dev/null @@ -1,55 +0,0 @@ -# Tutorial: Sync Harness LangGraph Agent - -This tutorial demonstrates how to build a **synchronous** LangGraph agent on AgentEx -using the **unified harness surface**: - -```python -turn = LangGraphTurn(stream, model=None) -emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...) -async for event in emitter.yield_turn(turn): - yield event -``` - -Compare with ``030_langgraph``, which uses the bespoke -``convert_langgraph_to_agentex_events`` helper directly. - -## Key Concepts - -### Unified Harness - -`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw -LangGraph `astream()` generator and exposes `events` (an async generator of -`TaskMessageUpdate`) and `usage()` (token counts captured from the final -`AIMessage`). - -`UnifiedEmitter.yield_turn(turn)` iterates the turn's events and yields them -to the sync ACP handler unchanged. 
The same `LangGraphTurn` object can also be -passed to `UnifiedEmitter.auto_send_turn` in the async/temporal channels. - -### AGX1-377 Note - -LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates" -node outputs). The `SpanDeriver` does not open tool spans from Full events -today; that gap is tracked in AGX1-373. - -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server using unified harness (LangGraphTurn + yield_turn) | -| `project/graph.py` | LangGraph state graph (identical to 030_langgraph) | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration (name: s-harness-langgraph) | - -## Running Locally - -```bash -agentex agents run -``` - -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` diff --git a/examples/tutorials/00_sync/harness_langgraph/manifest.yaml b/examples/tutorials/00_sync/harness_langgraph/manifest.yaml deleted file mode 100644 index 1f57678f2..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/manifest.yaml +++ /dev/null @@ -1,58 +0,0 @@ -build: - context: - root: ../../ - include_paths: - - 00_sync/harness_langgraph - - test_utils - dockerfile: 00_sync/harness_langgraph/Dockerfile - dockerignore: 00_sync/harness_langgraph/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: sync - name: s-harness-langgraph - description: A sync LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn) - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - 
secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "s-harness-langgraph" - description: "A sync LangGraph agent using the unified harness surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/00_sync/harness_langgraph/project/acp.py b/examples/tutorials/00_sync/harness_langgraph/project/acp.py deleted file mode 100644 index f609f1682..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/project/acp.py +++ /dev/null @@ -1,107 +0,0 @@ -"""ACP handler for sync harness LangGraph agent. - -Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph -``astream()`` generator, and ``UnifiedEmitter.yield_turn`` converts it into -the AgentEx ``TaskMessageUpdate`` event stream expected by the sync ACP. - -Differences from ``030_langgraph`` (bespoke path): -- No ``create_langgraph_tracing_handler`` boilerplate. -- No manual text-delta accumulation for the span output. -- Tool calls are emitted as ``StreamTaskMessageFull`` (not Start+Delta+Done) - via the same code path as the async/temporal channels. -- Usage data (token counts) is captured on the ``LangGraphTurn`` object and - can be read after the turn completes. - -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` -events (from "updates"). The ``SpanDeriver`` does not open tool spans from -Full events today; that gap is tracked in AGX1-373. 
-""" - -from __future__ import annotations - -import os -from typing import AsyncGenerator - -from dotenv import load_dotenv - -load_dotenv() - -import agentex.lib.adk as adk -from project.graph import create_graph -from agentex.lib.types.acp import SendMessageParams -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.core.harness.emitter import UnifiedEmitter -from agentex.types.task_message_delta import TextDelta -from agentex.types.task_message_update import TaskMessageUpdate -from agentex.types.task_message_content import TaskMessageContent -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create(acp_type="sync") - -_graph = None - - -async def get_graph(): - """Get or create the compiled graph instance.""" - global _graph - if _graph is None: - _graph = await create_graph() - return _graph - - -@acp.on_message_send -async def handle_message_send( - params: SendMessageParams, -) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: - """Handle incoming messages, streaming tokens and tool calls via unified harness.""" - graph = await get_graph() - - task_id = params.task.id - user_message = params.content.content - - logger.info(f"Processing message for task {task_id}") - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name="message", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - stream = graph.astream( - {"messages": [{"role": 
"user", "content": user_message}]}, - config={"configurable": {"thread_id": task_id}}, - stream_mode=["messages", "updates"], - ) - - turn = LangGraphTurn(stream, model=None) - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - - final_text = "" - async for event in emitter.yield_turn(turn): - # Accumulate text deltas so the span's final_output is the assistant - # text (matching the async tutorial), not the usage metrics. - delta = getattr(event, "delta", None) - if isinstance(delta, TextDelta) and delta.text_delta: - final_text += delta.text_delta - yield event - - if turn_span: - turn_span.output = {"final_output": final_text, "usage": turn.usage().model_dump()} diff --git a/examples/tutorials/00_sync/harness_langgraph/project/graph.py b/examples/tutorials/00_sync/harness_langgraph/project/graph.py deleted file mode 100644 index 4516087d2..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/project/graph.py +++ /dev/null @@ -1,67 +0,0 @@ -"""LangGraph graph definition for the harness_langgraph sync agent. - -Identical to ``030_langgraph/project/graph.py`` — the graph definition is not -affected by the harness migration. Only ``acp.py`` changes. -""" - -from __future__ import annotations - -from typing import Any, Annotated -from datetime import datetime -from typing_extensions import TypedDict - -from langgraph.graph import START, StateGraph -from langchain_openai import ChatOpenAI -from langgraph.prebuilt import ToolNode, tools_condition -from langchain_core.messages import SystemMessage -from langgraph.graph.message import add_messages - -from project.tools import TOOLS -from agentex.lib.adk import create_checkpointer - -MODEL_NAME = "gpt-5" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -class AgentState(TypedDict): - """State schema for the agent graph.""" - - messages: Annotated[list[Any], add_messages] - - -async def create_graph(): - """Create and compile the agent graph with checkpointer.""" - llm = ChatOpenAI( - model=MODEL_NAME, - reasoning={"effort": "high", "summary": "auto"}, - ) - llm_with_tools = llm.bind_tools(TOOLS) - - checkpointer = await create_checkpointer() - - def agent_node(state: AgentState) -> dict[str, Any]: - """Process the current state and generate a response.""" - messages = state["messages"] - if not messages or not isinstance(messages[0], SystemMessage): - system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) - messages = [SystemMessage(content=system_content)] + messages - response = llm_with_tools.invoke(messages) - return {"messages": [response]} - - builder = StateGraph(AgentState) - builder.add_node("agent", agent_node) - builder.add_node("tools", ToolNode(tools=TOOLS)) - builder.add_edge(START, "agent") - builder.add_conditional_edges("agent", tools_condition, "tools") - builder.add_edge("tools", "agent") - - return builder.compile(checkpointer=checkpointer) diff --git a/examples/tutorials/00_sync/harness_langgraph/project/tools.py b/examples/tutorials/00_sync/harness_langgraph/project/tools.py deleted file mode 100644 index f02587430..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/project/tools.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Tool definitions for the harness_langgraph sync agent.""" - -from langchain_core.tools import Tool - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. 
- """ - return f"The weather in {city} is sunny and 72°F" - - -weather_tool = Tool( - name="get_weather", - func=get_weather, - description="Get the current weather for a city. Input should be a city name.", -) - -TOOLS = [weather_tool] diff --git a/examples/tutorials/00_sync/harness_langgraph/pyproject.toml b/examples/tutorials/00_sync/harness_langgraph/pyproject.toml deleted file mode 100644 index deecd08b3..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/pyproject.toml +++ /dev/null @@ -1,37 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "s-harness-langgraph" -version = "0.1.0" -description = "A sync LangGraph agent using the unified harness surface" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "langgraph", - "langchain-openai", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py b/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py deleted file mode 100644 index 2eb561cec..000000000 --- a/examples/tutorials/00_sync/harness_langgraph/tests/test_agent.py +++ /dev/null @@ -1,144 +0,0 @@ -""" -Tests for the sync harness LangGraph agent. - -Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.yield_turn) -end-to-end against a live AgentEx server. 
- -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: s-harness-langgraph) -""" - -import os - -import pytest -from test_utils.sync import validate_text_in_string, collect_streaming_response - -from agentex import Agentex -from agentex.types import TextContent, TextContentParam -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest, ParamsSendMessageRequest -from agentex.lib.sdk.fastacp.base.base_acp_server import uuid - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-langgraph") - - -@pytest.fixture -def client(): - return Agentex(base_url=AGENTEX_API_BASE_URL) - - -@pytest.fixture -def agent_name(): - return AGENT_NAME - - -@pytest.fixture -def agent_id(client, agent_name): - agents = client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingMessages: - def test_send_simple_message(self, client: Agentex, agent_name: str): - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Hello! 
What can you help me with?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - def test_tool_calling(self, client: Agentex, agent_name: str): - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What's the weather in San Francisco?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - def test_multiturn_conversation(self, client: Agentex, agent_name: str, agent_id: str): - task_response = client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - response1 = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="My name is Alice. Remember that.", - type="text", - ), - task_id=task.id, - ), - ) - assert response1.result is not None - - response2 = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What is my name?", - type="text", - ), - task_id=task.id, - ), - ) - assert response2.result is not None - for message in response2.result: - if isinstance(message.content, TextContent): - validate_text_in_string("alice", message.content.content.lower()) - - -class TestStreamingMessages: - def test_stream_simple_message(self, client: Agentex, agent_name: str): - stream = client.agents.send_message_stream( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Tell me a short joke.", - type="text", - ) - ), - ) - aggregated_content, chunks = collect_streaming_response(stream) - assert aggregated_content is not None - assert len(chunks) > 1, "No chunks received in streaming response." 
- - def test_stream_tool_calling(self, client: Agentex, agent_name: str): - stream = client.agents.send_message_stream( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What's the weather in New York?", - type="text", - ) - ), - ) - aggregated_content, chunks = collect_streaming_response(stream) - assert aggregated_content is not None - assert len(chunks) > 0, "No chunks received in streaming response." - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile b/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile deleted file mode 100644 index 3a9412fa9..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 00_sync/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml -COPY 00_sync/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md - -WORKDIR /app/harness_pydantic_ai - -# Copy the project code -COPY 00_sync/harness_pydantic_ai/project /app/harness_pydantic_ai/project - -# Copy the test files -COPY 00_sync/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment 
variables -ENV AGENT_NAME=s-harness-pydantic-ai - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/README.md b/examples/tutorials/00_sync/harness_pydantic_ai/README.md deleted file mode 100644 index 1466bc4e7..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Sync Pydantic AI Harness Test Agent - -A minimal **synchronous** Pydantic AI agent that drives the **unified harness -surface** (`UnifiedEmitter.yield_turn` + `PydanticAITurn`) on the sync -(HTTP-yield) channel. - -## Why this agent exists - -The `00_sync/040_pydantic_ai` tutorial streams via the bare -`convert_pydantic_ai_to_agentex_events` converter and does **not** exercise the -unified `yield_turn` path. This harness test agent is the sync coverage for the -unified surface: it proves an agent author can wire the sync channel through -`UnifiedEmitter` and get automatic span derivation (tool spans nested under the -per-turn span) for free, exactly like the async/temporal channels. - -## How it wires the unified surface - -In `project/acp.py`: - -```python -emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, -) -async with agent.run_stream_events(user_message) as stream: - turn = PydanticAITurn(stream, model=MODEL_NAME) # coalesce off: stream tool-call arg tokens - async for ev in emitter.yield_turn(turn): - yield ev -``` - -- `coalesce_tool_requests=False` (the default) preserves token-by-token - tool-call argument streaming on the sync channel. -- The `UnifiedEmitter` is constructed from the ACP/streaming context - (`task_id` + `trace_id` + `parent_span_id`) so tool spans nest under the - per-turn `AGENT_WORKFLOW` span automatically. - -## Files - -- `project/acp.py` — sync ACP handler using `emitter.yield_turn(...)`. 
-- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool. -- `project/tools.py` — `get_weather(city)` returning a constant. -- `tests/test_agent.py` — live integration test (requires a running agent). - -## Tools - -- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string so a - run deterministically exercises text + a tool call + a tool response. - -## Offline coverage - -Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake -streaming/tracing, no network) live in the SDK repo at -`tests/lib/core/harness/test_harness_pydantic_ai_sync.py`. diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml b/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml deleted file mode 100644 index 55d8f5d2b..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/manifest.yaml +++ /dev/null @@ -1,58 +0,0 @@ -build: - context: - root: ../../ - include_paths: - - 00_sync/harness_pydantic_ai - - test_utils - dockerfile: 00_sync/harness_pydantic_ai/Dockerfile - dockerignore: 00_sync/harness_pydantic_ai/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: sync - name: s-harness-pydantic-ai - description: A sync Pydantic AI harness test agent using the unified emitter surface - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "s-harness-pydantic-ai" - description: "A sync Pydantic AI harness test agent using the 
unified emitter surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py deleted file mode 100644 index f23cd7960..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/project/acp.py +++ /dev/null @@ -1,92 +0,0 @@ -"""ACP handler for the sync harness Pydantic AI test agent. - -This agent exercises the UNIFIED HARNESS SURFACE on the sync (HTTP-yield) -channel — ``UnifiedEmitter.yield_turn(PydanticAITurn(...))`` — rather than the -bare ``convert_pydantic_ai_to_agentex_events`` converter used by the -``040_pydantic_ai`` tutorial. The unified surface gives the sync channel the -same tracing (span derivation) the async/temporal channels get for free. - -Flow: -1. Open a per-turn AGENT_WORKFLOW span via ``adk.tracing.span``. -2. Construct a ``UnifiedEmitter`` from the ACP/streaming context (task_id + - trace_id + parent_span_id) so tool spans nest under the turn span. -3. Wrap ``agent.run_stream_events(...)`` in a ``PydanticAITurn`` and forward - events with ``emitter.yield_turn(turn)`` — yielding each to the client. 
-""" - -from __future__ import annotations - -import os -from typing import AsyncGenerator - -from dotenv import load_dotenv - -load_dotenv() - -import agentex.lib.adk as adk -from project.agent import MODEL_NAME, create_agent -from agentex.lib.types.acp import SendMessageParams -from agentex.lib.core.harness import UnifiedEmitter -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.types.task_message_update import TaskMessageUpdate -from agentex.types.task_message_content import TaskMessageContent -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create(acp_type="sync") - -_agent = None - - -def get_agent(): - """Get or create the Pydantic AI agent instance.""" - global _agent - if _agent is None: - _agent = create_agent() - return _agent - - -@acp.on_message_send -async def handle_message_send( - params: SendMessageParams, -) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: - """Handle incoming messages, streaming events through the unified surface.""" - agent = get_agent() - task_id = params.task.id - - user_message = params.content.content - logger.info(f"Processing message for task {task_id}") - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name="message", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - # Construct the UnifiedEmitter from the ACP/streaming context so tracing - # is automatic: tool spans nest under this turn's 
span. - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - - async with agent.run_stream_events(user_message) as stream: - # PydanticAITurn preserves token-by-token tool-call argument - # streaming (Start+Delta+Done) on the sync/HTTP channel. - turn = PydanticAITurn(stream, model=MODEL_NAME) - async for ev in emitter.yield_turn(turn): - yield ev diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py deleted file mode 100644 index 72fd74173..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/project/agent.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Pydantic AI agent definition for the sync harness test agent. - -The Agent is the boundary between this module and the API layer (acp.py). -Pydantic AI handles its own tool-call loop internally — no graph required. -""" - -from __future__ import annotations - -from datetime import datetime - -from pydantic_ai import Agent - -from project.tools import get_weather - -__all__ = ["create_agent", "MODEL_NAME"] - -MODEL_NAME = "openai:gpt-4o-mini" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -def create_agent() -> Agent: - """Build and return the Pydantic AI agent with tools registered.""" - agent = Agent( - MODEL_NAME, - system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), - ) - - agent.tool_plain(get_weather) - - return agent diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py b/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py deleted file mode 100644 index d649c75f1..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/project/tools.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Tool definitions for the sync harness Pydantic AI agent. - -Pydantic AI tools are registered directly on the Agent via decorators -(see project.agent). This module hosts the bare function so it is easy to -unit-test in isolation. -""" - -from __future__ import annotations - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. 
- """ - return f"The weather in {city} is sunny and 72°F" diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml b/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml deleted file mode 100644 index 08f709a4a..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "s-harness-pydantic-ai" -version = "0.1.0" -description = "A sync Pydantic AI harness test agent using the unified emitter surface" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "pydantic-ai-slim[openai]>=1.0,<2", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py deleted file mode 100644 index 96da95fdc..000000000 --- a/examples/tutorials/00_sync/harness_pydantic_ai/tests/test_agent.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Live tests for the sync harness Pydantic AI agent. - -These tests require a running agent (server + deployed agent) and exercise the -unified-surface sync handler end-to-end over the wire. They mirror the -``040_pydantic_ai`` tutorial tests but target this harness agent. - -Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives -in ``tests/lib/core/harness/test_harness_pydantic_ai_sync.py`` in the SDK repo. - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. 
Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: s-harness-pydantic-ai) -""" - -import os - -import pytest -from test_utils.sync import validate_text_in_string, collect_streaming_response - -from agentex import Agentex -from agentex.types import TextContentParam -from agentex.types.agent_rpc_params import ParamsSendMessageRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "s-harness-pydantic-ai") - - -@pytest.fixture -def client(): - """Create an AgentEx client instance for testing.""" - return Agentex(base_url=AGENTEX_API_BASE_URL) - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest.fixture -def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingMessages: - """Test non-streaming message sending with the unified-surface sync agent.""" - - def test_send_simple_message(self, client: Agentex, agent_name: str): - """Test sending a simple message and receiving a response.""" - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Hello! 
What can you help me with?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - def test_tool_calling(self, client: Agentex, agent_name: str): - """Test that the agent can use tools (e.g., weather tool).""" - response = client.agents.send_message( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What's the weather in San Francisco?", - type="text", - ) - ), - ) - result = response.result - assert result is not None - assert len(result) >= 1 - - -class TestStreamingMessages: - """Test streaming message sending through the unified yield_turn path.""" - - def test_stream_simple_message(self, client: Agentex, agent_name: str): - """Test streaming a simple message response.""" - stream = client.agents.send_message_stream( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="Tell me a short joke.", - type="text", - ) - ), - ) - - aggregated_content, chunks = collect_streaming_response(stream) - - assert aggregated_content is not None - assert len(chunks) > 1, "No chunks received in streaming response." - - def test_stream_tool_calling(self, client: Agentex, agent_name: str): - """Test streaming with tool calls through the unified surface. - - Exercises token-by-token tool-call argument streaming (coalesce off), - which the unified yield_turn path preserves on the sync channel. - """ - stream = client.agents.send_message_stream( - agent_name=agent_name, - params=ParamsSendMessageRequest( - content=TextContentParam( - author="user", - content="What's the weather in New York? Respond with the temperature.", - type="text", - ) - ), - ) - - aggregated_content, chunks = collect_streaming_response(stream) - - assert aggregated_content is not None - assert len(chunks) > 0, "No chunks received in streaming response." 
- # The weather tool always returns "72°F", so the agent's reply should mention it. - validate_text_in_string("72", aggregated_content) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/00_base/100_langgraph/README.md b/examples/tutorials/10_async/00_base/100_langgraph/README.md index 6f6c6a36b..cd2fa6dd6 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/README.md +++ b/examples/tutorials/10_async/00_base/100_langgraph/README.md @@ -1,46 +1,52 @@ -# Tutorial 100: Async LangGraph Agent +# Tutorial: Async LangGraph Agent -This tutorial demonstrates how to build an **asynchronous** LangGraph agent on AgentEx with: -- Task-based event handling via Redis -- Tool calling (ReAct pattern) -- Multi-turn conversation memory via AgentEx checkpointer -- Tracing integration +This tutorial demonstrates how to build an **async** LangGraph agent on AgentEx +using the **unified harness surface**: -## Graph Structure +```python +turn = LangGraphTurn(stream, model=None) +emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...) +result = await emitter.auto_send_turn(turn) +``` + +The `LangGraphTurn` + `UnifiedEmitter.auto_send_turn` path replaces calling the +lower-level ``stream_langgraph_events`` helper directly. + +## Key Concepts + +### Unified Harness + +`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw +LangGraph `astream()` generator and exposes `events` (an async generator of +`TaskMessageUpdate`) and `usage()` (token counts captured from the final +`AIMessage`). -![Graph](graph.png) +`UnifiedEmitter.auto_send_turn(turn)` pushes each event to Redis via +`streaming_task_message_context`, accumulates the final text, and returns a +`TurnResult(final_text=..., usage=...)`. -## Sync vs Async: Key Differences +The same `LangGraphTurn` object can also be passed to +`UnifiedEmitter.yield_turn` in the sync channel. 
-| Aspect | Sync (Tutorial 030) | Async (This Tutorial) | -|--------|--------------------|-----------------------| -| **ACP Type** | `sync` | `async` | -| **Handler** | `@acp.on_message_send` | `@acp.on_task_event_send` | -| **Response** | HTTP streaming (yields) | Redis streaming | -| **Message Echo** | Implicit | Explicit (`adk.messages.create`) | -| **Streaming Helper** | `convert_langgraph_to_agentex_events()` | `stream_langgraph_events()` | -| **Extra Handlers** | None | `on_task_create`, `on_task_cancel` | +### AGX1-377 Note -### When to use Async? -- Long-running tasks that may exceed HTTP timeout -- Agents that need to push updates asynchronously -- Multi-step workflows where the client polls for results -- Production agents that need reliable message delivery via Redis +LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates" +node outputs). The `SpanDeriver` does not open tool spans from Full events +today; that gap is tracked in AGX1-373. ## Files | File | Description | |------|-------------| -| `project/acp.py` | ACP server with async event handlers | -| `project/graph.py` | LangGraph state graph definition | +| `project/acp.py` | ACP server using unified harness (LangGraphTurn + auto_send_turn) | +| `project/graph.py` | LangGraph state graph (weather example) | | `project/tools.py` | Tool definitions (weather example) | | `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | +| `manifest.yaml` | Agent configuration (name: ab100-langgraph) | ## Running Locally ```bash -# From this directory agentex agents run ``` diff --git a/examples/tutorials/10_async/00_base/100_langgraph/graph.png b/examples/tutorials/10_async/00_base/100_langgraph/graph.png deleted file mode 100644 index 16d22a1e7ec819b0f0520a1347c729ca6adcbe5a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16357 zcmZ|0byQT{8#a6>K?zZ$TT)U&Is|DDq`O4v?gj}_x;vz#TRH_9dQeH}?(XK>^ZdSl 
z-gm8cEt~=8%sG4Cd+$50>$<q6=PPPd_RFJHMn3{X~?yQHF`q&lv!Tyw2*5%7UPDbQM&zU?ssLi-SI7x33#%w4C ztE)Mib?Wn=v2{F|YtrQh`H{qwe?G)V`nWc@zSHXK_mVdB%Oi@8^J5{Q zPKt5Q;cWL|9WQK%ItW)B;ShA`{C<*2-l2zYKZrp^!ZnDeXCZ42KQ)a&1r41>72*pkBN=;$n>$OY+x zgMxa&z3Yn9{LWXt7ZjA>knw%1(_#^S|30TxK8cMDc4zQ-_19OtrM=naSJ3Z#c|tP@ zi6Grr_`PbS)dZW1gF}^;h6a79L8}fr6!-wUdfQn52kMEUBve&V;r{sXqjC7LPZ)cf77GUQ zoX@#_dzo(i)}X0V(I4{_lm}Rq>nkhC*XR3okFl{`{4Q6M3`#UBc&w*>$nw7bg)!Zq zz>EzYY!+m#S=H*?t!MdJ=H})O%cnyJf92T|6jHfmSy)-|MK4$4M$~&SA3X{}O!D*R z&rN1Ssp{V}N~+#>KkBNV_uZ~_-c(s$juJJv12!c=OcfaD!)Bp!ZfQzN%3rVj*7bU$ zpUCqR!k&D{C@2Y6E#sHR>(><3uuwVu<^ z=p!b9(ow2i<2_NXU(&uDDL80sYxt|7XflgF4J&g2<)WqDE{+)_xg_^8maI~V3=ex|11NuZnCTYRXH#O7)t@Nr@mpM&`6?fJY{ znRX5ArSHwf-(n42pVie>*+Op~Ef<9{4H|lS7%+4ZPufsvX{o{&a5U0yvADsm^*FmF zbUIp*eFAx&?RfmW;N;{~<1&Mvuro0|{h?!X`h3qUs=PJ%v%6AnZHP{0&=0CCzq>|F zFo>(K44I6#ch~#JB6kffB-rTw z#p}uHj~+duEz=k%fhQ#;P4F3NX)z-E|1>JpOtMkuCrnLUc5HlUGsS(Mlk?md7ET;I z5BBU_{0r4kw$<8ipXTr!ce8;z!>o2#mhv$-FT76FwXNE67T9xC7iwB=$x&W&)x>h$ z{Z~`43{DT@FYVdN+CS#pLhS5HKc??(zXXSbG($%7Z9ZeO6=1F3D=WwRF4q@KlFJvL zuCtkyj-pUc8Cu&SFan;n>ByItNvEXc+qZ>)+gzLJI+xsP<_4y{n$?vR8Q_#Udt+!= z4-XF&Wn&jyn~5PtIZ0oQA~iB7G%T!So4K#hut z<9zufPoxfbhM)^uQpwJ=UHhx=-@jXa#v*<{0EZ_+RJ61!0Vr0Ur&}MDIhwRrg+)|nES+j(_>gGl7FNZt+ z%gF^H_9rDi{_56TYcn2H-n=JpJymOU8b-)|)(V^#c@85dyz$*-H?8dZ`WeTt z7ROb&!#;*AEMm?NUv!n98Fzig?#zu6DR$nQNx8nhrg`;htkPnHK4Z)>q5W!$8K3EC zc~svAPALfq2)etTzrW&u?Kn6%$bo~pa=JZHLcngY<92_0uA9WF`+c^?EXuMsntJ@I zg&~PqM8sEFM`xlm*V)8l*>-wA5Rd68!XvC;RX}_M44{E{#Z#; z>zfdqy~@g$=U{s3%$nug2v0bXEB#oyCz3qF!oni;*DncaF|kh#e)qmZlzunaf*wbC zmI%j>PU-#I`>dv~21Kj|Cr8J;ygb_3v)yTzaE_Dx`T0L3n(Wu$uKBFGbsIc(bEP#} zES;)K!s-I<2S$RBgp^cMgj8;|%{0$!BCD>m*V)dWWCePU&Y*|3u;f3s^uj zM+*x^6NY}b`3Cj28OfQMY%?ws`l~!?Z)Id4lCvRfgBp@)* zDUz;NZ18nPVlfc&a)YQ}_I^fxfNl9)wTNj1C!*i1Y;(q!@q+2iA zbmSBTW4tC)4+pZG+S_M1w{G8M;4bkeq#09|?Vo~h766X3O2V_SurRdTKU&LXAOp&; z>waRXs-x2{p&SHUtr=zgKkGr$8V*rW(S})D*~E|FCxK-kHiz*k)NRev+|tsbr&Ff! 
z1XT?D5TJ^gh94Gl{?K6+^`Z`O>Z=Z4T6+I;V&c3d`J=X!tnAl?{r!rUunvA{W8-Z5 zGiRc-A=n4)@+bF=ry8vcIgF30S;DwX2i-luu5g8ENy9w^VOqM-V)`);P6X&0M^tAE6e*ozis8%6j)~b$WDk^aXVANA&T~rU2tY5O+xVtGQz9N_uk&#mb7oR6X}u$@)JL5 zvtk4kb2c_fDm%-@u8f*AdS zY3zY}DuIm?4^lfDeRFejS621vhK7bZfsArte+(!lfC|ebyxRdzgNKJ#n|_=N;c&qs z9pO>-@Ya&|D8Caqc>a9g)E_1y^{bjtOr=Mpabb>)dY1LmWvGV-~|URFHH^nwygy(X`suZOYvM)}#94!2Fge(#aUw0S3Q_AweUMEX;lEg**v&HiDK;~P5gY|qSYV&O5 zb_M01ZyyN>3E%I}sZH*l8bnlSXBROA;o8H|Lb|fK(uUeV8d_GSQNqjA#52&^(h?dj zWm2VBko(Icg>U`cvwS9tHAlD`vis1g<185|&x)>6_bLc7E?cx8KI|N-VGI$}3O#I; zYMixA7}*@vz&UuL&XiD7OzNYT)duah1jINXb_p8 zE0NVid60qK>-)Pm#l!ovX`v|Sf017P`t^%iL?jz_o1ag2v0YKtJ2WIFBSZ8Q@491R zV!y?+cyiZuXF4e~5dEB*iRpoZ!{Nc%nZ+;7Nq0mdD#V8*0c48_N3Lyw?_L)i37hb1e>pGLa+;AlW8J1?){ip0=RwB>YM=+`f2 zfeN3&jJC6CXHd`-w=qh??;`veJ*U zc3iI~!^6WL$m8UBTy2%8H>@VMHJpw&ADoQDl|=1N9y0jec@TfbCJp|IXZS`|_6Y%- zH)VWgK}rho#=jc&oSYmaXhNSCdibz4CcOnEIC%Z7 zi{$n=HRM0rT))^G!{WR(ddk#Z{8_RuE}Gyu2gji&{Bkzo;jtV1vN}FKJ_P!GEbl1m z=*VQc=)8fe>r=XQJu?%>=OaJ};ZkO4Ew?o^*vcr*eY&}sIqt`LWEt^7b*>X+MxP5Z zn+QSK)H9-_lx+r%X9+uS!3N6l&8TATs2v9IOBL-zhWz||Z6R_}i?$O_6sW_rrgLC` z6ndwkQe_NmEa>Z0_%-t{|dzxN3P3tj1c_^~2- zT{&Lpp@k`kL0bhHC6p`aqVGI5RN)@yD=RC<*ZmkpfmCXP8bYo+_7Tf5G33#}6u?3< z3N1KLrJLO&=$n4?83raMI+XW)uh?PU(9Z@1LPZau=HfyrOy7T~=DgcqjYUamD0I0( z8=shHv<#cKzjCI~seNR7GBNcVv9T3?_4Ld?m;FlvT{*zpQ4h==9UXOqZRzqfVrh*R zIpy9cm8l;LkTiWzfJUX|ZX>3&|Q4 z41Vn2e31pqJW?wSa+iDm{yoe3%@f#UZu;>mB=S~K@lI5!M^Y!UdC!x`!=vD!kKq`C z;uzY$lG+k|J)hGJKcCME?G9TF%1@Ro`lw2l;$n6lWlb{b!v`+QK@aeEXQEV? 
zS!aKds%(k9M=(~3$=6c*ps(mS_S-oDU0{@)*@5z=+%Vt3P(E8h?I5e*VD-0Ndp>eo z#zz{xY;Es5(>FxRO=p7iZ0O@Qk;HH*&~Q8W@54FHHbJ2-FHBBo;r{lcWkTnDbPzm9@u3+q60zP3KiF^Lva*!Sh37rR9rsIk) zaesQ9gH)@9d9-4_#pLNaU9G>Xe#`Ijuq4AfjD6T0oswdNku&<1e_J$Rd$?qtz7&2V>e zl(h7=E5P|d7Hl8imOQ(wm!T`7=R7ThzJfpQZuMjtlhT{+rdz5aIeNf|GiC1ysxjI6 z3}wd>@Bw|0US?z%uA>DBOS8|Nsq0b*JJ}Tfdu!dF6}8_f+_*~*6s3aPI7!ZI*itW6 zPj-+VxBV?YSAQH3twT#sUsvIs$dA*(4jCwc!!R=9@I;)=Fpze5xPXGVhsbpPxB{nh zjJ(~e4UTeyRsp$GbhA^!)gR?{W@-J}P!&GJi0gAkH?d~%$B4Qa?+Kc{>NVB_trhh2 z^dkNI_km7#29AzL50rW&Ff6n*xntK$Hhbo=%u7;c_9GYQ>m`ERzLB`K{}o#ukxMw- zotCB%B7fY-!}y4h7;lFlEMFdcSyV+uh0!5)sxl}jh!mt*^P=usQ3IJ)GD7Rsx|xXG z9nJCD^QuX?%mtfNrHh1vN67xNl!h2eRTxPAp%>7x!r_zDa^AU9_v4hXDBCgU6a(r) znt_ZWau~NeZzQlX%Hb(U8AIcuqq18n+Nn+l6qN{hwt}$x|a}IX( z`%9bd7}OJ=8yfg>HA={FjQC8f`PN&oI&|Qd=r3NppeZRQ|SeENA zEXZZEc;3K(7G$A)v*6#`-TB9kB8$SKqF|Xi=!XJ*ToJH0sy`|;p-Deb3u+e~P=?7W z^+=Pz=@^EG=Y>+ajT8z)dPWVJaK4Io$`@Hp_Nd}S`7LFfQF;hah|3%&B_|W*F!ocy zCK;hFi0nZnY}a*IPF}vbYveEn<|Ny9EE@M%c!jcbcM&hoc9JfzJhmJK1^MwFV+x8t zh}Yp zPPsSu_oHS;@OVkcr*d%uPF$+ZcLHmxA zm37wvm?HJFd~Z;{!bs(}fq8=bhnuc%aHrGMUZQv4Wkv?$E@|sGyGE!Ksi7cYAuphO zqw$Y`+L&2ceQthdzxkLM8N)4nDwl~_K;~KH_dP`*DgLHyoAMNCe)!`Y^b$B(`>ZRl^Aoce~kJ|(8 zu_t52*plJ5F*g4qpCh=3IHG`)^&Vi$G6PQt3C%!J@(>ghOX=qy!%tBvvz*!_mE%_f zs1&AJz1+$RmT8CPCF2S(Jk-)64OnyA&sdEAJZK>6Kh2B`#C5!6(oGO1HfiNs7lwkI z;7KTaaDKmv!b0;C0N3qgM#lXRB7B=k(rRxwtq2pC2!8u^HaI>$!(Mab_w?Dv<%3!ZRZ~@68M)T~0f)S)LjgT*2a1fark;!DM zm*CNn<5sEJLWl9e(UHi$dkKckzm|t7yrP1lrs^4JxZ*-=)YRE{>n(yNrGoz3L?*&W zc}{S*7ooAgh`dDk`90bTmRraQqNY1^^ziaGnS@pwmN>T*GTR@o{iQzCQZWCq7%)+) zcPUK3;*Jh|8`(6j(T>UTd&E1-O>(@}s&BRmlBMD}GnK!dgDG6tf#^~G8rSE`>S5jC zQ6irEEnX(?mjG5eQssFXB_mfXDC@R2(>lIFJT}x*&o|+L2gzj)>1m83<>l>k=)3*7 zni|pvU^WjG)i>RbIXf`^eHhU;oPYQ!s&P~fV>41qe0q8drvLQFp}~uPKgZX|s}5Ue zxSjSa9h(zaH68sj`nYnvG=3PU=-W4QfK;ANo`9N#g{;PuHK5%Os;~n9djK zvhDk`$)(-sbc>S1xN`uJzChn953n-5_Kd@c9GEHG7xuLoeSMK?VRi@V9Q0LpE^Tk} zw6z)zgr5--Dj!)(EiEr&$I^BqfB6^=URKkaiA{J>CQ+#HUsvZaW_`^`PEAjrdCpts 
zXKHY5h;*~H$=91-#O1+cODAE4dl?KTB*+w#mF2K)Fs4ODMn-}-I5LWd(a~zl?+hw< z)hJ8kGSxY`xS|tWAgI!IwqGZ%z`)KQO#J2+<#(HpbgAw+W}WpVd6Y0uwxcCM2h+7M z)&rA>CU$W5pR+AvlI+&V3iPT^O{_hE_a{;6aV+<_K3nMc)LN-CBqP&+11hz#c1$(b z*89u#2P&>XUbboRyc{rMWRVNoG+UTQ+s zb1o%ijPdJ2HutwiM$913wVpg+wr#sq3<|DoEv3?^xO*s&K|v!O2|HRfTpmoh5r~^l z;lfwt>gu9lFKJ^E$VIJByEag!)_iDEitc}(s_?xPga=zt{MuzkK7__gFNrC=4@^&o zjG8 zK_*!9-p3F72L|4$slD?NMHHB1CwsG$+vB$ZGc%#Q&4l0 zAx<_p#vKUjM2U_Y4JM;o>A5Za;sCXywCma8mgtW+pJ&weJuPX9h*2M$!E@1DzI&7C z`bq}B;)L^~ieZ?92N#WP?SbItg)ezY1<=ymS>fC3$6|_bhm(Cj6i~1pjA>FX7i1+a z4U6&`ZTwC<98r>wiE|=T65)SXKkLeScjH9!UCHmEvGL-&k}-hq(jkhY>wU_QKe_iv zAzhKfBGnkn;l7`rKjJ?CQqppLEW%ih~NshS}~LnFA*wEX}XYI1!={jw5YF>~;&gSo9?pJ$#+H#Sc> zysFY^I7Q)m1LmyTnIGjk86m#p6Q=t#T;)S!xgjD|q{72|#?a@XIgi)%28c#DU_U~0 z%!y5OAPtxn?fOhF(eYBc7Y8gn;XX8>uL}1ywMi+_>uf3v$@F7!N##%?6KTG6kMv`` zf{_&IuQ7T9TMFSM5#Pp0*xN9alh|fC{O&X_mZNHu`H%4xQaCB@ujlD2J;Ph?F4yx9 zPEX&g!)JdIbphv7<$d`SfvjAm5HK|#?zQ`o5wIkG)o-rr>?ttz)NMtUJTI#L@h`lp zINP={yb4z~mXpZDA^rF^yh>50tS;Tj;FY^2rAg`5h?v1@fjt9s`tKv;FPGnsitChp z4TcFxs8a}b5n(Q+QyPVLYuuJ-l*se1|83l7Fd0-*em0U)-DUetH?V~=sULQS83jR< zaR@DdW$n_KCIjGXgkOKu6!G8M8^a-g9~1wB0v%^ZayZk+0h~?fb-k^yQT=oY-dfFY zrs(gUO=pA3IRJv$UmX0D@_4Hm`lhjV>)ek=Qke>)lb5hbNjV-dZZpc|bV_90J|b0G zSrxu@PLt9vPPV8E0_R~IiI^RX7y%hhwyMv%4K95iuD3*UBzE)ja>4rf*H^ut@5n9lV||7lCM~O9u+bXelnqH$oi|z$LnCH=Drh~ zHyjn3ah;w=Nk+#a3dLt`gOyZ>go*e(pnl~-`0jGB#kMh5{u7lMATjdg!TmdC>t6wJN#YKveMd&(@B^ zTi-@D7?HH(UuVY9SU7){ikjLeFCE<~jdJt{rS8*dJJIJtLQi?;Z~T5+*4j5R>4b&m zKo9_wq}qM)^=r}4E{XW;8AWbS{LBGf5=%=>Pmj~s$j|4x^HPmTJ%U}iWy7q_k|crv zxJKi8FJF$7W^~^+VSfHg%FvvfK&Q3g%;BFnS4?ZbB3Yk))Y2_TJ8}Rs+rNL zsZIDD%+IQd%#K@;rz;8F+81YCYY?QoW6qAyY{O1+qHZTNF|rHLRU@9k5qnl-bV3lmxbR-Mm#vtCQ56K2M9EyNdFB~hRH*S`yUot4!Xh~ZMn z7H?&tW0OPz_#0s`jWV4`X$p_2<-Qs@OUo?+Nb^VPL+U(>BChE{&$ZVdN;IZ+c}2&{ z;lZdMw55D~*}Q#jvnM7Rv25dFvut|m@Ef;bicutcn_A_+r0<1(R9<6$Qj$17IMl+T zqw9W2(psEll9-tIx`Ow0zRKcGOh0sRI+3+7z)tx7X8_%o$jCAg105aY1UdEAZ?IKH zX69hV(3eEvnirvL!__Gv>8{$_*9cf(oP?THD}!~|kL`=_6`+}ltW9~HUBa6{gvide 
zA)#rLa=A)YcNI(^KW&KD`_)(?r5^DaJ~-I1(xiN$qQ?Zl9&G@Uuh{N?L`Aw@wulA0}DMmYaeebi5|hQ(2* zApdfkzwPySn1m~y8Wu*c)I3(Zb0KoPyZcszmC0+Wf3`BAX~k?^y1-o^F|pL72kw4< zs{}L&(OFqxzkk=geg98L9Ha3mVFt_jOiN_@n%OkgO~61GuM)pD!D}N?FM&v_VBp~Z z5G$7+6itwAz%^9sY3!uH&|-^Jp$55AJE5y=xkp&tj@iNt3=)xX%5p5^r^HJtIiob_}Y0 zusH1la+%OV^>JjouX9V^@U2jytt+MMi&=hG&udu(EFms4VE^yGxtU3PbunnYaJT6C zJZc}ApG4!N*_T)~GX41Qzgy`<4k%_qFI_#=>hfPV?llu@@BKwK>yjLU{_^1KoLjR@ zI`6O41^&-oiL0Tp43N;F@5(3See>DR?22n2_N_=Bi^V0bC4%SQUR2fRFgiRH$FQ&` zoQ*9N`L6iU7P1kEi7c&+Z@TI}JsshdXiO?*oQT{N&0urXxeTh}n(|xzN(7Ab!&`Ia zNN9SpKIgO99^>u#-^~!c>-q$}+F*NDIyPF?0oK`*-J&bN@QbcmJ=keY!=zLsBA~71^{^ssqJZoL`R{amnl^qR)u_52u4kr@x0Z5j zAq0Q$@r6FT!aZ2VOFQxLwC0J-yM^C-PvZTYzoHf1?|B~ScRdBN~p^dtzCdc^!ytRnA+o9{iCLmcH2S2a~sRcnOE@u>3_ z^%GV+>Y{ow{tCJzrt$yQe5UR9a_;kh&}$V;*9Ze=5A}m+birW}DsY+Wxzz1mZ&`K=4sR zN=TDz14pDqMMZR(kza9D#7>-{py;X9wY9yzp`lw^Zf@>aj=JZ3d|FoAB&uPdq2*JN zlHda6fw*W>U=(A$F-u86-PutHI2R6rM?04NC)|unPQC!tkrb4oSa59PNiWwsh8U75 z;;Annb|<*zKkK^s^Zog(U9bR+8t;y1^HR7{6rwm`6lUS%F3qzwXFLDmmNt`R26*hm zHE`pCj3iyrij$juKU@(N<=sl%Li$f1Weni`XQ+!=5Ds(bk%gRGzb7z_&Sww2YTGBH zL}g>LRjxH9A$bHlBpEAM7$aJbl=#DON_1BpnFPudXO^9r&_6I3W;;k!c)x$*a2_zrY(6Ja?QN-!-e_~j0u4G`wx7RAgQmOH>vQ(|ScpCcnHVz!w0Hz4{W5Q(G} zgMQQz1)YqCxG{vacg~8fPtfue(M{g`z%jXgnMjjJaR|-N$|OKq||Uw>-Z5u@m*7;Kr!4lFA1lkq?;9N2xJY z#__c}CPPCEY|f1W@pr@kOWOE3S>Zc6vB_f&*l7B3a9>}ap_c&tUvBTC{+QA7KokMs zU`j|!BJr+=%G-Sfot6M1y$k*>2?nV4#5b_<=F*QDkY?aS*OQyhY;sCUqDz|%y<@!B zm$_{^3Etkng|lHAyUr1rc%er-TIcg|Y)En|7yrneNSfRJGqjbKw|9%!vu*|=lP)9~ zNF;ZBqjov%ToJ8bQS&Mqy))odQ%SuJv!?&p{g6l(>Wjs@7dDCpwV|X4$tT9UKly^u z9A_9NJ8bYv7N{gHF0S57fbFmQ-SHCjz{b}Y&xG%Fi!~AZ&6^=JXd>j?ss)_gG)Q_# z2#pBIQ#?F%0Hdom9*8qThEDMI{98%gL`inaIj)8B=yEX|()N-waX>0o-i4*_Fqqaw z9jIX?g@LILO}6=5kBW(jMeuYiuO=Br0zFb&k8S<@3ye;Y2&oz_dipg#yOu*(5v+J* zBp@I_FT9GN)oG}pg3@5p#r}6-%%MS&=;QoP=|V;h^cA8tH-Cy^&OHSZ<;4(Zghf>J zo=Z|$N-v`6gLWbrP6&L{*%D3c-Kb^l+uT2s#8$rgwl2ouyBIh)6d?GLAgCewtnuQv zyrTc$10z*x+UJ*j)gwLR^@z=~pBb6oCN%QrErx}wgtGJY>6N3Q;XIdTYny`dv#K9I 
zxWAQ_vH-rF_@j2B5IIdFUzzenLr+9ms=^YgKLzgtk8LdL?1JTnXJErnCn?0zYN|hf zMTC_P8C7X{19a;aC{pSNR{sJ7-m%RCk2Ay*t#G13F>EyKp-Rc2q`cr zV!z6h!DxJ*!`S2^kf9CAc>eVOphV##!^!A=e~2&nCtsy#2k6IWWi6}1f_Mq%#I{}t z+hQG^JCoJQws7=Tl>R<_?!O8qBvQC4i(&CHoI?^A1jt~%R#e!vDHbrVr*oBzZRzXj zky-Pk-GYBVcU?<_$hT`v&yI}Sd-Piv$CnR6$sRx(Nd~o zOQW041du%t4FNgR-BwA3cXf5O8hh(WtZFQ+*_cZJRWG)>8_=Mg6=eBxwj6X~(g6i7 zC**zDO$Y;4JpExY5=v5>xhDQjjkkW&yu~PRbF!*yvcBSsE=tfPR9a1qo5%VB9|*rg zXvY-Di;9|~)6xRdY@5iT15gHbzUS!Lb^SR#O}Mxiv+;UI}E=s+UkczdFv6V!8R48u{PcMwlnE$kJj zWs3%GQF#T&?bSsG-=I4v#$3cyifh>k1csz)D(_!Mu%Dn%Z%S_+*9^{whcTj1b{-w zU^)5c!TG`r0RL%3+R&R^-wjI0U^L9RpP1}IpV`|kh=2afuARdh z)xgY9K1#hkP@eyqC7n2W-R%J(;%Fn#y;|thTG9alcpH$$M|8h`0@yIhErxAOkhe_hvH)nT`(@yZ&as2^obdfk-z4X!M^_Qto46V<#y8DK-&w+Z$b5 zUH$s==TFBd3SqLlySx6?am`>37*JiQ=Bub1HrvgMr~`R%9gqWBPn2hjHaeNfmW*}U z)?0x^v|sx5kPRc?Z-t?#mzGvC3q}A-==;`YKOj5|URn9z;_KUn_<|Wi=Ci654eH#i z_u&aBTE_L$>$~&qqV-#P-N%Y_g(9mL*XUG?g)SKrgUN?a30SWv04Pe>B2}i*H9d_! zHz$HX70Psi7eHax<99FkxT1ygIN-cduLb|492Y{YT7=o=-+Tkp8g0fGFP`kU3ZnSi zwcn0!CukbH*cvH_LV?zXv-Vp}he!f}>R){V=xKjlbz-u|k87*}a&8O+77|FS&N_t< z61u-R?#|B6J_2^j11eBq;C&PdnTDsQbv;0DK-hZ|6a6GrLmNFdLXnFFl=-1F-fiex zap@-%^eK9!OuZwZ0M^Jtp<>f-QhVjR@$TFtN|=72ngw8>PatsK?SLv(T|9wlVEiTQ zSy*=`rTfAs64{TSQJ|JLER_$ItdAQ+B&qy;!?}s$>({TM&j<+M{H{A%nT#a?uRdS- zeJhj)rQ6DCM(55yyG5X^Mlm_nwJLDizr(s++cL9s;~H6N_%$pw(h$Z~+Pm3cgw?XOPG7+GS!w!b7LEL4Jg^ey++S z#1zQI?*Mt==MC_E-AcgR$d^~}Fl&^)v<3oUUdI)g0Yn4Bca@@O&_44B10xz~0ihm8 z$a)`O5gDQTg8^NZ8@3w=n+_)D0EMx3zRD)jg?a6iW0?k~q%tZ*0@?GC2}mmcdSx{( z0|9xP(txn%Nk^uzSA#}Lgo1*?O^j1!H<2m|<8peE6MTTkq<~pdts1!!$Yn6sXP7Vo zx;CAs#w=@v0V(jH1FeIGnf+lv2lNB|W%uy#KAWg$`#paAN4gN_FBO`JUds`LmNz%c zQL?XmaWFA^TF)c9Pd3>B67Ac;S|48IW(~MU(A%OBpODZwJzX$vQGh*LyY^k9`pl_r`66j>m&|ZN4jAn#Nb_(#>7Talq z5ETQ1w2CDQc^PyTUznH{14~>dA0`^p`)T9J$ zp&p2--G(!T2Y~jy6Z9rM?yYY5!tvcK9PLHu(h$)x0cti7OY9Pv>>_O-ZP3Yb+QSEs zf$+ZAa&mGfKc(8G&^Q_u_G@%_WAo8EN_bUa{|XZq?936q%(N_s^bK5z6x~Bq z;`TJ=<7Ep$4HZTVWH6c4Ug!OKI}B7w3k>)_TP|2dujj7X 
zG7(1`6asdarxS+Qk410nf6uzj>-&O!2>kr--|uwhIk%{Q;=YiZBtkBhHi0qpsfhy6 zwMxjwN+ih90fF@se3SxIHQLwO&m^lXXC3ON_4n5+av00e-qCz=gbzX0H&}~l18_dd zQYHResoAf#vz~e5%rMmgMW9-(fWKf~l~g43r*D5L3gUte zmDt6#UAba`>bn#*H70!NZYtucqZvu*634r9I(6A%Am)Zqo_2PMKY>m>h1>sLy^B%c zgMcz{Jk?sCjSvXV*9<|+*I+V7?n|J8Pl=7qxjS#aCx4f9RPrU~OF^b*@y_=&+iEt0 zmPs#!n^i2R>B^5+M!y6)KTgOCB|(z`v_2GCzHo$N@~?;BOQy7&4QJ9t;pk2EXt2%C zN1qa7N!@H4+ARkYQoI4IWSBv_pPvBe$cs*P*Uy;HmeXfr6skaaWAkjtQH_{7@2<0} zYZkHxWye<80EjfB5aURgm3-ms2VF2^!aoDX^~Qy zh7>|%#`ef=cnj{o#1@yX3r%5b@Kbs)6wOhgMFF^L2d&c zu3^nTN#!#H-MVhiqnbBdf`iQt7DM(SXs^}HI%8t0|D$BImTd;%E2L3GUDdo>H_cL_ zQN}r#DLj1+Tq+kj=qEDtJ`Cn%%9Ph(cQyoV8asfKjhFy~SXS54GFN^Uj7`cj$YVYE za=Xc8`=p}%uIU~`byh_1Ccwvki0~L7Q9ZmGodB_R9pJq#`>p4h`xl4H@}NZYnOjH& z9bNU_S0|fyAo;=}<9HtgN`Hwv(4*vnXk-yN=s;Nop&x*=#irM1yQx6Nv54qjeBcjG zlsV{5w2lJZQOF%2queSRvy?~ZRt4(kz-E*MRQDMvXv;zj0Ca#NdMIX++oPnM)e={O zK~ILZ>)u!K0yke$(+W+^4G@A5Lf#2J9;1$T64E{6p0}cRhm5X!H+Vqb`YJL@C?z5o zb6(nbk$7jS7M~{#-)-))!^25txSQWCc_4n|^y^q0qNqgXfEDXTB4YH(<_P9GJY)NM zOw-UqlftR}Y|2U!2?>di?8#L28}xIK%Gur>_E1Iua?}zuZG4=pG_G@4h9w|%AgrlA zqHWuHs&O3nSVYTU{tCDp^XYoKEYIV$p{A~o$GZO+kn#)>CF$R$s+(8aFy0t-U8q=Rs2yiLg0JD;qJL4B|FPY9e;k)+WY7MM|oEA3bDh0-7N&udbNP zqS~Ar^$>Z~&CN}`(}X~ooeyZSQpo*dAMt$;|5#2=E*T_I>p0voABlnBKcKz6y^{!K zoh&#wSbS_WwfN#A=>6zwJk?<#pvORV79h7PN;CH3zWn`Ta?Y3Cg?z*h zB^7Ol7gtx~o*9gxe9fnN|6)s@sM=Iz=~nzSGCv%+X|(pfJiJ{Ub|zmjyha^hcFzBb z2Sx5Pv)5{0dd$M#KEWn>9u#fWQFENDs>jnfXlqd!VMP+Zv&P8o+L)O!araoMM}-oR zRIZRvHy>t&vr4{o#96(w>9$pVmiUr8s{0OPRpPy`mcF`84GX2KteIPuDM_yNPU__e zooTE{fUg_=_x03HhVP~9l^z12HTl+z_83fu_)0wvFU)_iQV(ev7#I-3JsJv@&1IiS z_F-dU${#wDMQjdi{%d{^{%~_~wc^N`u~PSgwO(x8;R5rnfES|r&b4`H0Ryc==g5=W zn|w0I@12Ss+}3Tl_vy3Op1Ysj*!*!2)N^bt(0zqJY9kMbhh&EDpLJf5@T}BFMR_@Q zZDJc{vLLN^QTXv*CVi_qt!S|8HS=tA zVY)ishDFGUG5;hvU!u)yNt#2Nr0J$40&#h?<#E<<`e28tGx^T`#tcxu=dY5_C|fFN z`+es4oTB81D_m=wb0P%N0st=8nD;N9k3BFBSNcZ(&4wXRgaw9?u(o8P@R6{mPk~F3 z?B%rFk-nsBiB~?oQu0ObddykL(+hQgumSMCZ0+p=iDWNHKJS&5~@X_Nn&#`6r; zgjy?j@w$y*K}Im`N@TW2$f^zD-tc%K*y@bgdXLSyb2 dict[str, 
Any]: """Process the current state and generate a response.""" messages = state["messages"] if not messages or not isinstance(messages[0], SystemMessage): - system_content = SYSTEM_PROMPT.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ) + system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) messages = [SystemMessage(content=system_content)] + messages response = llm_with_tools.invoke(messages) return {"messages": [response]} diff --git a/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py b/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py index 1b402a906..e421528fc 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py +++ b/examples/tutorials/10_async/00_base/100_langgraph/project/tools.py @@ -1,9 +1,4 @@ -""" -Tool definitions for the LangGraph agent. - -Add your custom tools here. Each tool should be a function decorated with @tool -or created using the Tool class. -""" +"""Tool definitions for the 100_langgraph async agent.""" from langchain_core.tools import Tool @@ -17,16 +12,13 @@ def get_weather(city: str) -> str: Returns: A string describing the weather conditions. """ - # TODO: Replace with actual weather API call return f"The weather in {city} is sunny and 72°F" -# Define tools weather_tool = Tool( name="get_weather", func=get_weather, description="Get the current weather for a city. 
Input should be a city name.", ) -# Export all tools as a list TOOLS = [weather_tool] diff --git a/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml b/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml index fecbc6149..715477bac 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml +++ b/examples/tutorials/10_async/00_base/100_langgraph/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "ab100-langgraph" version = "0.1.0" -description = "An async LangGraph agent with tool calling and Redis streaming" +description = "An async LangGraph agent using the unified harness surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py b/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py index 948db1558..b80d7a8f9 100644 --- a/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py +++ b/examples/tutorials/10_async/00_base/100_langgraph/tests/test_agent.py @@ -1,14 +1,8 @@ """ -Tests for the async LangGraph agent. +Tests for the async harness LangGraph agent. -This test suite validates: -- Non-streaming event sending and polling -- Streaming event sending - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v +Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn) +end-to-end against a live AgentEx server. 
Configuration: - AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) @@ -25,14 +19,12 @@ from agentex.types.agent_rpc_params import ParamsCreateTaskRequest from agentex.lib.sdk.fastacp.base.base_acp_server import uuid -# Configuration from environment variables AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") AGENT_NAME = os.environ.get("AGENT_NAME", "ab100-langgraph") @pytest_asyncio.fixture async def client(): - """Create an AsyncAgentex client instance for testing.""" client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) yield client await client.close() @@ -40,13 +32,11 @@ async def client(): @pytest.fixture def agent_name(): - """Return the agent name for testing.""" return AGENT_NAME @pytest_asyncio.fixture async def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" agents = await client.agents.list() for agent in agents: if agent.name == agent_name: @@ -55,14 +45,9 @@ async def agent_id(client, agent_name): class TestNonStreamingEvents: - """Test non-streaming event sending and polling.""" - @pytest.mark.asyncio async def test_send_event(self, client: AsyncAgentex, agent_id: str): - """Test sending an event to the async LangGraph agent.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -78,10 +63,7 @@ async def test_send_event(self, client: AsyncAgentex, agent_id: str): @pytest.mark.asyncio async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): - """Test that the agent can use tools (e.g., weather tool).""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, 
params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -97,14 +79,9 @@ async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): class TestStreamingEvents: - """Test streaming event sending.""" - @pytest.mark.asyncio async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str): - """Test sending an event and streaming the response.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md b/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md index 6046b579a..db56979cc 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/README.md @@ -1,63 +1,52 @@ -# Tutorial 110 (async/base): Pydantic AI Agent +# Async Pydantic AI Agent -This tutorial demonstrates how to build an **async** Pydantic AI agent on AgentEx with: -- Tool calling (Pydantic AI handles the tool loop internally) -- Streaming token output via Redis (text + reasoning tokens stream as deltas) -- Task lifecycle hooks (create / event-send / cancel) +A minimal **async** (Redis-streaming) Pydantic AI agent that drives the +**unified harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) +directly. -This is the async counterpart to the sync tutorial at [`00_sync/040_pydantic_ai`](../../../00_sync/040_pydantic_ai/). +## Why this agent exists -## Key Concepts +This agent calls `emitter.auto_send_turn(...)` **explicitly** at the +agent-author level, making the unified-surface wiring visible and giving the +async channel direct coverage. 
-### Async ACP -Unlike sync ACP (HTTP request/response with chunked streaming back), async ACP uses **Redis** for streaming. The HTTP call returns immediately when an event is acknowledged; the agent then pushes updates to Redis on its own schedule. The UI subscribes to Redis to receive deltas. +## How it wires the unified surface -### Pydantic AI Integration -- **Agent**: A single `pydantic_ai.Agent` that owns the model and tools. No graph required. -- **`@agent.tool_plain`**: Registers a Python function as a tool. Pydantic AI infers the schema from type hints and docstring. -- **`agent.run_stream_events(...)`**: Yields `AgentStreamEvent`s (`PartStartEvent` / `PartDeltaEvent` / `PartEndEvent` / `FunctionToolResultEvent`) as the model produces them. +In `project/acp.py`: -### Streaming -The helper `stream_pydantic_ai_events(stream, task_id)` consumes the Pydantic AI event stream and writes Agentex updates to Redis via `adk.streaming.streaming_task_message_context(...)`: -- **Text and thinking tokens** stream as Redis deltas inside coalesced contexts. -- **Tool requests and tool responses** are emitted as **discrete full messages** (no token-level arg streaming). To stream tool-call argument tokens, use the sync converter — see [`00_sync/040_pydantic_ai`](../../../00_sync/040_pydantic_ai/). 
- -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | Async ACP server with task lifecycle handlers | -| `project/agent.py` | Pydantic AI agent + tool registration | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration | - -## Running Locally - -```bash -# From this directory -agentex agents run +```python +emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, +) +async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: + turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME) + result = await emitter.auto_send_turn(turn) ``` -## Running Tests +- Tool requests stream natively (`Start + Delta + Done`) on the + async/auto_send path, so `PydanticAITurn` is constructed without the + `coalesce_tool_requests` workaround. +- The `UnifiedEmitter` is constructed from the ACP context (`task_id` + + `trace_id` + `parent_span_id`) so messages auto-send to the task stream + (Redis) and tracing is automatic. +- Multi-turn memory is persisted via `adk.state` (pydantic-ai message history + round-tripped through `ModelMessagesTypeAdapter`). -```bash -pytest tests/test_agent.py -v -``` +## Files -## Sync vs Async — How the Code Differs +- `project/acp.py` — async ACP handler using `emitter.auto_send_turn(...)`. +- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool. +- `project/tools.py` — `get_weather(city)` returning a constant. +- `tests/test_agent.py` — live integration test (requires a running agent). -This tutorial uses the same `project/agent.py` and `project/tools.py` as the sync version.
The only meaningful differences live in `project/acp.py`: +## Tools -| Concern | Sync (`s040-pydantic-ai`) | Async (`ab110-pydantic-ai`) | -|---|---|---| -| ACP type | `FastACP.create(acp_type="sync")` | `FastACP.create(acp_type="async", config=AsyncACPConfig(type="base"))` | -| Handler hook | `@acp.on_message_send` returns/yields events | `@acp.on_task_event_send` returns nothing | -| Stream output | `yield event` (chunked HTTP) | `await context.stream_update(...)` (Redis) | -| Tool calls | Args stream as `ToolRequestDelta` tokens | Args arrive in one full message | -| Lifecycle | Ephemeral (no task hooks) | `on_task_create` + `on_task_cancel` form a durable task contract | +- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string. -## Notes +## Offline coverage -- Multi-turn conversation memory is not wired here. Pydantic AI does not ship a checkpointer; to add memory, load prior messages via `adk.messages.list(task_id=...)` and pass them to `agent.run_stream_events(..., message_history=...)`. -- Reasoning/thinking tokens are not exercised by `gpt-4o-mini`. Swap to a reasoning-capable model if you want to test that branch end-to-end. +Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake +streaming/tracing, no network) live in the SDK repo under +`tests/lib/core/harness/` (the pydantic-ai async suite). 
diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml index 583b07251..4aca13d44 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/manifest.yaml @@ -17,7 +17,7 @@ local_development: agent: acp_type: async name: ab110-pydantic-ai - description: An async Pydantic AI agent with tool calling and Redis streaming + description: An async Pydantic AI harness test agent using the unified emitter surface temporal: enabled: false @@ -38,7 +38,7 @@ agent: - env_var_name: SGP_CLIENT_BASE_URL secret_name: sgp-client-base-url secret_key: url - + deployment: image: repository: "" @@ -47,7 +47,7 @@ deployment: global: agent: name: "ab110-pydantic-ai" - description: "An async Pydantic AI agent with tool calling and Redis streaming" + description: "An async Pydantic AI harness test agent using the unified emitter surface" replicaCount: 1 resources: requests: diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py index dc8a2de21..95b638f8b 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/acp.py @@ -1,13 +1,14 @@ -"""ACP handler for async Pydantic AI agent. +"""ACP handler for the async harness Pydantic AI test agent. -Uses the async ACP model with Redis streaming instead of HTTP yields. -Text and reasoning tokens stream as Redis deltas; tool requests and -responses are persisted as discrete full messages. +This agent exercises the UNIFIED HARNESS SURFACE on the async (Redis-streaming) +channel — ``UnifiedEmitter.auto_send_turn(PydanticAITurn(...))`` +— calling it directly rather than via the ``stream_pydantic_ai_events`` helper +(which this tutorial used before moving to the unified surface).
This makes the unified-surface +wiring explicit at the agent-author level. Multi-turn memory is persisted via ``adk.state``: on each turn we load the previous pydantic-ai ``message_history`` from state, run the agent with it, -then save the updated history back. Without this, every turn would be a -fresh stateless run and the agent would forget the prior conversation. +then save the updated history back. """ from __future__ import annotations @@ -23,17 +24,15 @@ from pydantic_ai.messages import ModelMessagesTypeAdapter import agentex.lib.adk as adk -from project.agent import create_agent -from agentex.lib.adk import ( - stream_pydantic_ai_events, - create_pydantic_ai_tracing_handler, -) +from project.agent import MODEL_NAME, create_agent from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.fastacp import AsyncACPConfig from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.utils.model_utils import BaseModel from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -66,9 +65,7 @@ class ConversationState(BaseModel): ``history_json`` holds the pydantic-ai message history serialized by ``ModelMessagesTypeAdapter`` — pydantic-ai's official way to round-trip - ``ModelMessage`` objects through JSON. We can't use a plain - ``list[ModelMessage]`` field because ``ModelMessage`` is a discriminated - union of runtime types, not a stable Pydantic schema. + ``ModelMessage`` objects through JSON. """ history_json: str = "[]" @@ -77,11 +74,7 @@ class ConversationState(BaseModel): @acp.on_task_create async def handle_task_create(params: CreateTaskParams): - """Initialize per-task state on task creation. 
- - A fresh task starts with no message history; the conversation is built - up by ``handle_task_event_send`` on each subsequent user message. - """ + """Initialize per-task state on task creation.""" logger.info(f"Task created: {params.task.id}") await adk.state.create( task_id=params.task.id, @@ -92,7 +85,7 @@ async def handle_task_create(params: CreateTaskParams): @acp.on_task_event_send async def handle_task_event_send(params: SendEventParams): - """Handle each user message: load prior history, run the agent, save updated history.""" + """Handle each user message through the unified auto_send_turn path.""" agent = get_agent() task_id = params.task.id agent_id = params.agent.id @@ -103,9 +96,7 @@ async def handle_task_event_send(params: SendEventParams): # Echo the user's message into the task history. await adk.messages.create(task_id=task_id, content=params.event.content) - # Load the previous conversation history from state. If state is missing - # (e.g. task wasn't initialised via on_task_create), fall back to a fresh - # one so the agent still responds — just without memory of prior turns. + # Load the previous conversation history from state (fall back to fresh). task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) if task_state is None: state = ConversationState() @@ -123,15 +114,15 @@ async def handle_task_event_send(params: SendEventParams): input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - tracing_handler = create_pydantic_ai_tracing_handler( + # Construct the UnifiedEmitter from the ACP context so tracing is + # automatic and messages are auto-sent to the task stream (Redis). 
+ emitter = UnifiedEmitter( + task_id=task_id, trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, - task_id=task_id, ) - # Wrap the pydantic-ai event stream so we can capture the final - # AgentRunResultEvent (which carries the full message list for the - # next turn) without changing the streaming-helper's signature. + # Capture the terminal AgentRunResultEvent to persist message history. captured_messages: list[Any] = [] async def tee_messages(upstream) -> AsyncIterator[Any]: @@ -141,9 +132,13 @@ async def tee_messages(upstream) -> AsyncIterator[Any]: yield event async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: - final_output = await stream_pydantic_ai_events( - tee_messages(stream), task_id, tracing_handler=tracing_handler + # The unified auto_send path delivers streamed tool requests natively + # (Start+Delta+Done), so no coalescing workaround is needed. + turn = PydanticAITurn( + tee_messages(stream), + model=MODEL_NAME, ) + result = await emitter.auto_send_turn(turn) # Save the updated message history so the next turn picks up here. if captured_messages: @@ -156,7 +151,7 @@ async def tee_messages(upstream) -> AsyncIterator[Any]: ) if turn_span: - turn_span.output = {"final_output": final_output} + turn_span.output = {"final_output": result.final_text} @acp.on_task_cancel diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py index 2c0f6f10c..e7b764d82 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/agent.py @@ -1,4 +1,4 @@ -"""Pydantic AI agent definition. +"""Pydantic AI agent definition for the async harness test agent. The Agent is the boundary between this module and the API layer (acp.py). Pydantic AI handles its own tool-call loop internally — no graph required. 
@@ -12,6 +12,8 @@ from project.tools import get_weather +__all__ = ["create_agent", "MODEL_NAME"] + MODEL_NAME = "openai:gpt-4o-mini" SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. @@ -29,9 +31,7 @@ def create_agent() -> Agent: """Build and return the Pydantic AI agent with tools registered.""" agent = Agent( MODEL_NAME, - system_prompt=SYSTEM_PROMPT.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ), + system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), ) agent.tool_plain(get_weather) diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py index 98f65d509..0f16a7cb0 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/project/tools.py @@ -1,8 +1,8 @@ -"""Tool definitions for the async Pydantic AI agent. +"""Tool definitions for the async harness Pydantic AI agent. Pydantic AI tools are registered directly on the Agent via decorators -(see project.agent). This module hosts the bare functions so they're -easy to unit-test in isolation. +(see project.agent). This module hosts the bare function so it is easy to +unit-test in isolation. 
""" from __future__ import annotations diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml index f5cd32e0a..257918014 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "ab110-pydantic-ai" version = "0.1.0" -description = "An async Pydantic AI agent with tool calling and Redis streaming" +description = "An async Pydantic AI harness test agent using the unified emitter surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py index a31322d30..ce573a697 100644 --- a/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py +++ b/examples/tutorials/10_async/00_base/110_pydantic_ai/tests/test_agent.py @@ -1,8 +1,10 @@ -"""Tests for the async Pydantic AI agent. +"""Live tests for the async Pydantic AI agent. -This test suite validates: -- Non-streaming event sending and polling -- Streaming event sending +These tests require a running agent (server + deployed agent) and exercise the +unified-surface async handler end-to-end over the wire. + +Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives +in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai async suite). To run these tests: 1. 
Make sure the agent is running (via docker-compose or `agentex agents run`) @@ -53,14 +55,12 @@ async def agent_id(client, agent_name): class TestNonStreamingEvents: - """Test non-streaming event sending and polling.""" + """Test non-streaming event sending through the unified auto_send_turn path.""" @pytest.mark.asyncio async def test_send_event(self, client: AsyncAgentex, agent_id: str): - """Test sending an event to the async Pydantic AI agent.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + """Test sending an event to the async harness Pydantic AI agent.""" + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -77,9 +77,7 @@ async def test_send_event(self, client: AsyncAgentex, agent_id: str): @pytest.mark.asyncio async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): """Test that the agent can use tools (e.g., weather tool).""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -100,9 +98,7 @@ class TestStreamingEvents: @pytest.mark.asyncio async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str): """Test sending an event and streaming the response.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/.dockerignore b/examples/tutorials/10_async/00_base/120_openai_agents/.dockerignore similarity index 
100% rename from examples/tutorials/00_sync/harness_pydantic_ai/.dockerignore rename to examples/tutorials/10_async/00_base/120_openai_agents/.dockerignore diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile b/examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile similarity index 70% rename from examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile rename to examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile index 3e0bd696a..76fe0fdef 100644 --- a/examples/tutorials/10_async/00_base/harness_langgraph/Dockerfile +++ b/examples/tutorials/10_async/00_base/120_openai_agents/Dockerfile @@ -23,16 +23,16 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 # Copy pyproject.toml and README.md to install dependencies -COPY 10_async/00_base/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml -COPY 10_async/00_base/harness_langgraph/README.md /app/harness_langgraph/README.md +COPY 10_async/00_base/120_openai_agents/pyproject.toml /app/120_openai_agents/pyproject.toml +COPY 10_async/00_base/120_openai_agents/README.md /app/120_openai_agents/README.md -WORKDIR /app/harness_langgraph +WORKDIR /app/120_openai_agents # Copy the project code -COPY 10_async/00_base/harness_langgraph/project /app/harness_langgraph/project +COPY 10_async/00_base/120_openai_agents/project /app/120_openai_agents/project # Copy the test files -COPY 10_async/00_base/harness_langgraph/tests /app/harness_langgraph/tests +COPY 10_async/00_base/120_openai_agents/tests /app/120_openai_agents/tests # Copy shared test utilities COPY test_utils /app/test_utils @@ -44,7 +44,7 @@ RUN uv pip install --system .[dev] pytest-asyncio httpx ENV PYTHONPATH=/app # Set test environment variables -ENV AGENT_NAME=a-harness-langgraph +ENV AGENT_NAME=ab120-openai-agents # Run the agent using uvicorn CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git 
a/examples/tutorials/10_async/00_base/130_harness_openai/README.md b/examples/tutorials/10_async/00_base/120_openai_agents/README.md similarity index 92% rename from examples/tutorials/10_async/00_base/130_harness_openai/README.md rename to examples/tutorials/10_async/00_base/120_openai_agents/README.md index ac439e4ed..0b55b00a2 100644 --- a/examples/tutorials/10_async/00_base/130_harness_openai/README.md +++ b/examples/tutorials/10_async/00_base/120_openai_agents/README.md @@ -5,7 +5,7 @@ delivers its output through the **unified harness surface**. ## What this demonstrates -Same `OpenAITurn` adapter as the sync tutorial (`060_harness_openai`), but the +Same `OpenAITurn` adapter as the sync tutorial (`050_openai_agents`), but the async ACP pushes the turn to the task stream via `UnifiedEmitter.auto_send_turn` instead of yielding over HTTP. `auto_send_turn` returns a `TurnResult` with the accumulated final text and normalized usage. diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml b/examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml similarity index 82% rename from examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml rename to examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml index 7e67675fa..bd8d5cce5 100644 --- a/examples/tutorials/10_async/00_base/130_harness_openai/manifest.yaml +++ b/examples/tutorials/10_async/00_base/120_openai_agents/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../../ include_paths: - - 10_async/00_base/130_harness_openai + - 10_async/00_base/120_openai_agents - test_utils - dockerfile: 10_async/00_base/130_harness_openai/Dockerfile - dockerignore: 10_async/00_base/130_harness_openai/.dockerignore + dockerfile: 10_async/00_base/120_openai_agents/Dockerfile + dockerignore: 10_async/00_base/120_openai_agents/.dockerignore local_development: agent: @@ -16,7 +16,7 @@ local_development: agent: acp_type: async - name: ab130-harness-openai + name: 
ab120-openai-agents description: An async OpenAI Agents SDK agent on the unified harness surface temporal: @@ -46,7 +46,7 @@ deployment: global: agent: - name: "ab130-harness-openai" + name: "ab120-openai-agents" description: "An async OpenAI Agents SDK agent on the unified harness surface" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/harness_codex/project/__init__.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/harness_codex/project/__init__.py rename to examples/tutorials/10_async/00_base/120_openai_agents/project/__init__.py diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/acp.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/acp.py similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/project/acp.py rename to examples/tutorials/10_async/00_base/120_openai_agents/project/acp.py diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/agent.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/agent.py similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/project/agent.py rename to examples/tutorials/10_async/00_base/120_openai_agents/project/agent.py diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/tools.py b/examples/tutorials/10_async/00_base/120_openai_agents/project/tools.py similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/project/tools.py rename to examples/tutorials/10_async/00_base/120_openai_agents/project/tools.py diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml b/examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml similarity index 95% rename from examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml rename to 
examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml index c05e8c1c6..f48fab49f 100644 --- a/examples/tutorials/10_async/00_base/130_harness_openai/pyproject.toml +++ b/examples/tutorials/10_async/00_base/120_openai_agents/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "ab130-harness-openai" +name = "ab120-openai-agents" version = "0.1.0" description = "An async OpenAI Agents SDK agent on the unified harness surface" readme = "README.md" diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/tests/test_agent.py b/examples/tutorials/10_async/00_base/120_openai_agents/tests/test_agent.py similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/tests/test_agent.py rename to examples/tutorials/10_async/00_base/120_openai_agents/tests/test_agent.py diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile deleted file mode 100644 index 1272027cf..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml /app/120_openai_agents_local_sandbox/pyproject.toml -COPY 10_async/00_base/120_openai_agents_local_sandbox/README.md /app/120_openai_agents_local_sandbox/README.md - -WORKDIR 
/app/120_openai_agents_local_sandbox - -# Copy the project code -COPY 10_async/00_base/120_openai_agents_local_sandbox/project /app/120_openai_agents_local_sandbox/project - -# Copy the test files -COPY 10_async/00_base/120_openai_agents_local_sandbox/tests /app/120_openai_agents_local_sandbox/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] pytest-asyncio httpx - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=ab120-openai-agents-local-sandbox - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md deleted file mode 100644 index 58d422b39..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/README.md +++ /dev/null @@ -1,119 +0,0 @@ -# Tutorial 120: Async OpenAI Agents SDK with a Local Sandbox - -This tutorial demonstrates how to build an **async (non-Temporal)** agent on AgentEx -using the [OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents) -and its **sandbox** runtime, running with the **local** (`unix_local`) backend. - -The agent is a "local sandbox assistant": it answers questions by actually running -real shell commands (e.g. `python3 --version`, `ls /tmp`, `python3 -c "..."`) -instead of guessing. - -This mirrors the Pydantic AI async tutorial (`110_pydantic_ai`): same async ACP -model (`acp_type: async`, `temporal.enabled: false`), same per-task `adk.state` -multi-turn memory pattern. The difference is the runtime — here we use the OpenAI -Agents SDK `SandboxAgent` with the local sandbox backend. 
- -## Key Concepts - -### Async ACP (base) -The async ACP model is event-driven: `on_task_create` initializes per-task state, -and `on_task_event_send` handles each user message. Conversation history is -persisted across turns via `adk.state`. - -### OpenAI Agents SDK Sandbox -The OpenAI Agents SDK ships `agents.sandbox`, which lets you give an agent -**capabilities** (instead of hand-written tools) that the runtime turns into real -tools backed by a sandbox: - -- **`SandboxAgent`**: an `Agent` that is granted sandbox capabilities. -- **Capabilities** (`from agents.sandbox.capabilities import Shell, Filesystem, Memory`): - each capability expands into a set of real tools. This tutorial uses `Shell`, which - lets the model run real shell commands. -- **`SandboxRunConfig`** + a sandbox **client**: tells the runtime *where* the tools - actually execute. - -### The LOCAL sandbox (`UnixLocalSandboxClient`) -This tutorial uses the local backend -(`from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient, UnixLocalSandboxClientOptions`), -`backend_id="unix_local"`. The local sandbox runs shell commands **ON THE HOST** — -the agent's own container/process. There is **no Docker, no Temporal, and no remote -sandbox infrastructure** involved. 
- -The sandbox is wired up through the SDK's `RunConfig`: - -```python -from agents import Runner, set_tracing_disabled -from agents.run_config import RunConfig -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.sandbox.capabilities import Shell -from agents.sandbox.sandboxes.unix_local import ( - UnixLocalSandboxClient, - UnixLocalSandboxClientOptions, -) - -set_tracing_disabled(True) # avoid api.openai.com tracing 401 behind a gateway - -agent = SandboxAgent( - name="Local Sandbox Assistant", - instructions="...use the shell tools to actually run commands...", - capabilities=[Shell()], -) -run_config = RunConfig( - sandbox=SandboxRunConfig( - client=UnixLocalSandboxClient(), - options=UnixLocalSandboxClientOptions(), - ) -) -result = await Runner.run(agent, input=input_list, run_config=run_config) -print(result.final_output) -``` - -`Runner.run` drives the full tool-call loop internally: the model issues shell -commands, the local sandbox runs them on the host, the output is fed back, and the -loop continues until the model produces a final answer. Because the loop is -self-contained, the async handler runs the agent and persists a single final -`TextContent` rather than streaming tokens. - -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | Async ACP server + handlers (`adk.state` multi-turn, runs the sandbox agent) | -| `project/agent.py` | `SandboxAgent` + `RunConfig(sandbox=...)` wiring + `run_agent` | -| `project/tools.py` | Sandbox capability factory (`Shell`) | -| `tests/test_agent.py` | Integration tests (polling pattern) | -| `manifest.yaml` | Agent configuration | - -## Running Locally - -```bash -# From this directory -agentex agents run -``` - -Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM -gateway) in your environment or in a `.env` file in `project/` so the agent can call -the model. 
- -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` - -## Notes - -- **No infra required.** Because this uses the `unix_local` backend, the shell tools - run directly in the agent's process — no Docker daemon, no Temporal, no remote - sandbox. Swap the client for a remote/containerized backend to isolate execution. -- **Tracing.** `set_tracing_disabled(True)` turns off the OpenAI Agents SDK's native - tracer (which would otherwise try to ship traces to `api.openai.com`). The manifest - also sets `OPENAI_AGENTS_DISABLE_TRACING=1`. AgentEx/SGP tracing still runs via the - tracing manager configured in `acp.py` when SGP credentials are present. -- **Capabilities are the tools.** To let the agent do more, add capabilities in - `project/tools.py` (e.g. `Filesystem()`, `Memory()`). - -## Further Reading - -- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents -- The Temporal variant of this tutorial: `10_async/10_temporal/120_openai_agents_local_sandbox` diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml deleted file mode 100644 index e0c3c0596..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/manifest.yaml +++ /dev/null @@ -1,61 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/00_base/120_openai_agents_local_sandbox - - test_utils - dockerfile: 10_async/00_base/120_openai_agents_local_sandbox/Dockerfile - dockerignore: 10_async/00_base/120_openai_agents_local_sandbox/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: async - name: ab120-openai-agents-local-sandbox - description: An async OpenAI Agents SDK agent using a local (unix_local) sandbox - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: 
openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - - env: - OPENAI_AGENTS_DISABLE_TRACING: "1" - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "ab120-openai-agents-local-sandbox" - description: "An async OpenAI Agents SDK agent using a local (unix_local) sandbox" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py deleted file mode 100644 index 6ff475873..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/acp.py +++ /dev/null @@ -1,149 +0,0 @@ -"""ACP handler for the async OpenAI Agents SDK local-sandbox agent. - -Uses the async ACP model (``acp_type: async``, ``temporal.enabled: false``), -mirroring the Pydantic AI tutorial (110). The difference is the runtime: here we -run an OpenAI Agents SDK ``SandboxAgent`` against the **local** sandbox backend -(``UnixLocalSandboxClient``), which executes real shell commands on the host. - -The OpenAI Agents SDK sandbox runtime drives the full tool-call loop internally -inside ``Runner.run`` (model -> shell command -> output -> model -> ... -> final -answer), so this handler runs the agent and persists a single final -``TextContent`` rather than streaming tokens itself. - -Multi-turn memory is persisted via ``adk.state``: on each turn we load the prior -OpenAI Agents SDK input list from state, run the agent with it, then save the -updated list (``result.to_input_list()``) back. 
Without this, every turn would be -a fresh stateless run and the agent would forget the prior conversation. -""" - -from __future__ import annotations - -import os -from typing import Any - -from dotenv import load_dotenv - -load_dotenv() - -import agentex.lib.adk as adk -from project.agent import run_agent -from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams -from agentex.lib.types.fastacp import AsyncACPConfig -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.utils.model_utils import BaseModel -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client -# compatibility, so the same example works behind the Scale LiteLLM gateway. -_litellm_key = os.environ.get("LITELLM_API_KEY") -if _litellm_key and not os.environ.get("OPENAI_API_KEY"): - os.environ["OPENAI_API_KEY"] = _litellm_key - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create( - acp_type="async", - config=AsyncACPConfig(type="base"), -) - - -class ConversationState(BaseModel): - """Per-task conversation state persisted via ``adk.state``. - - ``input_list`` holds the OpenAI Agents SDK conversation history — the same - structure ``Runner.run`` accepts as input and ``result.to_input_list()`` - returns. Persisting it between turns gives the agent multi-turn memory. 
- """ - - input_list: list[dict[str, Any]] = [] - turn_number: int = 0 - - -@acp.on_task_create -async def handle_task_create(params: CreateTaskParams): - """Initialize per-task state on task creation. - - A fresh task starts with no message history; the conversation is built up by - ``handle_task_event_send`` on each subsequent user message. - """ - logger.info(f"Task created: {params.task.id}") - await adk.state.create( - task_id=params.task.id, - agent_id=params.agent.id, - state=ConversationState(), - ) - - -@acp.on_task_event_send -async def handle_task_event_send(params: SendEventParams): - """Handle each user message: load prior history, run the agent, save updated history.""" - task_id = params.task.id - agent_id = params.agent.id - user_message = params.event.content.content - - logger.info(f"Processing message for thread {task_id}") - - # Echo the user's message into the task history so it shows up in the UI. - await adk.messages.create(task_id=task_id, content=params.event.content) - - # Load the previous conversation history from state. If state is missing - # (e.g. task wasn't initialised via on_task_create), fall back to a fresh - # one so the agent still responds — just without memory of prior turns. - task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) - if task_state is None: - state = ConversationState() - task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state) - else: - state = ConversationState.model_validate(task_state.state) - - state.turn_number += 1 - state.input_list.append({"role": "user", "content": user_message}) - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name=f"Turn {state.turn_number}", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - # The OpenAI Agents SDK sandbox runtime runs the full tool-call loop - # internally (model -> shell command on the local host -> output -> - # model -> ... 
-> final answer), so we get a single final result. - result = await run_agent(state.input_list) - final_output = result.final_output - - # Persist the assistant's final answer as a TaskMessage so it shows up - # in the UI. (Unlike the streaming Pydantic AI tutorial, the sandbox run - # is non-streaming, so we post the final text ourselves.) - await adk.messages.create( - task_id=task_id, - content=TextContent(author="agent", content=final_output), - ) - - # Save the updated message history so the next turn picks up here. - state.input_list = result.to_input_list() - await adk.state.update( - state_id=task_state.id, - task_id=task_id, - agent_id=agent_id, - state=state, - ) - - if turn_span: - turn_span.output = {"final_output": final_output} - - -@acp.on_task_cancel -async def handle_task_canceled(params: CancelTaskParams): - logger.info(f"Task canceled: {params.task.id}") diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py deleted file mode 100644 index 177bb287d..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/agent.py +++ /dev/null @@ -1,95 +0,0 @@ -"""OpenAI Agents SDK local-sandbox agent definition (async, non-Temporal). - -This mirrors the Pydantic AI tutorial (110): the agent is the boundary between -this module and the API layer (acp.py). The difference is the runtime — here we -use the OpenAI Agents SDK ``SandboxAgent`` together with the **local** sandbox -backend (``UnixLocalSandboxClient``). - -The local sandbox runs shell commands ON THE HOST — the agent's own -container/process. There is no Docker, no Temporal, and no remote sandbox -infrastructure. The OpenAI Agents SDK runs its own tool-call loop internally: -when the model decides to run a shell command, the sandbox executes it locally -and feeds the output back to the model until it produces a final answer. 
-""" - -from __future__ import annotations - -from datetime import datetime - -from agents import Runner, set_tracing_disabled -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.run_config import RunConfig -from agents.sandbox.sandboxes.unix_local import ( - UnixLocalSandboxClient, - UnixLocalSandboxClientOptions, -) - -from project.tools import get_capabilities - -# Disable the openai-agents SDK's native tracer so it doesn't ship traces to -# api.openai.com using OPENAI_API_KEY (which may be a gateway/proxy key and would -# 401). Agentex tracing still runs via the tracing manager configured in acp.py. -set_tracing_disabled(True) - -MODEL_NAME = "gpt-4o-mini" -INSTRUCTIONS = """You are a local sandbox assistant. - -Current date and time: {timestamp} - -You have access to shell tools that run real commands on the local machine. - -Guidelines: -- ALWAYS use the shell tools to actually run commands — never guess or make up - output. If the user asks for the Python version, run `python3 --version`. If - they ask to list files, run `ls`. If they ask you to compute something, use - `python3 -c "..."`. -- Run the minimal command(s) needed to answer the question. -- Report the real command output back to the user, concisely. -""" - - -def create_agent() -> SandboxAgent: - """Build and return the OpenAI Agents SDK sandbox agent. - - The agent is granted shell capabilities (see ``project.tools``). The actual - sandbox backend (where the shell commands run) is supplied at run time via - the ``RunConfig`` returned by ``create_run_config``. - """ - return SandboxAgent( - name="Local Sandbox Assistant", - model=MODEL_NAME, - instructions=INSTRUCTIONS.format( - timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S") - ), - capabilities=get_capabilities(), - ) - - -def create_run_config() -> RunConfig: - """Build the RunConfig that points the agent at the LOCAL sandbox backend. 
- - ``UnixLocalSandboxClient`` (backend_id="unix_local") runs shell commands on - the host — the agent's own process — so no Docker or remote infra is needed. - """ - return RunConfig( - sandbox=SandboxRunConfig( - client=UnixLocalSandboxClient(), - options=UnixLocalSandboxClientOptions(), - ) - ) - - -async def run_agent(input_list: list) -> "Runner": - """Run the sandbox agent over the conversation so far and return the result. - - The OpenAI Agents SDK handles the full tool-call loop internally: the model - issues shell commands, the local sandbox runs them on the host, and the - output is fed back until the model produces a final answer. - - We pass the full ``input_list`` (prior turns + the new user message) so the - agent has conversation memory across turns; the caller persists - ``result.to_input_list()`` back into ``adk.state`` for the next turn. - """ - agent = create_agent() - run_config = create_run_config() - return await Runner.run(agent, input=input_list, run_config=run_config, max_turns=10) diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py deleted file mode 100644 index a931fa273..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/tools.py +++ /dev/null @@ -1,29 +0,0 @@ -"""Sandbox capabilities for the async OpenAI Agents SDK local-sandbox agent. - -Unlike the Pydantic AI tutorial (110), this agent does not register hand-written -Python functions as tools. Instead it is given *capabilities* — the OpenAI Agents -SDK sandbox runtime turns each capability into a real set of tools (run a shell -command, read a file, etc.) backed by an actual sandbox backend. - -Here we use the ``Shell`` capability, which lets the model run real shell commands. 
-With the local (``unix_local``) backend those commands execute ON THE HOST — the -agent's own process/container — so there is no Docker, Temporal, or remote infra -involved. This module hosts the capability factory so the agent wiring in -``project.agent`` stays readable and the capability set is easy to extend -(e.g. add ``Filesystem()`` or ``Memory()``). -""" - -from __future__ import annotations - -from agents.sandbox.capabilities import Shell - - -def get_capabilities() -> list: - """Return the sandbox capabilities the agent is allowed to use. - - Returns: - A list of OpenAI Agents SDK sandbox capabilities. We grant ``Shell`` so - the agent can run real shell commands on the local machine. Add - ``Filesystem()`` or ``Memory()`` here to expand what the agent can do. - """ - return [Shell()] diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml deleted file mode 100644 index 75c6254f3..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "ab120-openai-agents-local-sandbox" -version = "0.1.0" -description = "An async OpenAI Agents SDK agent using a local (unix_local) sandbox" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "openai-agents>=0.14.3,<0.15", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py 
b/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py deleted file mode 100644 index 0c7904eac..000000000 --- a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/tests/test_agent.py +++ /dev/null @@ -1,122 +0,0 @@ -"""Tests for the async OpenAI Agents SDK local-sandbox agent. - -This test suite validates that the agent actually runs shell commands in the -LOCAL sandbox (unix_local backend) by polling for the agent's response: -- Ask for the Python version -> response contains "Python 3" -- Ask it to compute 21 * 2 with python3 -> response contains "42" - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: ab120-openai-agents-local-sandbox) -""" - -import os -import uuid - -import pytest -import pytest_asyncio -from test_utils.async_utils import send_event_and_poll_yielding - -from agentex import AsyncAgentex -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "ab120-openai-agents-local-sandbox") - - -@pytest_asyncio.fixture -async def client(): - """Create an AsyncAgentex client instance for testing.""" - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent 
with name {agent_name} not found.") - - -async def _send_and_collect_agent_text( - client: AsyncAgentex, agent_id: str, task_id: str, user_message: str -) -> str: - """Send a user message and accumulate all agent text responses into a string.""" - parts: list[str] = [] - async for message in send_event_and_poll_yielding( - client=client, - agent_id=agent_id, - task_id=task_id, - user_message=user_message, - timeout=60, - sleep_interval=1.0, - yield_updates=True, - ): - content = message.content - if content and content.type == "text" and content.author == "agent": - if content.content and content.content not in parts: - parts.append(content.content) - return "\n".join(parts) - - -class TestLocalSandboxEvents: - """Test the async local-sandbox OpenAI Agents SDK agent.""" - - @pytest.mark.asyncio - async def test_shell_python_version(self, client: AsyncAgentex, agent_id: str): - """The agent should run `python3 --version` in the local sandbox. - - The sandbox runs on Python 3.12, so the real output contains "Python 3". - """ - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) - task = task_response.result - assert task is not None - - text = await _send_and_collect_agent_text( - client, - agent_id, - task.id, - "Use your shell to print the Python version on this machine, then " - "tell me what it is.", - ) - assert text, "Expected a non-empty response from the sandbox agent." 
- assert "Python 3" in text - - @pytest.mark.asyncio - async def test_shell_compute(self, client: AsyncAgentex, agent_id: str): - """The agent should use python3 in the sandbox to compute 21 * 2 == 42.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) - task = task_response.result - assert task is not None - - text = await _send_and_collect_agent_text( - client, - agent_id, - task.id, - "Use python3 in your shell to compute 21 * 2 and tell me the result.", - ) - assert text, "Expected a non-empty response from the sandbox agent." - assert "42" in text - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile b/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile deleted file mode 100644 index a31c89a31..000000000 --- a/examples/tutorials/10_async/00_base/130_harness_openai/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 10_async/00_base/130_harness_openai/pyproject.toml /app/130_harness_openai/pyproject.toml -COPY 10_async/00_base/130_harness_openai/README.md /app/130_harness_openai/README.md - -WORKDIR /app/130_harness_openai - -# Copy the project code -COPY 10_async/00_base/130_harness_openai/project /app/130_harness_openai/project - -# Copy the test files -COPY 10_async/00_base/130_harness_openai/tests /app/130_harness_openai/tests - -# Copy shared test utilities 
-COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] pytest-asyncio httpx - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=ab130-harness-openai - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/project/__init__.py b/examples/tutorials/10_async/00_base/130_harness_openai/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/.dockerignore b/examples/tutorials/10_async/00_base/140_codex/.dockerignore similarity index 100% rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/.dockerignore rename to examples/tutorials/10_async/00_base/140_codex/.dockerignore diff --git a/examples/tutorials/10_async/00_base/harness_codex/Dockerfile b/examples/tutorials/10_async/00_base/140_codex/Dockerfile similarity index 64% rename from examples/tutorials/10_async/00_base/harness_codex/Dockerfile rename to examples/tutorials/10_async/00_base/140_codex/Dockerfile index 06b76aae2..ca5b99ffe 100644 --- a/examples/tutorials/10_async/00_base/harness_codex/Dockerfile +++ b/examples/tutorials/10_async/00_base/140_codex/Dockerfile @@ -22,18 +22,18 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 -COPY 10_async/00_base/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml -COPY 10_async/00_base/harness_codex/README.md /app/harness_codex/README.md +COPY 10_async/00_base/140_codex/pyproject.toml /app/140_codex/pyproject.toml +COPY 10_async/00_base/140_codex/README.md /app/140_codex/README.md -WORKDIR /app/harness_codex +WORKDIR /app/140_codex -COPY 10_async/00_base/harness_codex/project /app/harness_codex/project -COPY 10_async/00_base/harness_codex/tests 
/app/harness_codex/tests +COPY 10_async/00_base/140_codex/project /app/140_codex/project +COPY 10_async/00_base/140_codex/tests /app/140_codex/tests COPY test_utils /app/test_utils RUN uv pip install --system .[dev] ENV PYTHONPATH=/app -ENV AGENT_NAME=ab-harness-codex +ENV AGENT_NAME=ab140-codex CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/00_base/harness_codex/README.md b/examples/tutorials/10_async/00_base/140_codex/README.md similarity index 94% rename from examples/tutorials/10_async/00_base/harness_codex/README.md rename to examples/tutorials/10_async/00_base/140_codex/README.md index 9bbcd927a..a00ddb562 100644 --- a/examples/tutorials/10_async/00_base/harness_codex/README.md +++ b/examples/tutorials/10_async/00_base/140_codex/README.md @@ -1,4 +1,4 @@ -# harness_codex (async base) +# 140_codex (async base) Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap, `CodexTurn`, and `UnifiedEmitter` for an **async** (Redis-streaming, no Temporal) @@ -28,7 +28,7 @@ Live runs require: ```bash cd /path/to/scale-agentex-python -uv run --all-packages --all-extras pytest examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py -q +uv run --all-packages --all-extras pytest examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py -q ``` ## Running live integration tests diff --git a/examples/tutorials/10_async/00_base/harness_codex/conftest.py b/examples/tutorials/10_async/00_base/140_codex/conftest.py similarity index 100% rename from examples/tutorials/10_async/00_base/harness_codex/conftest.py rename to examples/tutorials/10_async/00_base/140_codex/conftest.py diff --git a/examples/tutorials/10_async/00_base/harness_codex/manifest.yaml b/examples/tutorials/10_async/00_base/140_codex/manifest.yaml similarity index 84% rename from examples/tutorials/10_async/00_base/harness_codex/manifest.yaml rename to examples/tutorials/10_async/00_base/140_codex/manifest.yaml 
index e88e2029d..be020b141 100644 --- a/examples/tutorials/10_async/00_base/harness_codex/manifest.yaml +++ b/examples/tutorials/10_async/00_base/140_codex/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../../ include_paths: - - 10_async/00_base/harness_codex + - 10_async/00_base/140_codex - test_utils - dockerfile: 10_async/00_base/harness_codex/Dockerfile - dockerignore: 10_async/00_base/harness_codex/.dockerignore + dockerfile: 10_async/00_base/140_codex/Dockerfile + dockerignore: 10_async/00_base/140_codex/.dockerignore local_development: agent: @@ -16,7 +16,7 @@ local_development: agent: acp_type: async - name: ab-harness-codex + name: ab140-codex description: Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess temporal: @@ -46,7 +46,7 @@ deployment: global: agent: - name: "ab-harness-codex" + name: "ab140-codex" description: "Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/harness_langgraph/project/__init__.py b/examples/tutorials/10_async/00_base/140_codex/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/harness_langgraph/project/__init__.py rename to examples/tutorials/10_async/00_base/140_codex/project/__init__.py diff --git a/examples/tutorials/10_async/00_base/harness_codex/project/acp.py b/examples/tutorials/10_async/00_base/140_codex/project/acp.py similarity index 100% rename from examples/tutorials/10_async/00_base/harness_codex/project/acp.py rename to examples/tutorials/10_async/00_base/140_codex/project/acp.py diff --git a/examples/tutorials/10_async/00_base/harness_codex/pyproject.toml b/examples/tutorials/10_async/00_base/140_codex/pyproject.toml similarity index 96% rename from examples/tutorials/10_async/00_base/harness_codex/pyproject.toml rename to examples/tutorials/10_async/00_base/140_codex/pyproject.toml index c25a65c47..bdf7c462f 100644 --- 
a/examples/tutorials/10_async/00_base/harness_codex/pyproject.toml +++ b/examples/tutorials/10_async/00_base/140_codex/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "ab-harness-codex" +name = "ab140-codex" version = "0.1.0" description = "Async (base) tutorial agent driving the unified harness surface via local codex CLI subprocess" readme = "README.md" diff --git a/examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py b/examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py similarity index 99% rename from examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py rename to examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py index b50ee9116..68ca5aded 100644 --- a/examples/tutorials/10_async/00_base/harness_codex/tests/test_agent.py +++ b/examples/tutorials/10_async/00_base/140_codex/tests/test_agent.py @@ -129,7 +129,7 @@ async def test_yield_turn_is_passthrough(self): LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1" AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "ab-harness-codex") +AGENT_NAME = os.environ.get("AGENT_NAME", "ab140-codex") @pytest.mark.skipif( diff --git a/examples/tutorials/10_async/00_base/harness_codex/project/__init__.py b/examples/tutorials/10_async/00_base/harness_codex/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/README.md b/examples/tutorials/10_async/00_base/harness_langgraph/README.md deleted file mode 100644 index 7efe28207..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/README.md +++ /dev/null @@ -1,57 +0,0 @@ -# Tutorial: Async Harness LangGraph Agent - -This tutorial demonstrates how to build an **async** LangGraph agent on AgentEx -using the **unified harness surface**: - -```python -turn = LangGraphTurn(stream, 
model=None) -emitter = UnifiedEmitter(task_id=task_id, trace_id=task_id, ...) -result = await emitter.auto_send_turn(turn) -``` - -Compare with ``100_langgraph``, which uses the bespoke -``stream_langgraph_events`` helper directly. - -## Key Concepts - -### Unified Harness - -`LangGraphTurn` implements the `HarnessTurn` protocol: it wraps the raw -LangGraph `astream()` generator and exposes `events` (an async generator of -`TaskMessageUpdate`) and `usage()` (token counts captured from the final -`AIMessage`). - -`UnifiedEmitter.auto_send_turn(turn)` pushes each event to Redis via -`streaming_task_message_context`, accumulates the final text, and returns a -`TurnResult(final_text=..., usage=...)`. - -The same `LangGraphTurn` object can also be passed to -`UnifiedEmitter.yield_turn` in the sync channel. - -### AGX1-377 Note - -LangGraph emits tool requests as `StreamTaskMessageFull` events (from "updates" -node outputs). The `SpanDeriver` does not open tool spans from Full events -today; that gap is tracked in AGX1-373. 
- -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server using unified harness (LangGraphTurn + auto_send_turn) | -| `project/graph.py` | LangGraph state graph (identical to 100_langgraph) | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration (name: a-harness-langgraph) | - -## Running Locally - -```bash -agentex agents run -``` - -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml b/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml deleted file mode 100644 index bb19e25b3..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/manifest.yaml +++ /dev/null @@ -1,58 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/00_base/harness_langgraph - - test_utils - dockerfile: 10_async/00_base/harness_langgraph/Dockerfile - dockerignore: 10_async/00_base/harness_langgraph/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: async - name: a-harness-langgraph - description: An async LangGraph agent using the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn) - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "a-harness-langgraph" - description: "An async LangGraph agent using the unified 
harness surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/__init__.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py deleted file mode 100644 index a99395424..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/project/acp.py +++ /dev/null @@ -1,109 +0,0 @@ -"""ACP handler for async harness LangGraph agent. - -Uses the unified harness surface: ``LangGraphTurn`` wraps the LangGraph -``astream()`` generator, and ``UnifiedEmitter.auto_send_turn`` streams events -to Redis and returns a ``TurnResult`` with the accumulated final text. - -Differences from ``100_langgraph`` (bespoke path): -- No ``create_langgraph_tracing_handler`` boilerplate. -- ``stream_langgraph_events`` is replaced by - ``UnifiedEmitter.auto_send_turn(LangGraphTurn(stream))``. -- Tool calls/responses go through ``streaming_task_message_context`` - (same code path as text deltas), making the event stream channel-agnostic. -- Usage data (token counts) is captured on ``LangGraphTurn.usage()`` after - ``auto_send_turn`` returns. - -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` -events (from "updates"). The ``SpanDeriver`` does not open tool spans from -Full events today; that gap is tracked in AGX1-373. 
-""" - -from __future__ import annotations - -import os - -from dotenv import load_dotenv - -load_dotenv() - -import agentex.lib.adk as adk -from project.graph import create_graph -from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams -from agentex.lib.types.fastacp import AsyncACPConfig -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.core.harness.emitter import UnifiedEmitter -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create( - acp_type="async", - config=AsyncACPConfig(type="base"), -) - -_graph = None - - -async def get_graph(): - global _graph - if _graph is None: - _graph = await create_graph() - return _graph - - -@acp.on_task_event_send -async def handle_task_event_send(params: SendEventParams): - """Handle incoming events, streaming tokens and tool calls via unified harness.""" - graph = await get_graph() - task_id = params.task.id - user_message = params.event.content.content - - logger.info(f"Processing message for thread {task_id}") - - await adk.messages.create(task_id=task_id, content=params.event.content) - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name="message", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - stream = graph.astream( - {"messages": [{"role": "user", "content": user_message}]}, - config={"configurable": {"thread_id": task_id}}, - stream_mode=["messages", "updates"], - ) - - turn = 
LangGraphTurn(stream, model=None) - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - - result = await emitter.auto_send_turn(turn) - - if turn_span: - turn_span.output = {"final_output": result.final_text} - - -@acp.on_task_create -async def handle_task_create(params: CreateTaskParams): - logger.info(f"Task created: {params.task.id}") - - -@acp.on_task_cancel -async def handle_task_canceled(params: CancelTaskParams): - logger.info(f"Task canceled: {params.task.id}") diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py deleted file mode 100644 index 4aeac3b3c..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/project/graph.py +++ /dev/null @@ -1,67 +0,0 @@ -"""LangGraph graph definition for the harness_langgraph async agent. - -Identical to ``100_langgraph/project/graph.py`` — the graph definition is not -affected by the harness migration. Only ``acp.py`` changes. -""" - -from __future__ import annotations - -from typing import Any, Annotated -from datetime import datetime -from typing_extensions import TypedDict - -from langgraph.graph import START, StateGraph -from langchain_openai import ChatOpenAI -from langgraph.prebuilt import ToolNode, tools_condition -from langchain_core.messages import SystemMessage -from langgraph.graph.message import add_messages - -from project.tools import TOOLS -from agentex.lib.adk import create_checkpointer - -MODEL_NAME = "gpt-5" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -class AgentState(TypedDict): - """State schema for the agent graph.""" - - messages: Annotated[list[Any], add_messages] - - -async def create_graph(): - """Create and compile the agent graph with checkpointer.""" - llm = ChatOpenAI( - model=MODEL_NAME, - reasoning={"effort": "high", "summary": "auto"}, - ) - llm_with_tools = llm.bind_tools(TOOLS) - - checkpointer = await create_checkpointer() - - def agent_node(state: AgentState) -> dict[str, Any]: - """Process the current state and generate a response.""" - messages = state["messages"] - if not messages or not isinstance(messages[0], SystemMessage): - system_content = SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) - messages = [SystemMessage(content=system_content)] + messages - response = llm_with_tools.invoke(messages) - return {"messages": [response]} - - builder = StateGraph(AgentState) - builder.add_node("agent", agent_node) - builder.add_node("tools", ToolNode(tools=TOOLS)) - builder.add_edge(START, "agent") - builder.add_conditional_edges("agent", tools_condition, "tools") - builder.add_edge("tools", "agent") - - return builder.compile(checkpointer=checkpointer) diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py b/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py deleted file mode 100644 index 6e7614300..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/project/tools.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Tool definitions for the harness_langgraph async agent.""" - -from langchain_core.tools import Tool - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. 
- - Returns: - A string describing the weather conditions. - """ - return f"The weather in {city} is sunny and 72°F" - - -weather_tool = Tool( - name="get_weather", - func=get_weather, - description="Get the current weather for a city. Input should be a city name.", -) - -TOOLS = [weather_tool] diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml b/examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml deleted file mode 100644 index 69856e6db..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/pyproject.toml +++ /dev/null @@ -1,37 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "a-harness-langgraph" -version = "0.1.0" -description = "An async LangGraph agent using the unified harness surface" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "langgraph", - "langchain-openai", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py b/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py deleted file mode 100644 index 762b2b90c..000000000 --- a/examples/tutorials/10_async/00_base/harness_langgraph/tests/test_agent.py +++ /dev/null @@ -1,100 +0,0 @@ -""" -Tests for the async harness LangGraph agent. - -Validates the unified harness surface (LangGraphTurn + UnifiedEmitter.auto_send_turn) -end-to-end against a live AgentEx server. 
- -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: a-harness-langgraph) -""" - -import os - -import pytest -import pytest_asyncio - -from agentex import AsyncAgentex -from agentex.types import TextContentParam -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest -from agentex.lib.sdk.fastacp.base.base_acp_server import uuid - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "a-harness-langgraph") - - -@pytest_asyncio.fixture -async def client(): - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingEvents: - @pytest.mark.asyncio - async def test_send_event(self, client: AsyncAgentex, agent_id: str): - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="Hello! 
What can you help me with?", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - @pytest.mark.asyncio - async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="What's the weather in San Francisco?", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - -class TestStreamingEvents: - @pytest.mark.asyncio - async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str): - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="Tell me a short joke.", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile b/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile deleted file mode 100644 index 3c1b9dfea..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/Dockerfile +++ /dev/null @@ -1,50 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip 
setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 10_async/00_base/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml -COPY 10_async/00_base/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md - -WORKDIR /app/harness_pydantic_ai - -# Copy the project code -COPY 10_async/00_base/harness_pydantic_ai/project /app/harness_pydantic_ai/project - -# Copy the test files -COPY 10_async/00_base/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] pytest-asyncio httpx - -# Set environment variables -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=ab-harness-pydantic-ai - -# Run the agent using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md b/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md deleted file mode 100644 index 51acb62bd..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/README.md +++ /dev/null @@ -1,54 +0,0 @@ -# Async Pydantic AI Harness Test Agent - -A minimal **async** (Redis-streaming) Pydantic AI agent that drives the -**unified harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) -directly. - -## Why this agent exists - -The `10_async/00_base/110_pydantic_ai` tutorial streams via the -`stream_pydantic_ai_events` helper (which uses the unified surface internally). -This harness test agent calls `emitter.auto_send_turn(...)` **explicitly** at the -agent-author level, making the unified-surface wiring visible and giving the -async channel direct coverage. 
- -## How it wires the unified surface - -In `project/acp.py`: - -```python -emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, -) -async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: - turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME, coalesce_tool_requests=True) - result = await emitter.auto_send_turn(turn) -``` - -- `coalesce_tool_requests=True` is required on the async/auto_send path until - AGX1-377 lands: tool requests are delivered as a single `Full(tool_request)` - rather than streamed `Start + Delta + Done`. -- The `UnifiedEmitter` is constructed from the ACP context (`task_id` + - `trace_id` + `parent_span_id`) so messages auto-send to the task stream - (Redis) and tracing is automatic. -- Multi-turn memory is persisted via `adk.state` (pydantic-ai message history - round-tripped through `ModelMessagesTypeAdapter`). - -## Files - -- `project/acp.py` — async ACP handler using `emitter.auto_send_turn(...)`. -- `project/agent.py` — builds the `pydantic_ai.Agent` with one tool. -- `project/tools.py` — `get_weather(city)` returning a constant. -- `tests/test_agent.py` — live integration test (requires a running agent). - -## Tools - -- `get_weather(city: str) -> str`: returns a fixed "sunny and 72°F" string. - -## Offline coverage - -Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake -streaming/tracing, no network) live in the SDK repo at -`tests/lib/core/harness/test_harness_pydantic_ai_async.py`. 
diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml deleted file mode 100644 index f9e50f329..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/manifest.yaml +++ /dev/null @@ -1,58 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/00_base/harness_pydantic_ai - - test_utils - dockerfile: 10_async/00_base/harness_pydantic_ai/Dockerfile - dockerignore: 10_async/00_base/harness_pydantic_ai/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - -agent: - acp_type: async - name: ab-harness-pydantic-ai - description: An async Pydantic AI harness test agent using the unified emitter surface - - temporal: - enabled: false - - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "ab-harness-pydantic-ai" - description: "An async Pydantic AI harness test agent using the unified emitter surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/__init__.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py deleted file mode 100644 
index 95b638f8b..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/acp.py +++ /dev/null @@ -1,159 +0,0 @@ -"""ACP handler for the async harness Pydantic AI test agent. - -This agent exercises the UNIFIED HARNESS SURFACE on the async (Redis-streaming) -channel — ``UnifiedEmitter.auto_send_turn(PydanticAITurn(...))`` -— calling it directly rather than via the ``stream_pydantic_ai_events`` helper -(which the ``110_pydantic_ai`` tutorial uses). This makes the unified-surface -wiring explicit at the agent-author level. - -Multi-turn memory is persisted via ``adk.state``: on each turn we load the -previous pydantic-ai ``message_history`` from state, run the agent with it, -then save the updated history back. -""" - -from __future__ import annotations - -import os -from typing import Any, AsyncIterator - -from dotenv import load_dotenv - -load_dotenv() - -from pydantic_ai.run import AgentRunResultEvent -from pydantic_ai.messages import ModelMessagesTypeAdapter - -import agentex.lib.adk as adk -from project.agent import MODEL_NAME, create_agent -from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams -from agentex.lib.core.harness import UnifiedEmitter -from agentex.lib.types.fastacp import AsyncACPConfig -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.lib.utils.model_utils import BaseModel -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn -from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config - -logger = make_logger(__name__) - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -acp = FastACP.create( - acp_type="async", - 
config=AsyncACPConfig(type="base"), -) - -_agent = None - - -def get_agent(): - global _agent - if _agent is None: - _agent = create_agent() - return _agent - - -class ConversationState(BaseModel): - """Per-task conversation state persisted via ``adk.state``. - - ``history_json`` holds the pydantic-ai message history serialized by - ``ModelMessagesTypeAdapter`` — pydantic-ai's official way to round-trip - ``ModelMessage`` objects through JSON. - """ - - history_json: str = "[]" - turn_number: int = 0 - - -@acp.on_task_create -async def handle_task_create(params: CreateTaskParams): - """Initialize per-task state on task creation.""" - logger.info(f"Task created: {params.task.id}") - await adk.state.create( - task_id=params.task.id, - agent_id=params.agent.id, - state=ConversationState(), - ) - - -@acp.on_task_event_send -async def handle_task_event_send(params: SendEventParams): - """Handle each user message through the unified auto_send_turn path.""" - agent = get_agent() - task_id = params.task.id - agent_id = params.agent.id - user_message = params.event.content.content - - logger.info(f"Processing message for thread {task_id}") - - # Echo the user's message into the task history. - await adk.messages.create(task_id=task_id, content=params.event.content) - - # Load the previous conversation history from state (fall back to fresh). 
- task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) - if task_state is None: - state = ConversationState() - task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state) - else: - state = ConversationState.model_validate(task_state.state) - - state.turn_number += 1 - previous_messages = ModelMessagesTypeAdapter.validate_json(state.history_json) - - async with adk.tracing.span( - trace_id=task_id, - task_id=task_id, - name=f"Turn {state.turn_number}", - input={"message": user_message}, - data={"__span_type__": "AGENT_WORKFLOW"}, - ) as turn_span: - # Construct the UnifiedEmitter from the ACP context so tracing is - # automatic and messages are auto-sent to the task stream (Redis). - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - - # Capture the terminal AgentRunResultEvent to persist message history. - captured_messages: list[Any] = [] - - async def tee_messages(upstream) -> AsyncIterator[Any]: - async for event in upstream: - if isinstance(event, AgentRunResultEvent): - captured_messages[:] = list(event.result.all_messages()) - yield event - - async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: - # The unified auto_send path delivers streamed tool requests natively - # (Start+Delta+Done), so no coalescing workaround is needed. - turn = PydanticAITurn( - tee_messages(stream), - model=MODEL_NAME, - ) - result = await emitter.auto_send_turn(turn) - - # Save the updated message history so the next turn picks up here. 
- if captured_messages: - state.history_json = ModelMessagesTypeAdapter.dump_json(captured_messages).decode() - await adk.state.update( - state_id=task_state.id, - task_id=task_id, - agent_id=agent_id, - state=state, - ) - - if turn_span: - turn_span.output = {"final_output": result.final_text} - - -@acp.on_task_cancel -async def handle_task_canceled(params: CancelTaskParams): - logger.info(f"Task canceled: {params.task.id}") diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py deleted file mode 100644 index e7b764d82..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/agent.py +++ /dev/null @@ -1,39 +0,0 @@ -"""Pydantic AI agent definition for the async harness test agent. - -The Agent is the boundary between this module and the API layer (acp.py). -Pydantic AI handles its own tool-call loop internally — no graph required. -""" - -from __future__ import annotations - -from datetime import datetime - -from pydantic_ai import Agent - -from project.tools import get_weather - -__all__ = ["create_agent", "MODEL_NAME"] - -MODEL_NAME = "openai:gpt-4o-mini" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -def create_agent() -> Agent: - """Build and return the Pydantic AI agent with tools registered.""" - agent = Agent( - MODEL_NAME, - system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), - ) - - agent.tool_plain(get_weather) - - return agent diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py deleted file mode 100644 index 0f16a7cb0..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/project/tools.py +++ /dev/null @@ -1,20 +0,0 @@ -"""Tool definitions for the async harness Pydantic AI agent. - -Pydantic AI tools are registered directly on the Agent via decorators -(see project.agent). This module hosts the bare function so it is easy to -unit-test in isolation. -""" - -from __future__ import annotations - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. 
- """ - return f"The weather in {city} is sunny and 72°F" diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml deleted file mode 100644 index 3dc1e0e41..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "ab-harness-pydantic-ai" -version = "0.1.0" -description = "An async Pydantic AI harness test agent using the unified emitter surface" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "pydantic-ai-slim[openai]>=1.0,<2", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py deleted file mode 100644 index 11098c7d5..000000000 --- a/examples/tutorials/10_async/00_base/harness_pydantic_ai/tests/test_agent.py +++ /dev/null @@ -1,118 +0,0 @@ -"""Live tests for the async harness Pydantic AI agent. - -These tests require a running agent (server + deployed agent) and exercise the -unified-surface async handler end-to-end over the wire. They mirror the -``110_pydantic_ai`` async tutorial tests but target this harness agent. - -Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives -in ``tests/lib/core/harness/test_harness_pydantic_ai_async.py`` in the SDK repo. - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. 
Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: ab-harness-pydantic-ai) -""" - -import os - -import pytest -import pytest_asyncio - -from agentex import AsyncAgentex -from agentex.types import TextContentParam -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest -from agentex.lib.sdk.fastacp.base.base_acp_server import uuid - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "ab-harness-pydantic-ai") - - -@pytest_asyncio.fixture -async def client(): - """Create an AsyncAgentex client instance for testing.""" - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingEvents: - """Test non-streaming event sending through the unified auto_send_turn path.""" - - @pytest.mark.asyncio - async def test_send_event(self, client: AsyncAgentex, agent_id: str): - """Test sending an event to the async harness Pydantic AI agent.""" - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="Hello! 
What can you help me with?", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - @pytest.mark.asyncio - async def test_tool_calling(self, client: AsyncAgentex, agent_id: str): - """Test that the agent can use tools (e.g., weather tool).""" - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="What's the weather in San Francisco?", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - -class TestStreamingEvents: - """Test streaming event sending.""" - - @pytest.mark.asyncio - async def test_send_event_and_stream(self, client: AsyncAgentex, agent_id: str): - """Test sending an event and streaming the response.""" - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - event_content = TextContentParam( - type="text", - author="user", - content="Tell me a short joke.", - ) - await client.agents.send_event( - agent_id=agent_id, - params={"task_id": task.id, "content": event_content}, - ) - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md index b221c1238..66466693b 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/README.md @@ -1,153 +1,59 @@ -# Tutorial 110 (temporal): Pydantic AI Agent +# Temporal Pydantic AI Agent -This tutorial demonstrates a **durable** Pydantic AI agent on AgentEx, backed by Temporal: -- Workflow state survives crashes mid-conversation (Temporal 
replay) -- Every LLM call and every tool call becomes its own Temporal activity (independent retries + observability) -- Streaming via Redis still works — token-by-token deltas appear in the UI in real time +A minimal **Temporal-backed** Pydantic AI agent that drives the **unified +harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) from +inside the model activity's `event_stream_handler`. -This is the Temporal counterpart to the async base tutorial at [`10_async/00_base/110_pydantic_ai/`](../../00_base/110_pydantic_ai/). +## Why this agent exists -## Why Temporal? Why not just async? +This agent calls `emitter.auto_send_turn(...)` **explicitly** inside +the `event_stream_handler`, making the unified-surface wiring visible and giving +the temporal channel direct coverage. -In async base 110, the agent state lives in memory inside the ACP process. If that process dies mid-LLM-call, the in-flight turn is lost. Temporal fixes this by: +## How it wires the unified surface -1. Recording every external interaction (LLM call, tool call) to a durable event log. -2. On worker restart, **replaying** the workflow code, using cached activity results to skip work that already finished. -3. Letting workflows live forever — multi-day conversations or human-in-the-loop flows just work. - -## Architecture at a glance - -Two long-running processes plus shared infrastructure: - -``` -┌──────────────────────────┐ ┌──────────────────────────┐ -│ uvicorn project.acp:acp │ │ python -m run_worker │ -│ (HTTP shim, forwards │ │ (executes workflows + │ -│ signals to Temporal) │ │ activities) │ -└──────────────────────────┘ └──────────────────────────┘ - │ │ - └────► Temporal server ◄───────────┘ - (event log + queue) - - Redis ◄─── activities push deltas - │ - └─── Agentex API tails ──► UI client -``` - -The HTTP server is a thin shim that translates `task/event/send` into Temporal signals. The worker is where your agent code actually runs. 
Temporal sits in between, recording everything. - -## Key code patterns - -### `project/agent.py` — wrap the base agent in `TemporalAgent` - -```python -base_agent = Agent(MODEL_NAME, deps_type=TaskDeps, system_prompt=...) -base_agent.tool_plain(get_weather) - -temporal_agent = TemporalAgent( - base_agent, - name="at110_pydantic_ai_agent", - event_stream_handler=event_handler, # streams to Redis from inside the model activity -) -``` - -`TemporalAgent` (from `pydantic_ai.durable_exec.temporal`) wraps a normal Pydantic AI Agent so that: -- Each LLM call runs in its own activity -- Each tool call runs in its own activity -- The wrapping is invisible to the workflow code that calls `temporal_agent.run(...)` - -### `project/workflow.py` — declare `__pydantic_ai_agents__` +In `project/agent.py`, the `event_stream_handler` runs inside the model activity +and constructs a `UnifiedEmitter` from `RunContext.deps`: ```python -@workflow.defn(name=environment_variables.WORKFLOW_NAME) -class At110PydanticAiWorkflow(BaseWorkflow): - __pydantic_ai_agents__ = [temporal_agent] # ← discovered by PydanticAIPlugin - - @workflow.signal(name=SignalName.RECEIVE_EVENT) - async def on_task_event_send(self, params): - await adk.messages.create(task_id=params.task.id, content=params.event.content) - result = await temporal_agent.run( - params.event.content.content, - deps=TaskDeps(task_id=params.task.id), - ) +async def event_handler(run_context, events): + emitter = UnifiedEmitter( + task_id=run_context.deps.task_id, + trace_id=run_context.deps.task_id, + parent_span_id=run_context.deps.parent_span_id, + ) + turn = PydanticAITurn(events, model=MODEL_NAME) + await emitter.auto_send_turn(turn) ``` -The `__pydantic_ai_agents__` attribute is how `PydanticAIPlugin` discovers which activities to register on the worker — no manual activity list needed. 
- -### `project/acp.py` — no handlers, just plugin wiring - -```python -acp = FastACP.create( - acp_type="async", - config=TemporalACPConfig( - type="temporal", - temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), - plugins=[PydanticAIPlugin()], - ), -) -``` - -When `type="temporal"`, FastACP auto-wires HTTP → workflow signals. You don't define `@acp.on_task_event_send` anywhere — Temporal handles it. - -### `project/run_worker.py` — boot the worker with the plugin - -```python -worker = AgentexWorker( - task_queue=task_queue_name, - plugins=[PydanticAIPlugin()], -) -await worker.run( - activities=get_all_activities(), - workflow=At110PydanticAiWorkflow, -) -``` - -`get_all_activities()` returns the built-in Agentex activities (state, messages, streaming, tracing). Pydantic AI's per-agent activities are auto-added by the plugin. - -## Files - -| File | Purpose | -|------|---------| -| `project/acp.py` | Thin HTTP shim — `FastACP.create(type="temporal", ...)` | -| `project/workflow.py` | `@workflow.defn` class with the signal handler | -| `project/agent.py` | Base Pydantic AI Agent wrapped in `TemporalAgent` | -| `project/tools.py` | Tool functions (must be `async` for Temporal compatibility) | -| `project/run_worker.py` | Worker boot script (separate process) | -| `tests/test_agent.py` | End-to-end test verifying tool round-trips | -| `manifest.yaml` | Sets `temporal.enabled: true` and declares workflow + queue name | - -## Running Locally - -You'll need three terminals open (this is the price of Temporal): - -```bash -# Terminal 1 — backend services (separate repo) -cd ~/scale-agentex/agentex -make dev # brings up Temporal, Redis, Postgres, Agentex API - -# Terminal 2 — this tutorial (ACP server + Temporal worker) -cd ~/scale-agentex-python/examples/tutorials/10_async/10_temporal/110_pydantic_ai -agentex agents run # this also launches the worker process - -# Terminal 3 — tests -cd 
~/scale-agentex-python/examples/tutorials/10_async/10_temporal/110_pydantic_ai -uv run pytest tests/test_agent.py -v -``` - -Watch the Temporal UI at http://localhost:8233 — you'll see workflow executions, signal events, and one activity per LLM call + one per tool call. - -## Sync vs Async vs Temporal — How the code differs - -| Concern | Sync (040) | Async base (110) | Temporal (this one) | -|---|---|---|---| -| `project/acp.py` | `@acp.on_message_send` yields events | `@acp.on_task_event_send` pushes to Redis | **No handlers** — `FastACP.create(type="temporal", ...)` | -| Where the agent runs | In the ACP HTTP process | In the ACP HTTP process | In a separate worker process | -| Durability | Ephemeral — request-scoped | Ephemeral — process-scoped | **Durable** — survives worker restarts via Temporal replay | -| Per-call retries | None | None | Each model + tool call automatically retried by Temporal | -| Code we add | — | `acp.py` handler | `workflow.py`, `run_worker.py`, wrap agent in `TemporalAgent` | - -## Notes - -- Multi-turn conversation memory is not wired here. Workflow state (`self._turn_number`) is durable, but message history isn't currently threaded into `temporal_agent.run(..., message_history=...)`. To add: load via `adk.messages.list(task_id=...)` inside the signal handler and pass through. -- Reasoning/thinking tokens are not exercised by `gpt-4o-mini`. Swap to a reasoning-capable model to exercise that branch end-to-end. -- Tools must be `async` (Pydantic AI's Temporal integration requires it — sync tools would run in threads, breaking Temporal's determinism guarantees). +- The handler runs inside a Temporal activity, so it can freely make + non-deterministic Redis + tracing writes. +- `PydanticAITurn` is constructed without `coalesce_tool_requests`: the + auto_send path delivers streamed tool requests natively. 
+- `deps` (set by `project/workflow.py`) threads the `task_id` and the per-turn + `parent_span_id` into the handler so tool spans nest under the workflow's turn + span. 
+ +## Structure + +- `project/acp.py` — thin ACP server; FastACP auto-wires HTTP routes to the + workflow when `TemporalACPConfig` is used. +- `project/agent.py` — base `Agent` + `TemporalAgent` + the unified-surface + `event_stream_handler`. +- `project/workflow.py` — durable workflow; each turn delegates to + `temporal_agent.run(...)`. +- `project/run_worker.py` — Temporal worker entry point. +- `project/tools.py` — async `get_weather(city)` returning a constant. +- `tests/test_agent.py` — live integration test (requires Temporal + Redis + + ACP server + worker). + +## Tools + +- `get_weather(city: str) -> str` (async): returns a fixed "sunny and 72°F" + string. Each tool call becomes its own Temporal activity. + +## Offline coverage + +Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake +streaming/tracing, no Temporal server) live in the SDK repo under +`tests/lib/core/harness/` (the pydantic-ai temporal suite). diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml index 15d00076f..7ca454b05 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/manifest.yaml @@ -18,7 +18,7 @@ local_development: agent: acp_type: async name: at110-pydantic-ai - description: A Temporal-backed Pydantic AI agent with tool calling and Redis streaming + description: A Temporal-backed Pydantic AI harness test agent using the unified emitter surface temporal: enabled: true @@ -42,8 +42,6 @@ agent: - env_var_name: SGP_CLIENT_BASE_URL secret_name: sgp-client-base-url secret_key: url - # env: - # OPENAI_BASE_URL: "https://your-litellm-proxy/v1" deployment: image: @@ -53,7 +51,7 @@ deployment: global: agent: name: "at110-pydantic-ai" - description: "A Temporal-backed Pydantic AI agent" + description: "A Temporal-backed Pydantic AI harness test agent using the unified emitter 
surface" replicaCount: 1 resources: requests: diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py index dacb45ad6..c142dcf70 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/acp.py @@ -1,7 +1,7 @@ -"""ACP server for the Temporal Pydantic AI tutorial. +"""ACP server for the Temporal harness Pydantic AI test agent. -This file is intentionally thin. When ``acp_type="async"`` is combined -with ``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: +This file is intentionally thin. When ``acp_type="async"`` is combined with +``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: HTTP task/create → @workflow.run on the workflow class HTTP task/event/send → @workflow.signal(SignalName.RECEIVE_EVENT) diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py index a33a317cc..4e59688ce 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/agent.py @@ -1,18 +1,20 @@ -"""Pydantic AI agent definition for the Temporal tutorial. +"""Pydantic AI agent definition for the Temporal harness test agent. This module constructs the base ``pydantic_ai.Agent`` once at import time, registers tools on it, and wraps it in ``TemporalAgent`` from ``pydantic_ai.durable_exec.temporal``. -The ``TemporalAgent`` wrapper makes every model call and every tool call -run as a Temporal activity automatically. The workflow code stays -deterministic; the non-deterministic work (LLM HTTP calls, tool execution) -moves into recorded activities. 
- -Streaming back to Agentex happens via ``event_stream_handler``, which -receives Pydantic AI ``AgentStreamEvent``s from inside the model activity -and forwards them to Redis using our existing ``stream_pydantic_ai_events`` -helper. The ``task_id`` is threaded into the handler via ``deps``. +The ``TemporalAgent`` wrapper makes every model call and every tool call run as +a Temporal activity automatically. The workflow stays deterministic; the +non-deterministic work (LLM HTTP calls, tool execution) moves into recorded +activities. + +Streaming back to Agentex happens via ``event_stream_handler``, which receives +Pydantic AI ``AgentStreamEvent``s from inside the model activity and forwards +them through the UNIFIED HARNESS SURFACE (``UnifiedEmitter.auto_send_turn`` + +``PydanticAITurn``) — called directly rather than via ``stream_pydantic_ai_events``. +The ``task_id`` and per-turn ``parent_span_id`` are threaded into the handler +via ``deps``. """ from __future__ import annotations @@ -26,10 +28,10 @@ from pydantic_ai.durable_exec.temporal import TemporalAgent from project.tools import get_weather -from agentex.lib.adk import ( - stream_pydantic_ai_events, - create_pydantic_ai_tracing_handler, -) +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn + +__all__ = ["TaskDeps", "temporal_agent", "base_agent", "MODEL_NAME"] MODEL_NAME = "openai:gpt-4o-mini" SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. @@ -48,13 +50,13 @@ class TaskDeps(BaseModel): """Per-run dependencies passed into the agent via ``deps=``. Pydantic AI's ``RunContext.deps`` is the canonical place to thread - request-scoped data (like the Agentex task_id) into tools and - event handlers — including code that runs inside Temporal activities. + request-scoped data (like the Agentex task_id) into tools and event + handlers — including code that runs inside Temporal activities. 
""" task_id: str - # When set, the event handler nests per-tool-call spans under this - # span. Typically the ID of the per-turn span opened by the workflow. + # When set, the event handler nests per-tool-call spans under this span. + # Typically the ID of the per-turn span opened by the workflow. parent_span_id: str | None = None @@ -77,32 +79,33 @@ async def event_handler( run_context: RunContext[TaskDeps], events: AsyncIterable[AgentStreamEvent], ) -> None: - """Stream Pydantic AI events to Agentex via Redis from inside the model activity. + """Stream Pydantic AI events to Agentex via the unified surface. Pydantic AI calls this with the live event stream as soon as the model - activity begins emitting parts. Because the handler runs inside the - activity (not the workflow), it can freely make non-deterministic - Redis writes — including the tracing HTTP calls that record per-tool-call - spans under the workflow's per-turn span (when ``parent_span_id`` is set). + activity begins emitting parts. Because the handler runs inside the activity + (not the workflow), it can freely make non-deterministic Redis + tracing + writes. + + The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id), + so tool spans nest under the workflow's per-turn span and messages auto-send + to the task stream. The auto_send path delivers streamed tool requests + natively, so no coalescing workaround is needed. 
""" - tracing_handler = create_pydantic_ai_tracing_handler( + emitter = UnifiedEmitter( + task_id=run_context.deps.task_id, trace_id=run_context.deps.task_id, parent_span_id=run_context.deps.parent_span_id, - task_id=run_context.deps.task_id, - ) - await stream_pydantic_ai_events( - events, - run_context.deps.task_id, - tracing_handler=tracing_handler, ) + turn = PydanticAITurn(events, model=MODEL_NAME) + await emitter.auto_send_turn(turn) -# Construct the durable agent at module load time so that the -# PydanticAIPlugin can auto-discover its activities via the workflow's -# ``__pydantic_ai_agents__`` attribute. +# Construct the durable agent at module load time so that the PydanticAIPlugin +# can auto-discover its activities via the workflow's ``__pydantic_ai_agents__`` +# attribute. base_agent = _build_base_agent() temporal_agent: TemporalAgent[TaskDeps, str] = TemporalAgent( base_agent, - name="at110_pydantic_ai_agent", + name="pydantic_ai_agent", event_stream_handler=event_handler, ) diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py index e54c9d1dc..4b4d43d19 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/run_worker.py @@ -1,18 +1,18 @@ -"""Temporal worker for the Pydantic AI tutorial. +"""Temporal worker for the harness Pydantic AI test agent. -Run as a separate long-lived process alongside the ACP HTTP server. The -worker polls Temporal for workflow + activity tasks and executes them. +Run as a separate long-lived process alongside the ACP HTTP server. The worker +polls Temporal for workflow + activity tasks and executes them. -The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow -class and registers every model/tool activity the TemporalAgent needs — -so we don't have to enumerate activities by hand here. 
+The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow class +and registers every model/tool activity the TemporalAgent needs — so we don't +have to enumerate activities by hand here. """ import asyncio from pydantic_ai.durable_exec.temporal import PydanticAIPlugin -from project.workflow import At110PydanticAiWorkflow +from project.workflow import HarnessPydanticAiWorkflow from agentex.lib.utils.debug import setup_debug_if_enabled from agentex.lib.utils.logging import make_logger from agentex.lib.environment_variables import EnvironmentVariables @@ -31,8 +31,8 @@ async def main(): raise ValueError("WORKFLOW_TASK_QUEUE is not set") # get_all_activities() returns the built-in Agentex activities (state, - # messages, streaming, tracing). Pydantic AI's TemporalAgent activities - # are auto-registered by PydanticAIPlugin via __pydantic_ai_agents__. + # messages, streaming, tracing). Pydantic AI's TemporalAgent activities are + # auto-registered by PydanticAIPlugin via __pydantic_ai_agents__. worker = AgentexWorker( task_queue=task_queue_name, plugins=[PydanticAIPlugin()], @@ -40,7 +40,7 @@ async def main(): await worker.run( activities=get_all_activities(), - workflow=At110PydanticAiWorkflow, + workflow=HarnessPydanticAiWorkflow, ) diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py index 75640fcb7..bbd6c5200 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/tools.py @@ -1,9 +1,8 @@ -"""Tool definitions for the Temporal Pydantic AI agent. +"""Tool definitions for the Temporal harness Pydantic AI agent. These functions are registered on the base Pydantic AI agent. When the agent is wrapped in ``TemporalAgent``, each tool call becomes its own Temporal -activity automatically — independently retryable and observable in the -Temporal UI. 
+activity automatically — independently retryable and observable. Tools must be ``async`` because Pydantic AI's Temporal integration requires it: non-async tools would run in threads, which is non-deterministic and diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py index bb07ac818..9a01be7de 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/project/workflow.py @@ -1,16 +1,16 @@ -"""Temporal workflow for the Pydantic AI tutorial. +"""Temporal workflow for the harness Pydantic AI test agent. The workflow holds task state durably across crashes. Its signal handler -delegates the actual agent run to ``temporal_agent.run(...)`` — which -internally schedules model and tool activities, each independently -durable. The ``event_stream_handler`` registered on ``temporal_agent`` -pushes streaming deltas to Redis while the model activity runs. +delegates the actual agent run to ``temporal_agent.run(...)`` — which internally +schedules model and tool activities, each independently durable. The +``event_stream_handler`` registered on ``temporal_agent`` (see project.agent) +pushes streaming deltas through the unified harness surface while the model +activity runs. Multi-turn memory is kept on the workflow instance itself -(``self._message_history``). Temporal's workflow state is already durable -and replay-safe, so unlike the async-base tutorial we don't need an -external ``adk.state`` round-trip — the message list survives crashes -because Temporal replays activity results that produced it. +(``self._message_history``). Temporal's workflow state is already durable and +replay-safe, so unlike the async-base agent we don't need an external +``adk.state`` round-trip. 
""" from __future__ import annotations @@ -56,14 +56,14 @@ @workflow.defn(name=environment_variables.WORKFLOW_NAME) -class At110PydanticAiWorkflow(BaseWorkflow): +class HarnessPydanticAiWorkflow(BaseWorkflow): """Long-running Temporal workflow that delegates each turn to a Pydantic AI TemporalAgent. The ``__pydantic_ai_agents__`` attribute is the marker the ``PydanticAIPlugin`` looks for at worker startup: it pulls - ``temporal_agent.temporal_activities`` off this list and registers them - on the worker automatically — so we don't have to list activities by - hand in ``run_worker.py``. + ``temporal_agent.temporal_activities`` off this list and registers them on + the worker automatically — so we don't have to list activities by hand in + ``run_worker.py``. """ __pydantic_ai_agents__ = [temporal_agent] @@ -74,8 +74,8 @@ def __init__(self): self._turn_number = 0 # Conversation history accumulated across turns. Each entry is a # pydantic-ai ``ModelMessage``. Temporal replays the activity that - # produced these messages, so the list is rebuilt deterministically - # if the workflow ever recovers from a crash. + # produced these messages, so the list is rebuilt deterministically if + # the workflow ever recovers from a crash. self._message_history: list["ModelMessage"] = [] @workflow.signal(name=SignalName.RECEIVE_EVENT) @@ -93,17 +93,10 @@ async def on_task_event_send(self, params: SendEventParams) -> None: name=f"Turn {self._turn_number}", input={"message": params.event.content.content}, ) as span: - # temporal_agent.run() is the magic line. From the outside it - # looks like a regular async call. Internally it schedules: - # 1. A model activity (LLM HTTP call recorded by Temporal) - # 2. For each tool the model invokes, a tool activity - # 3. Each activity is retried, observable, and durable - # While the model activity runs, the event_stream_handler on - # temporal_agent pushes deltas to Redis so the UI sees tokens. 
- # - # Passing ``message_history`` makes the run remember prior turns: - # without it the agent would respond to each user message as if - # it had never seen the conversation before. + # temporal_agent.run() schedules a model activity, per-tool + # activities, and the event_stream_handler activity (which pushes + # deltas through the unified surface). Passing ``message_history`` + # makes the run remember prior turns. result = await temporal_agent.run( params.event.content.content, message_history=self._message_history, @@ -112,8 +105,8 @@ async def on_task_event_send(self, params: SendEventParams) -> None: parent_span_id=span.id if span else None, ), ) - # Persist the new full history (user + assistant + any tool - # rounds) so the next turn picks up from here. + # Persist the new full history (user + assistant + any tool rounds) + # so the next turn picks up from here. self._message_history = list(result.all_messages()) if span: span.output = {"final_output": result.output} diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml index 9f47733c0..2f308f2a1 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml +++ b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "hatchling.build" [project] name = "at110-pydantic-ai" version = "0.1.0" -description = "A Temporal-backed Pydantic AI agent with tool calling and Redis streaming" +description = "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface" readme = "README.md" requires-python = ">=3.12" dependencies = [ diff --git a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py index d01276ab8..974cddcc0 100644 --- a/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py +++ 
b/examples/tutorials/10_async/10_temporal/110_pydantic_ai/tests/test_agent.py @@ -1,9 +1,10 @@ -"""Tests for the Temporal Pydantic AI agent. +"""Live tests for the Temporal Pydantic AI agent. -This test suite validates: -- The agent responds to a basic message -- Tool calls are visible in the message history (proving each tool call - ran as its own Temporal activity) +These tests require a running agent (Temporal + Redis + ACP server + worker) and +exercise the unified-surface event_stream_handler end-to-end over the wire. + +Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives +in the SDK repo under ``tests/lib/core/harness/`` (the pydantic-ai temporal suite). To run these tests: 1. Make sure the agent is running (worker + ACP server) @@ -16,10 +17,7 @@ import pytest import pytest_asyncio -from test_utils.async_utils import ( - poll_messages, - send_event_and_poll_yielding, -) +from test_utils.async_utils import poll_messages, send_event_and_poll_yielding from agentex import AsyncAgentex from agentex.types.task_message import TaskMessage @@ -51,14 +49,12 @@ async def agent_id(client, agent_name): class TestNonStreamingEvents: - """Test that the Temporal-backed Pydantic AI agent responds and uses tools.""" + """Test that the Temporal-backed harness agent responds and uses tools.""" @pytest.mark.asyncio async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): """Drive a full turn: create task, send a weather question, verify tool round-trip.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None @@ -71,11 +67,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): sleep_interval=1.0, ): assert isinstance(message, TaskMessage) - if ( - message.content - and 
message.content.type == "text" - and message.content.author == "agent" - ): + if message.content and message.content.type == "text" and message.content.author == "agent": task_creation_found = True break assert task_creation_found, "Task creation welcome message not found" @@ -101,11 +93,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): if final_message and getattr(final_message, "streaming_status", None) == "DONE": break - if ( - message.content - and message.content.type == "text" - and message.content.author == "agent" - ): + if message.content and message.content.type == "text" and message.content.author == "agent": final_message = message content_length = len(getattr(message.content, "content", "") or "") if message.streaming_status == "DONE" and content_length > 0: @@ -115,9 +103,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): assert seen_tool_request, "Expected a tool_request (agent calling get_weather)" assert seen_tool_response, "Expected a tool_response (get_weather result)" assert final_message is not None, "Expected a final agent text message" - final_text = ( - getattr(final_message.content, "content", None) if final_message.content else None - ) + final_text = getattr(final_message.content, "content", None) if final_message.content else None assert isinstance(final_text, str) and len(final_text) > 0 # The get_weather tool always returns "72°F" — the response should mention it. 
assert "72" in final_text, "Expected weather response to mention 72°F" diff --git a/examples/tutorials/10_async/00_base/130_harness_openai/.dockerignore b/examples/tutorials/10_async/10_temporal/120_openai_agents/.dockerignore similarity index 100% rename from examples/tutorials/10_async/00_base/130_harness_openai/.dockerignore rename to examples/tutorials/10_async/10_temporal/120_openai_agents/.dockerignore diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile b/examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile similarity index 65% rename from examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile rename to examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile index f6c9fb59b..700f56cea 100644 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/Dockerfile +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/Dockerfile @@ -22,20 +22,20 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 -COPY 10_async/10_temporal/harness_langgraph/pyproject.toml /app/harness_langgraph/pyproject.toml -COPY 10_async/10_temporal/harness_langgraph/README.md /app/harness_langgraph/README.md +COPY 10_async/10_temporal/120_openai_agents/pyproject.toml /app/120_openai_agents/pyproject.toml +COPY 10_async/10_temporal/120_openai_agents/README.md /app/120_openai_agents/README.md -WORKDIR /app/harness_langgraph +WORKDIR /app/120_openai_agents -COPY 10_async/10_temporal/harness_langgraph/project /app/harness_langgraph/project -COPY 10_async/10_temporal/harness_langgraph/tests /app/harness_langgraph/tests +COPY 10_async/10_temporal/120_openai_agents/project /app/120_openai_agents/project +COPY 10_async/10_temporal/120_openai_agents/tests /app/120_openai_agents/tests COPY test_utils /app/test_utils RUN uv pip install --system .[dev] ENV PYTHONPATH=/app -ENV AGENT_NAME=at-harness-langgraph +ENV AGENT_NAME=at120-openai-agents CMD ["uvicorn", "project.acp:acp", 
"--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/README.md b/examples/tutorials/10_async/10_temporal/120_openai_agents/README.md similarity index 94% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/README.md rename to examples/tutorials/10_async/10_temporal/120_openai_agents/README.md index 0415ae225..4db26d0a1 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/README.md +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/README.md @@ -9,7 +9,7 @@ LLM calls are non-deterministic, so they can't run directly in a Temporal workflow. This tutorial keeps the workflow (`project/workflow.py`) deterministic and delegates each turn to a custom activity (`project/activities.py`). The activity uses the SAME `OpenAITurn` adapter as -the sync (`060_harness_openai`) and async (`130_harness_openai`) variants, and +the sync (`050_openai_agents`) and async (`120_openai_agents`) variants, and delivers via `UnifiedEmitter.auto_send_turn` — which is designed to run inside an activity (it writes streaming side effects to Redis and returns the final text + usage). 
diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/environments.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents/environments.yaml similarity index 100% rename from examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/environments.yaml rename to examples/tutorials/10_async/10_temporal/120_openai_agents/environments.yaml diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml similarity index 78% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml rename to examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml index 64a943438..4b59db442 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/manifest.yaml +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../../ include_paths: - - 10_async/10_temporal/140_harness_openai + - 10_async/10_temporal/120_openai_agents - test_utils - dockerfile: 10_async/10_temporal/140_harness_openai/Dockerfile - dockerignore: 10_async/10_temporal/140_harness_openai/.dockerignore + dockerfile: 10_async/10_temporal/120_openai_agents/Dockerfile + dockerignore: 10_async/10_temporal/120_openai_agents/.dockerignore local_development: agent: @@ -17,14 +17,14 @@ local_development: agent: acp_type: async - name: at140-harness-openai + name: at120-openai-agents description: A Temporal-backed OpenAI Agents SDK agent on the unified harness surface temporal: enabled: true workflows: - - name: at140-harness-openai - queue_name: at140_harness_openai_queue + - name: at120-openai-agents + queue_name: at120_openai_agents_queue credentials: - env_var_name: REDIS_URL @@ -50,7 +50,7 @@ deployment: global: agent: - name: "at140-harness-openai" + name: "at120-openai-agents" description: "A Temporal-backed OpenAI Agents SDK agent on the unified harness 
surface" replicaCount: 1 resources: diff --git a/examples/tutorials/00_sync/harness_pydantic_ai/project/__init__.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/__init__.py similarity index 100% rename from examples/tutorials/00_sync/harness_pydantic_ai/project/__init__.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/__init__.py diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/acp.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/acp.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/acp.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/acp.py diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py similarity index 92% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py index a70ee0c5d..2a8a773c4 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/activities.py +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/activities.py @@ -25,7 +25,7 @@ logger = make_logger(__name__) -RUN_HARNESS_AGENT_ACTIVITY = "run_harness_openai_agent" +RUN_AGENT_ACTIVITY = "run_openai_agent" class RunHarnessAgentParams(BaseModel): @@ -51,8 +51,8 @@ class RunHarnessAgentResult(BaseModel): class HarnessActivities: """Hosts the harness-backed OpenAI agent activity.""" - @activity.defn(name=RUN_HARNESS_AGENT_ACTIVITY) - async def run_harness_openai_agent(self, params: RunHarnessAgentParams) -> RunHarnessAgentResult: + @activity.defn(name=RUN_AGENT_ACTIVITY) + async def run_openai_agent(self, params: RunHarnessAgentParams) -> RunHarnessAgentResult: """Run the agent for one turn and auto-send its output. 
Threads the running conversation through ``input_list`` so multi-turn diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/agent.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/agent.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/agent.py diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py similarity index 91% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py index 69586a395..b82ee0f50 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/run_worker.py +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/run_worker.py @@ -2,7 +2,7 @@ Runs as a separate long-lived process alongside the ACP HTTP server. Registers the built-in Agentex activities plus the custom harness agent activity -(``HarnessActivities.run_harness_openai_agent``), and the workflow. +(``HarnessActivities.run_openai_agent``), and the workflow. 
""" import asyncio @@ -28,7 +28,7 @@ async def main(): harness_activities = HarnessActivities() all_activities = [ - harness_activities.run_harness_openai_agent, + harness_activities.run_openai_agent, *get_all_activities(), ] diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/tools.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/tools.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/tools.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/tools.py diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py similarity index 97% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py index 69ad7b365..566bd93b6 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/workflow.py +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/project/workflow.py @@ -1,7 +1,7 @@ """Temporal workflow for the OpenAI Agents harness tutorial. The workflow stays deterministic: it echoes the user message and delegates the -non-deterministic LLM run to ``run_harness_openai_agent`` (see +non-deterministic LLM run to ``run_openai_agent`` (see ``project.activities``). That activity runs the OpenAI Agents SDK and delivers the turn through the unified harness surface (``OpenAITurn`` + ``UnifiedEmitter.auto_send_turn``). 
@@ -18,7 +18,7 @@ from agentex.lib import adk from project.activities import ( - RUN_HARNESS_AGENT_ACTIVITY, + RUN_AGENT_ACTIVITY, RunHarnessAgentParams, RunHarnessAgentResult, ) @@ -77,7 +77,7 @@ async def on_task_event_send(self, params: SendEventParams) -> None: input={"message": params.event.content.content}, ) as span: turn_result = await workflow.execute_activity( - RUN_HARNESS_AGENT_ACTIVITY, + RUN_AGENT_ACTIVITY, RunHarnessAgentParams( task_id=params.task.id, user_message=params.event.content.content, diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml b/examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml similarity index 95% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml rename to examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml index 5bf53f6be..e6c77fae3 100644 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/pyproject.toml +++ b/examples/tutorials/10_async/10_temporal/120_openai_agents/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "at140-harness-openai" +name = "at120-openai-agents" version = "0.1.0" description = "A Temporal-backed OpenAI Agents SDK agent on the unified harness surface" readme = "README.md" diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents/tests/test_agent.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/140_harness_openai/tests/test_agent.py rename to examples/tutorials/10_async/10_temporal/120_openai_agents/tests/test_agent.py diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore deleted file mode 100644 index c49489471..000000000 --- 
a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore +++ /dev/null @@ -1,43 +0,0 @@ -# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg - -# Environments -.env** -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# IDE -.idea/ -.vscode/ -*.swp -*.swo - -# Git -.git -.gitignore - -# Misc -.DS_Store diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile deleted file mode 100644 index d4927d0ce..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile +++ /dev/null @@ -1,62 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - nodejs \ - npm \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/** - -# Install tctl (Temporal CLI) -RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \ - tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ - chmod +x /usr/local/bin/tctl && \ - rm /tmp/tctl.tar.gz - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -# Copy pyproject.toml and README.md to install dependencies -COPY 10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml /app/120_openai_agents_local_sandbox/pyproject.toml -COPY 10_async/10_temporal/120_openai_agents_local_sandbox/README.md /app/120_openai_agents_local_sandbox/README.md - -WORKDIR /app/120_openai_agents_local_sandbox - -# Copy the project code -COPY 
10_async/10_temporal/120_openai_agents_local_sandbox/project /app/120_openai_agents_local_sandbox/project - -# Copy the test files -COPY 10_async/10_temporal/120_openai_agents_local_sandbox/tests /app/120_openai_agents_local_sandbox/tests - -# Copy shared test utilities -COPY test_utils /app/test_utils - -# Install the required Python packages with dev dependencies -RUN uv pip install --system .[dev] - -WORKDIR /app/120_openai_agents_local_sandbox - -ENV PYTHONPATH=/app - -# Set test environment variables -ENV AGENT_NAME=at120-openai-agents-local-sandbox - -# Run the ACP server using uvicorn -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] - -# When we deploy the worker, we will replace the CMD with the following -# CMD ["python", "-m", "run_worker"] diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md deleted file mode 100644 index 161bc43da..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/README.md +++ /dev/null @@ -1,130 +0,0 @@ -# Tutorial 120: Temporal OpenAI Agents SDK with a Local Sandbox - -This tutorial demonstrates running an [OpenAI Agents SDK](https://developers.openai.com/api/docs/guides/agents) -`SandboxAgent` inside a **Temporal** workflow, backed by the **local** -(`unix_local`) sandbox. - -The agent is a "local sandbox assistant": it answers questions by actually running -real shell commands (e.g. `python3 --version`, `ls`, `python3 -c "..."`) instead of -guessing. Because it runs inside Temporal, the sandbox tool calls become durable, -retried, and observable activities. - -This mirrors the canonical OpenAI Agents SDK Temporal example -(`060_open_ai_agents_sdk_hello_world`) and the tools example -(`070_open_ai_agents_sdk_tools`). The new piece is the **Temporal sandbox bridge**. 
- -## Key Concepts - -### Temporal ACP -The Temporal ACP model (`acp_type: async`, `temporal.enabled: true`) maps task -lifecycle to a Temporal workflow: -- `@workflow.run` (`on_task_create`) keeps the conversation alive. -- `@workflow.signal(name=SignalName.RECEIVE_EVENT)` (`on_task_event_send`) handles - each user message. - -No ACP handlers are registered by hand — the `TemporalACPConfig` wires them to the -workflow automatically. - -### Streaming (Interceptor + Model Provider + Hooks) -Real-time streaming uses STANDARD Temporal components — no forked plugin: -- **`ContextInterceptor`** threads `task_id` through activity headers. The workflow - sets `self._task_id` so the interceptor can read it. -- **`TemporalStreamingModelProvider`** returns a model that streams tokens to Redis - in real time while still returning the complete response to Temporal for - determinism / replay safety. -- **`TemporalStreamingHooks`** creates the lifecycle messages (tool request / - response, etc.) in the database. - -The `stream_lifecycle_content` activity must be registered on the worker alongside -`get_all_activities()`. 
- -### The Temporal sandbox bridge (`UnixLocalSandboxClient`) -The sandbox client is registered ON THE WORKER (and the ACP) via the standard -plugin: - -```python -from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient -from temporalio.contrib.openai_agents import OpenAIAgentsPlugin, SandboxClientProvider - -OpenAIAgentsPlugin( - model_provider=TemporalStreamingModelProvider(), - sandbox_clients=[SandboxClientProvider("local", UnixLocalSandboxClient())], -) -``` - -Inside the workflow, the run is pointed at that backend by name: - -```python -from temporalio.contrib.openai_agents.workflow import temporal_sandbox_client -from agents.sandbox import SandboxAgent, SandboxRunConfig -from agents.run_config import RunConfig -from agents.sandbox.snapshot import NoopSnapshotSpec -from agents.sandbox.capabilities import Shell -from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClientOptions - -agent = SandboxAgent( - name="Local Sandbox Assistant", - model="gpt-4o-mini", - instructions="...use the shell tools to actually run commands...", - capabilities=[Shell()], -) -run_config = RunConfig( - sandbox=SandboxRunConfig( - client=temporal_sandbox_client("local"), - options=UnixLocalSandboxClientOptions(), - snapshot=NoopSnapshotSpec(), # skip the per-turn workspace snapshot - ) -) -result = await Runner.run( - agent, self._state.input_list, run_config=run_config, - hooks=TemporalStreamingHooks(task_id=params.task.id), -) -``` - -`temporal_sandbox_client("local")` resolves the worker-registered client, so the -sandbox shell tool calls run as Temporal activities (durable + observable in the -Temporal UI). - -## Two important lessons - -1. **Don't double-post the assistant message.** The `TemporalStreamingModelProvider` - already streams AND persists the assistant's response. If you also call - `adk.messages.create(...)` after `Runner.run`, the answer shows up twice. We only - persist conversation state for the next turn via `result.to_input_list()`. 
-2. **Use `NoopSnapshotSpec()`.** Without it, the sandbox tries to take a per-turn - workspace snapshot, and stopping the sandbox can raise - `WorkspaceArchiveReadError`. `NoopSnapshotSpec()` skips that snapshot. - -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | Temporal ACP server (plugin + sandbox client + interceptor) | -| `project/run_worker.py` | Temporal worker (registers workflow, activities, plugin, sandbox client) | -| `project/workflow.py` | `BaseWorkflow` that runs the `SandboxAgent` against the local sandbox | -| `tests/test_agent.py` | Integration tests (polling pattern) | -| `manifest.yaml` | Agent configuration (temporal enabled) | -| `environments.yaml` | Per-environment deployment overrides | - -## Running Locally - -```bash -# From this directory -agentex agents run -``` - -Set `OPENAI_API_KEY` (or `LITELLM_API_KEY` if you're behind the Scale LiteLLM -gateway) in your environment or in a `.env` file in `project/` so the agent can call -the model. - -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` - -## Further Reading - -- OpenAI Agents SDK guide: https://developers.openai.com/api/docs/guides/agents -- The async (non-Temporal) variant: `10_async/00_base/120_openai_agents_local_sandbox` -- The canonical OpenAI Agents SDK Temporal example: `10_async/10_temporal/060_open_ai_agents_sdk_hello_world` diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml deleted file mode 100644 index 86ac89288..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/manifest.yaml +++ /dev/null @@ -1,111 +0,0 @@ -# Agent Manifest Configuration -# --------------------------- -# This file defines how your agent should be built and deployed. 
- -# Build Configuration -# ------------------ -build: - context: - # Root directory for the build context - root: ../../../ # Up to tutorials level to include test_utils - - # Paths to include in the Docker build context - include_paths: - - 10_async/10_temporal/120_openai_agents_local_sandbox - - test_utils - - # Path to your agent's Dockerfile (relative to the root directory) - dockerfile: 10_async/10_temporal/120_openai_agents_local_sandbox/Dockerfile - - # Path to your agent's .dockerignore - dockerignore: 10_async/10_temporal/120_openai_agents_local_sandbox/.dockerignore - - -# Local Development Configuration -# ----------------------------- -local_development: - agent: - port: 8000 # Port where your local ACP server is running - host_address: host.docker.internal # Host address for Docker networking - - # File paths for local development (relative to this manifest.yaml) - paths: - # Path to ACP server file - acp: project/acp.py - # Path to temporal worker file - worker: project/run_worker.py - - -# Agent Configuration -# ----------------- -agent: - # Type of agent - either sync or async - acp_type: async - - # Unique name for your agent - name: at120-openai-agents-local-sandbox - - # Description of what your agent does - description: A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox - - # Temporal workflow configuration - temporal: - enabled: true - workflows: - # Name of the workflow class (must match the @workflow.defn name in workflow.py) - - name: at120-openai-agents-local-sandbox - - # Queue name for task distribution - queue_name: at120_openai_agents_local_sandbox_queue - - # Credentials mapping (maps Kubernetes secrets to environment variables) - credentials: - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - 
secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - - # Environment variables for running locally and for deployment - env: - OPENAI_AGENTS_DISABLE_TRACING: "1" - - -# Deployment Configuration -# ----------------------- -deployment: - # Container image configuration - image: - repository: "" # Update with your container registry - tag: "latest" # Default tag, should be versioned in production - - imagePullSecrets: - - name: my-registry-secret # Update with your image pull secret name - - # Global deployment settings that apply to all clusters - global: - agent: - name: "at120-openai-agents-local-sandbox" - description: "A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox" - - # Default replica count - replicaCount: 1 - - # Default resource requirements - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/__init__.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py deleted file mode 100644 index 196e1e7cd..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/acp.py +++ /dev/null @@ -1,83 +0,0 @@ -import os -import sys - -from temporalio.contrib.openai_agents import ( - OpenAIAgentsPlugin, - SandboxClientProvider, -) -from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient - -# === DEBUG SETUP (AgentEx CLI Debug Support) === -if os.getenv("AGENTEX_DEBUG_ENABLED") == "true": - try: - import debugpy - debug_port = int(os.getenv("AGENTEX_DEBUG_PORT", "5679")) - debug_type = 
os.getenv("AGENTEX_DEBUG_TYPE", "acp") - wait_for_attach = os.getenv("AGENTEX_DEBUG_WAIT_FOR_ATTACH", "false").lower() == "true" - - # Configure debugpy - debugpy.configure(subProcess=False) - debugpy.listen(debug_port) - - print(f"🐛 [{debug_type.upper()}] Debug server listening on port {debug_port}") - - if wait_for_attach: - print(f"⏳ [{debug_type.upper()}] Waiting for debugger to attach...") - debugpy.wait_for_client() - print(f"✅ [{debug_type.upper()}] Debugger attached!") - else: - print(f"📡 [{debug_type.upper()}] Ready for debugger attachment") - - except ImportError: - print("❌ debugpy not available. Install with: pip install debugpy") - sys.exit(1) - except Exception as e: - print(f"❌ Debug setup failed: {e}") - sys.exit(1) -# === END DEBUG SETUP === - -from agentex.lib.types.fastacp import TemporalACPConfig -from agentex.lib.sdk.fastacp.fastacp import FastACP -from agentex.lib.core.temporal.plugins.openai_agents.models.temporal_streaming_model import ( - TemporalStreamingModelProvider, -) -from agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ( - ContextInterceptor, -) - -context_interceptor = ContextInterceptor() -temporal_streaming_model_provider = TemporalStreamingModelProvider() - -# Create the ACP server. We register the STANDARD OpenAIAgentsPlugin with: -# - the streaming model provider (real-time token streaming + persistence) -# - the LOCAL sandbox backend, registered under the name "local" so the -# workflow can resolve it via ``temporal_sandbox_client("local")`` -# plus the ContextInterceptor that threads task_id through activity headers. -acp = FastACP.create( - acp_type="async", - config=TemporalACPConfig( - # When deployed to the cluster, the Temporal address is set automatically. - # For local development, we set the address manually to talk to the local - # Temporal service set up via docker compose. 
- type="temporal", - temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), - plugins=[ - OpenAIAgentsPlugin( - model_provider=temporal_streaming_model_provider, - sandbox_clients=[ - SandboxClientProvider("local", UnixLocalSandboxClient()), - ], - ) - ], - interceptors=[context_interceptor], - ), -) - - -# Notice that we don't need to register any handlers when we use type="temporal". -# These handlers are automatically registered when the ACP is created: -# -# @acp.on_task_create -> the workflow method decorated with @workflow.run -# @acp.on_task_event_send -> the workflow method decorated with -# @workflow.signal(name=SignalName.RECEIVE_EVENT) -# @acp.on_task_cancel -> handled by the temporal client (cancels the workflow) diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py deleted file mode 100644 index a2b7bdf6b..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/run_worker.py +++ /dev/null @@ -1,80 +0,0 @@ -import asyncio - -from temporalio.contrib.openai_agents import ( - OpenAIAgentsPlugin, - SandboxClientProvider, -) -from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClient - -from project.workflow import At120OpenaiAgentsLocalSandboxWorkflow -from agentex.lib.utils.debug import setup_debug_if_enabled -from agentex.lib.utils.logging import make_logger -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.activities import get_all_activities -from agentex.lib.core.temporal.workers.worker import AgentexWorker -from agentex.lib.core.temporal.plugins.openai_agents.hooks.activities import ( - stream_lifecycle_content, -) -from agentex.lib.core.temporal.plugins.openai_agents.models.temporal_streaming_model import ( - TemporalStreamingModelProvider, -) -from 
agentex.lib.core.temporal.plugins.openai_agents.interceptors.context_interceptor import ( - ContextInterceptor, -) - -environment_variables = EnvironmentVariables.refresh() - -logger = make_logger(__name__) - - -async def main(): - # Setup debug mode if enabled - setup_debug_if_enabled() - - task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE - if task_queue_name is None: - raise ValueError("WORKFLOW_TASK_QUEUE is not set") - - # Register activities. ``stream_lifecycle_content`` powers the streaming - # lifecycle hooks; the rest are the standard AgentEx activities. - all_activities = get_all_activities() + [stream_lifecycle_content] - - # ============================================================================ - # STREAMING + SANDBOX SETUP - # ============================================================================ - # 1. ContextInterceptor threads task_id through activity headers so the - # streaming model + hooks know which task to stream/persist to. - # 2. TemporalStreamingModelProvider returns a model that streams tokens to - # Redis in real time while still returning the complete response to - # Temporal for determinism / replay safety. - # 3. SandboxClientProvider registers the LOCAL sandbox backend - # (UnixLocalSandboxClient) under the name "local". The workflow resolves - # it at run time via ``temporal_sandbox_client("local")``, so the sandbox - # tool calls run as durable Temporal activities. - # - # We use the STANDARD temporalio.contrib.openai_agents.OpenAIAgentsPlugin — - # no forked plugin needed. 
- context_interceptor = ContextInterceptor() - temporal_streaming_model_provider = TemporalStreamingModelProvider() - - worker = AgentexWorker( - task_queue=task_queue_name, - plugins=[ - OpenAIAgentsPlugin( - model_provider=temporal_streaming_model_provider, - sandbox_clients=[ - SandboxClientProvider("local", UnixLocalSandboxClient()), - ], - ) - ], - interceptors=[context_interceptor], - ) - - await worker.run( - activities=all_activities, - workflow=At120OpenaiAgentsLocalSandboxWorkflow, - ) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py deleted file mode 100644 index 45b61b04e..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/project/workflow.py +++ /dev/null @@ -1,213 +0,0 @@ -"""OpenAI Agents SDK + Temporal: Local Sandbox Tutorial - -This tutorial demonstrates running an OpenAI Agents SDK ``SandboxAgent`` inside a -Temporal workflow, backed by the **local** (``unix_local``) sandbox. The agent is -a "local sandbox assistant": it answers questions by actually running real shell -commands (e.g. ``python3 --version``, ``ls``, ``python3 -c "..."``) instead of -guessing. - -KEY CONCEPTS DEMONSTRATED: -- A ``SandboxAgent`` granted the ``Shell`` capability inside a durable Temporal - workflow. -- The Temporal sandbox bridge: ``temporal_sandbox_client("local")`` resolves to - the ``UnixLocalSandboxClient`` registered on the worker via - ``SandboxClientProvider`` (see ``run_worker.py`` / ``acp.py``). The sandbox tool - calls run as Temporal activities, so they are durable, retried, and observable. -- Real-time streaming + persistence via ``TemporalStreamingModelProvider`` + - ``ContextInterceptor`` (configured on the worker) and ``TemporalStreamingHooks``. 
- -IMPORTANT LESSONS (applied below): - (a) Do NOT post the assistant message yourself with ``adk.messages.create`` - after ``Runner.run``. The ``TemporalStreamingModelProvider`` already streams - and persists the assistant's response — posting it again would duplicate the - answer in the UI. We only persist conversation state for the next turn via - ``result.to_input_list()``. - (b) Use ``NoopSnapshotSpec()`` so the per-turn workspace snapshot is skipped. - Without it, stopping the sandbox can raise ``WorkspaceArchiveReadError``. -""" - -from __future__ import annotations - -import os -import json - -from agents import Runner -from temporalio import workflow - -from agentex.lib import adk -from agentex.lib.types.acp import SendEventParams, CreateTaskParams -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.utils.model_utils import BaseModel -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.types.workflow import SignalName -from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow -from agentex.lib.core.tracing.tracing_processor_manager import ( - add_tracing_processor_config, -) -from agentex.lib.core.temporal.plugins.openai_agents.hooks.hooks import ( - TemporalStreamingHooks, -) - -# OpenAI Agents SDK sandbox imports. These are safe to import at workflow module -# load time; the actual sandbox client is resolved at run time via -# ``temporal_sandbox_client`` (which maps to the worker-registered backend). 
-with workflow.unsafe.imports_passed_through(): - from agents.sandbox import SandboxAgent, SandboxRunConfig - from agents.run_config import RunConfig - from agents.sandbox.snapshot import NoopSnapshotSpec - from agents.sandbox.capabilities import Shell - from agents.sandbox.sandboxes.unix_local import UnixLocalSandboxClientOptions - from temporalio.contrib.openai_agents.workflow import temporal_sandbox_client - -# Configure tracing processor (optional - only if you have SGP credentials) -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - ) -) - -environment_variables = EnvironmentVariables.refresh() - -if environment_variables.WORKFLOW_NAME is None: - raise ValueError("Environment variable WORKFLOW_NAME is not set") - -if environment_variables.AGENT_NAME is None: - raise ValueError("Environment variable AGENT_NAME is not set") - -logger = make_logger(__name__) - -MODEL_NAME = "gpt-4o-mini" -INSTRUCTIONS = """You are a local sandbox assistant. - -You have access to shell tools that run real commands on the local machine. - -Guidelines: -- ALWAYS use the shell tools to actually run commands — never guess or make up - output. If the user asks for the Python version, run `python3 --version`. If - they ask to list files, run `ls`. If they ask you to compute something, use - `python3 -c "..."`. -- Run the minimal command(s) needed to answer the question. -- Report the real command output back to the user, concisely. 
-""" - - -class StateModel(BaseModel): - """State model for preserving conversation history across turns.""" - - input_list: list = [] - turn_number: int = 0 - - -@workflow.defn(name=environment_variables.WORKFLOW_NAME) -class At120OpenaiAgentsLocalSandboxWorkflow(BaseWorkflow): - """Long-running Temporal workflow that runs a SandboxAgent against the local sandbox.""" - - def __init__(self): - super().__init__(display_name=environment_variables.AGENT_NAME) - self._complete_task = False - self._state: StateModel | None = None - self._task_id = None - self._trace_id = None - self._parent_span_id = None - - @workflow.signal(name=SignalName.RECEIVE_EVENT) - async def on_task_event_send(self, params: SendEventParams) -> None: - logger.info(f"Received task event: {params.task.id}") - - if self._state is None: - raise ValueError("State is not initialized") - - self._state.turn_number += 1 - - # The ContextInterceptor reads ``self._task_id`` off the workflow - # instance and threads it through activity headers so the streaming - # model + hooks know which task to stream/persist to. - self._task_id = params.task.id - self._trace_id = params.task.id - - # Add the user message to conversation history. - self._state.input_list.append({"role": "user", "content": params.event.content.content}) - - # Echo back the client's message so it shows up in the UI. - await adk.messages.create(task_id=params.task.id, content=params.event.content) - - async with adk.tracing.span( - trace_id=params.task.id, - name=f"Turn {self._state.turn_number}", - input=self._state.model_dump(), - ) as span: - self._parent_span_id = span.id if span else None - - # Build the sandbox agent. The Shell capability becomes real shell - # tools backed by the sandbox client resolved at run time. 
- agent = SandboxAgent( - name="Local Sandbox Assistant", - model=MODEL_NAME, - instructions=INSTRUCTIONS, - capabilities=[Shell()], - ) - - # Point the run at the LOCAL sandbox backend registered on the worker - # under the name "local". ``temporal_sandbox_client`` resolves that - # registration so the sandbox tool calls execute as Temporal - # activities (durable + observable). - # - # IMPORTANT: ``NoopSnapshotSpec()`` skips the per-turn workspace - # snapshot — otherwise stopping the sandbox can raise - # ``WorkspaceArchiveReadError``. - run_config = RunConfig( - sandbox=SandboxRunConfig( - client=temporal_sandbox_client("local"), - options=UnixLocalSandboxClientOptions(), - snapshot=NoopSnapshotSpec(), - ) - ) - - # TemporalStreamingHooks creates the lifecycle messages (tool - # request/response, etc.) and works with the streaming model - # provider to stream tokens to the UI in real time. - result = await Runner.run( - agent, - self._state.input_list, - run_config=run_config, - hooks=TemporalStreamingHooks(task_id=params.task.id), - max_turns=10, - ) - - # IMPORTANT: We do NOT post the assistant message ourselves here. - # The TemporalStreamingModelProvider already streamed and persisted - # the assistant's response. We only persist conversation state for - # the next turn. - self._state.input_list = result.to_input_list() - - if span: - span.output = self._state.model_dump() - - @workflow.run - async def on_task_create(self, params: CreateTaskParams) -> str: - logger.info(f"Task created: {params.task.id}") - - self._state = StateModel(input_list=[], turn_number=0) - - await adk.messages.create( - task_id=params.task.id, - content=TextContent( - author="agent", - content=( - f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n" - f"Send me a message and I'll run real shell commands in a local " - f"sandbox (backed by Temporal) to answer." 
- ), - ), - ) - - await workflow.wait_condition(lambda: self._complete_task, timeout=None) - return "Task completed" - - @workflow.signal - async def complete_task_signal(self) -> None: - logger.info("Received complete_task signal") - self._complete_task = True diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml deleted file mode 100644 index 696894e32..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/pyproject.toml +++ /dev/null @@ -1,36 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "at120_openai_agents_local_sandbox" -version = "0.1.0" -description = "A Temporal OpenAI Agents SDK agent using a local (unix_local) sandbox" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk>=0.6.0", - "openai-agents>=0.14.3,<0.15", - "temporalio>=1.18.2", - "scale-gp", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "black", - "isort", - "flake8", - "debugpy>=1.8.15", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py deleted file mode 100644 index 5e161c061..000000000 --- a/examples/tutorials/10_async/10_temporal/120_openai_agents_local_sandbox/tests/test_agent.py +++ /dev/null @@ -1,144 +0,0 @@ -"""Tests for the Temporal OpenAI Agents SDK local-sandbox agent. 
- -This test suite validates that the agent actually runs shell commands in the -LOCAL sandbox (unix_local backend) via the Temporal sandbox bridge, by polling -for the agent's response: -- Ask for the Python version -> response contains "Python 3" -- Ask it to compute 21 * 2 with python3 -> response contains "42" - -To run these tests: -1. Make sure the agent is running (via docker-compose or `agentex agents run`) -2. Set the AGENTEX_API_BASE_URL environment variable if not using default -3. Run: pytest test_agent.py -v - -Configuration: -- AGENTEX_API_BASE_URL: Base URL for the AgentEx server (default: http://localhost:5003) -- AGENT_NAME: Name of the agent to test (default: at120-openai-agents-local-sandbox) -""" - -import os -import uuid - -import pytest -import pytest_asyncio -from test_utils.async_utils import ( - poll_messages, - send_event_and_poll_yielding, -) - -from agentex import AsyncAgentex -from agentex.types.task_message import TaskMessage -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest - -# Configuration from environment variables -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "at120-openai-agents-local-sandbox") - - -@pytest_asyncio.fixture -async def client(): - """Create an AsyncAgentex client instance for testing.""" - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - """Return the agent name for testing.""" - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - """Retrieve the agent ID based on the agent name.""" - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -async def _create_task_and_await_welcome(client: AsyncAgentex, agent_id: str) -> str: - """Create a task and wait for the workflow's 
welcome message; return the task id.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) - task = task_response.result - assert task is not None - - welcome_found = False - async for message in poll_messages( - client=client, - task_id=task.id, - timeout=30, - sleep_interval=1.0, - ): - assert isinstance(message, TaskMessage) - if message.content and message.content.type == "text" and message.content.author == "agent": - welcome_found = True - break - assert welcome_found, "Task creation (welcome) message not found" - return task.id - - -async def _send_and_collect_agent_text( - client: AsyncAgentex, agent_id: str, task_id: str, user_message: str -) -> str: - """Send a user message and accumulate the streamed agent text into a string.""" - final_message = None - async for message in send_event_and_poll_yielding( - client=client, - agent_id=agent_id, - task_id=task_id, - user_message=user_message, - timeout=60, - sleep_interval=1.0, - yield_updates=True, # Get updates as streaming writes chunks - ): - if message.content and message.content.type == "text" and message.content.author == "agent": - final_message = message - if message.streaming_status == "DONE": - break - - assert final_message is not None, "Should have received an agent text message" - assert final_message.content is not None, "Final message should have content" - return final_message.content.content or "" - - -class TestLocalSandboxEvents: - """Test the Temporal local-sandbox OpenAI Agents SDK agent.""" - - @pytest.mark.asyncio - async def test_shell_python_version(self, client: AsyncAgentex, agent_id: str): - """The agent should run `python3 --version` in the local sandbox. - - The sandbox runs on Python 3.12, so the real output contains "Python 3". 
- """ - task_id = await _create_task_and_await_welcome(client, agent_id) - text = await _send_and_collect_agent_text( - client, - agent_id, - task_id, - "Use your shell to print the Python version on this machine, then " - "tell me what it is.", - ) - assert text, "Expected a non-empty response from the sandbox agent." - assert "Python 3" in text - - @pytest.mark.asyncio - async def test_shell_compute(self, client: AsyncAgentex, agent_id: str): - """The agent should use python3 in the sandbox to compute 21 * 2 == 42.""" - task_id = await _create_task_and_await_welcome(client, agent_id) - text = await _send_and_collect_agent_text( - client, - agent_id, - task_id, - "Use python3 in your shell to compute 21 * 2 and tell me the result.", - ) - assert text, "Expected a non-empty response from the sandbox agent." - assert "42" in text - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore b/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore index c4f7a8b4b..c49489471 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/.dockerignore @@ -40,4 +40,4 @@ venv.bak/ .gitignore # Misc -.DS_Store \ No newline at end of file +.DS_Store diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/.env.example b/examples/tutorials/10_async/10_temporal/130_langgraph/.env.example deleted file mode 100644 index ab1a5790f..000000000 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/.env.example +++ /dev/null @@ -1,13 +0,0 @@ -# at130-langgraph - Environment Variables -# Copy this file to .env and fill in the values - -# API key for your LLM provider -LITELLM_API_KEY= - -# LLM base URL (optional - override to use a different provider) -# OPENAI_BASE_URL= - -# SGP Configuration (optional - for tracing) -# SGP_API_KEY= -# SGP_ACCOUNT_ID= -# SGP_CLIENT_BASE_URL= \ No newline at end 
of file diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/README.md b/examples/tutorials/10_async/10_temporal/130_langgraph/README.md index 61ccaf66a..0820f56ab 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/README.md +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/README.md @@ -1,58 +1,49 @@ -# at130-langgraph — AgentEx Temporal + LangGraph +# Tutorial: Temporal LangGraph Agent -A minimal Temporal-backed [LangGraph](https://langchain-ai.github.io/langgraph/) -agent. It uses the official [`temporalio.contrib.langgraph`](https://docs.temporal.io/develop/python/integrations/langgraph) -plugin so each LangGraph node runs as a durable **Temporal activity** (the LLM -`agent` node) or inline in the **workflow** (the `tools` node) — set per node -with `execute_in`. *Temporal is the runtime; LangGraph is the agent framework.* +This tutorial demonstrates how to build a **Temporal-backed** LangGraph agent on +AgentEx using the **unified harness surface**. The agent's LLM node runs as a +durable Temporal activity; the tools node runs inline in the workflow. -> The Temporal LangGraph plugin is currently **experimental**. +## Key Concepts -## The graph +### Temporal + LangGraph -``` -START → agent → (tool calls?) → tools → agent - → (no tool calls?) → END -``` - -- `agent` (`execute_in="activity"`): the LLM call — a retried, observable Temporal activity. -- `tools` (`execute_in="workflow"`): runs the tool calls inline in the workflow. +The ``LangGraphPlugin`` from ``temporalio.contrib.langgraph`` turns annotated graph +nodes into Temporal activities or inline workflow callables: -The router and tools are `async` so LangGraph awaits them directly (a sync -callable is offloaded via `run_in_executor`, which Temporal workflows forbid). 
+- `agent` node: `execute_in="activity"` (durable, retryable LLM call) +- `tools` node: `execute_in="workflow"` (inline, fast tool execution) -## Project structure - -``` -130_langgraph/ -├── project/ -│ ├── acp.py # Thin async ACP server; registers the LangGraphPlugin -│ ├── workflow.py # Runs the graph each turn; keeps multi-turn memory -│ ├── graph.py # LangGraph graph; nodes tagged execute_in activity/workflow -│ └── tools.py # Async tool(s) -└── run_worker.py is project/run_worker.py -``` +### Message surfacing -## Running +After each turn, ``emit_langgraph_messages`` converts the new LangGraph messages +(tool requests, tool responses, final text) into AgentEx ``TaskMessage`` objects +and posts them to the task's message stream. -```bash -agentex agents run --manifest manifest.yaml -``` +This is the Temporal-specific path. The non-Temporal async/sync channels use +``UnifiedEmitter.auto_send_turn`` / ``UnifiedEmitter.yield_turn`` with +``LangGraphTurn`` instead. -Open the Temporal UI at http://localhost:8080 to watch the workflow and the -`agent` activity execute. Use `dev.ipynb` to create a task and send messages. +## Files -## Adding tools +| File | Description | +|------|-------------| +| `project/acp.py` | ACP server (Temporal config, LangGraphPlugin) | +| `project/graph.py` | LangGraph graph (agent + tools nodes) | +| `project/workflow.py` | Temporal workflow (signal handlers, emit_langgraph_messages) | +| `project/run_worker.py` | Temporal worker runner | +| `project/tools.py` | Tool definitions (weather example) | +| `tests/test_agent.py` | Integration tests | +| `manifest.yaml` | Agent configuration (name: at130-langgraph) | -Define an **async** `@tool` in `project/tools.py` and add it to `TOOLS`. The -model is bound with `TOOLS` and the tool node runs them by name. +## Running Locally -For a fuller version with human-in-the-loop approval and graph-introspection -queries, scaffold the `temporal-langgraph` template via `agentex init`. 
+```bash +agentex agents run +``` -## Tests +## Running Tests -- `tests/test_graph_temporal.py` — hermetic ReAct-loop test with a stub model, - plus a live end-to-end run through the real Temporal plugin (skipped unless - `LITELLM_API_KEY` is set). -- `tests/test_agent.py` — live integration against a running agent. +```bash +pytest tests/test_agent.py -v +``` diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb b/examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb deleted file mode 100644 index 5320daac7..000000000 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/dev.ipynb +++ /dev/null @@ -1,126 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "36834357", - "metadata": {}, - "outputs": [], - "source": [ - "from agentex import Agentex\n", - "\n", - "client = Agentex(base_url=\"http://localhost:5003\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d1c309d6", - "metadata": {}, - "outputs": [], - "source": [ - "AGENT_NAME = \"at130-langgraph\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9f6e6ef0", - "metadata": {}, - "outputs": [], - "source": [ - "# (REQUIRED) Create a new task. 
For Async agents, you must create a task for messages to be associated with.\n", - "import uuid\n", - "\n", - "rpc_response = client.agents.create_task(\n", - " agent_name=AGENT_NAME,\n", - " params={\n", - " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", - " \"params\": {}\n", - " }\n", - ")\n", - "\n", - "task = rpc_response.result\n", - "print(task)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b03b0d37", - "metadata": {}, - "outputs": [], - "source": [ - "# Send an event to the agent\n", - "\n", - "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", - "# - TextContent: A message with just text content \n", - "# - DataContent: A message with JSON-serializable data content\n", - "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", - "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", - "\n", - "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", - "\n", - "rpc_response = client.agents.send_event(\n", - " agent_name=AGENT_NAME,\n", - " params={\n", - " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", - " \"task_id\": task.id,\n", - " }\n", - ")\n", - "\n", - "event = rpc_response.result\n", - "print(event)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a6927cc0", - "metadata": {}, - "outputs": [], - "source": [ - "# Subscribe to the async task messages produced by the agent\n", - "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", - "\n", - "task_messages = subscribe_to_async_task_messages(\n", - " client=client,\n", - " task=task, \n", - " only_after_timestamp=event.created_at, \n", - " 
print_messages=True,\n", - " rich_print=True,\n", - " timeout=5,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4864e354", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.9" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml b/examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml deleted file mode 100644 index d54d8e5ff..000000000 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/environments.yaml +++ /dev/null @@ -1,64 +0,0 @@ -# Agent Environment Configuration -# ------------------------------ -# This file defines environment-specific settings for your agent. -# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. - -# ********** EXAMPLE ********** -# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI -# environments: -# dev: -# auth: -# principal: -# user_id: "1234567890" -# user_name: "John Doe" -# user_email: "john.doe@example.com" -# user_role: "admin" -# user_permissions: "read, write, delete" -# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts -# replicas: 3 -# resources: -# requests: -# cpu: "1000m" -# memory: "2Gi" -# limits: -# cpu: "2000m" -# memory: "4Gi" -# env: -# - name: LOG_LEVEL -# value: "DEBUG" -# - name: ENVIRONMENT -# value: "staging" -# -# kubernetes: -# # OPTIONAL - Otherwise it will be derived from separately. 
However, this can be used to override the derived -# # namespace and deploy it with in the same namespace that already exists for a separate agent. -# namespace: "team-at130-langgraph" -# ********** END EXAMPLE ********** - -schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI -environments: - dev: - auth: - principal: - user_id: # TODO: Fill in - account_id: # TODO: Fill in - helm_overrides: - # This is used to override the global helm values.yaml file in the agentex-agent helm charts - replicaCount: 2 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" - temporal-worker: - enabled: true - replicaCount: 2 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" \ No newline at end of file diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml b/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml index d1f5960b1..936ebfa68 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/manifest.yaml @@ -1,20 +1,5 @@ -# Agent Manifest Configuration -# --------------------------- -# This file defines how your agent should be built and deployed. - -# Build Configuration -# ------------------ -# The build config defines what gets packaged into your agent's Docker image. -# This same configuration is used whether building locally or remotely. -# -# When building: -# 1. All files from include_paths are collected into a build context -# 2. The context is filtered by dockerignore rules -# 3. The Dockerfile uses this context to build your agent's image -# 4. The image is pushed to a registry and used to run your agent build: context: - # Build from the tutorials root so shared test_utils are available. 
root: ../../../ include_paths: - 10_async/10_temporal/130_langgraph @@ -22,107 +7,45 @@ build: dockerfile: 10_async/10_temporal/130_langgraph/Dockerfile dockerignore: 10_async/10_temporal/130_langgraph/.dockerignore - -# Local Development Configuration -# ----------------------------- -# Only used when running the agent locally local_development: agent: - port: 8000 # Port where your local ACP server is running - host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) - - # File paths for local development (relative to this manifest.yaml) + port: 8000 + host_address: host.docker.internal paths: - # Path to ACP server file - # Examples: - # project/acp.py (standard) - # src/server.py (custom structure) - # ../shared/acp.py (shared across projects) - # /absolute/path/acp.py (absolute path) acp: project/acp.py - - # Path to temporal worker file - # Examples: - # project/run_worker.py (standard) - # workers/temporal.py (custom structure) - # ../shared/worker.py (shared across projects) worker: project/run_worker.py - -# Agent Configuration -# ----------------- agent: - # Type of agent - either sync or async acp_type: async - - # Unique name for your agent - # Used for task routing and monitoring name: at130-langgraph + description: "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities" - # Description of what your agent does - # Helps with documentation and discovery - description: "A Temporal-backed LangGraph agent whose nodes run as Temporal activities" - - # Temporal workflow configuration - # This enables your agent to run as a Temporal workflow for long-running tasks temporal: enabled: true workflows: - # Name of the workflow class - # Must match the @workflow.defn name in your workflow.py - name: at130-langgraph - - # Queue name for task distribution - # Used by Temporal to route tasks to your agent - # Convention: _task_queue queue_name: 
at130_langgraph_queue - # Optional: Health check port for temporal worker - # Defaults to 80 if not specified - # health_check_port: 80 - - # Optional: Credentials mapping - # Maps Kubernetes secrets to environment variables - # Common credentials include: credentials: - env_var_name: REDIS_URL secret_name: redis-url-secret secret_key: url - # - env_var_name: LITELLM_API_KEY - # secret_name: litellm-api-key - # secret_key: api-key - - # Optional: Set Environment variables for running your agent locally as well - # as for deployment later on - env: {} - # LITELLM_API_KEY: "" - # OPENAI_BASE_URL: "" - # OPENAI_ORG_ID: "" + env: {} -# Deployment Configuration -# ----------------------- -# Configuration for deploying your agent to Kubernetes clusters deployment: - # Container image configuration image: - repository: "" # Update with your container registry - tag: "latest" # Default tag, should be versioned in production + repository: "" + tag: "latest" - imagePullSecrets: [] # Update with your image pull secret name - # - name: my-registry-secret + imagePullSecrets: [] - # Global deployment settings that apply to all clusters - # These can be overridden in cluster-specific environments (environments.yaml) global: - # Default replica count replicaCount: 1 - - # Default resource requirements resources: requests: cpu: "500m" memory: "1Gi" limits: cpu: "1000m" - memory: "2Gi" \ No newline at end of file + memory: "2Gi" diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py index c01f8831c..7af9c5e68 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/acp.py @@ -1,19 +1,13 @@ -"""ACP server for the Temporal LangGraph agent. +"""ACP server for the Temporal harness LangGraph agent. -This file is intentionally thin. 
When ``acp_type="async"`` is combined with -``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: +The Temporal ``LangGraphPlugin`` runs this agent's +graph nodes as Temporal activities. The agent logic lives in ``workflow.py`` +(the runtime) and ``graph.py`` (the LangGraph graph), executed by the Temporal +worker (``run_worker.py``), not by this HTTP process. - HTTP task/create → @workflow.run on the workflow class - HTTP task/event/send → @workflow.signal(SignalName.RECEIVE_EVENT) - HTTP task/cancel → workflow cancellation via the Temporal client - -so we don't define any handlers here. The agent logic lives in -``project/workflow.py`` (the runtime) and ``project/graph.py`` (the LangGraph -graph whose nodes run as Temporal activities), executed by the Temporal worker -(``project/run_worker.py``), not by this HTTP process. - -The ``LangGraphPlugin`` is registered here too so the Temporal client started -by FastACP shares the same graph registry as the worker. +The workflow uses ``emit_langgraph_messages`` to surface turn messages to +AgentEx. That helper is Temporal-specific and is not replaced by the unified +harness here (``UnifiedEmitter`` targets the non-Temporal async/sync channels). """ from __future__ import annotations @@ -33,10 +27,8 @@ acp = FastACP.create( acp_type="async", config=TemporalACPConfig( - # When deployed to the cluster, the Temporal address is set automatically. - # Locally we point at the Temporal service from docker compose. 
type="temporal", temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})], ), -) \ No newline at end of file +) diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py index 0589aa9ba..7adba3ae4 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/graph.py @@ -1,24 +1,9 @@ """LangGraph graph for at130-langgraph — nodes run as Temporal activities. -The ``temporalio.contrib.langgraph`` plugin runs each node where its -``execute_in`` metadata says: the LLM ``agent`` node as a durable Temporal -**activity**, the ``tools`` node inline in the **workflow**. - - START → agent → (tool calls?) → tools → agent - → (no tool calls?) → END - -The router and tools are ``async`` so LangGraph awaits them directly — a sync -callable would be offloaded via ``run_in_executor``, which Temporal's workflow -event loop does not support. - -The in-workflow ``tools`` node is a plain ``async`` function rather than -LangGraph's ``ToolNode`` prebuilt on purpose. The plugin wraps an in-workflow -node in ``wrap_workflow``, whose closure captures the wrapped object. When that -object is itself a LangChain ``Runnable`` (as ``ToolNode`` is), LangGraph's -``compile()`` subgraph detection (``find_subgraph_pregel`` → -``get_function_nonlocals``) recurses through that wrapper without cycle -detection and never terminates, tripping Temporal's deadlock detector. A plain -function isn't a ``Runnable``, so compile stays trivial. +The ``temporalio.contrib.langgraph`` plugin runs each node where its +``execute_in`` metadata says, looping agent → tools → agent until no tool +calls remain. The LLM ``agent`` node runs as a durable Temporal activity; +the ``tools`` node runs inline in the workflow. 
""" from __future__ import annotations @@ -40,10 +25,8 @@ from project.tools import TOOLS -# Look up tools by name for the in-workflow tools node. _TOOLS_BY_NAME = {tool.name: tool for tool in TOOLS} -# Name this graph is registered under in the LangGraphPlugin (acp.py / run_worker.py). GRAPH_NAME = "at130-langgraph" MODEL_NAME = "gpt-4o" SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. @@ -62,37 +45,27 @@ async def agent_node(state: AgentState) -> dict[str, Any]: llm = ChatOpenAI(model=MODEL_NAME).bind_tools(TOOLS) messages = state["messages"] if not messages or not isinstance(messages[0], SystemMessage): - system = SystemMessage( - content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")) - ) + system = SystemMessage(content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) messages = [system, *messages] return {"messages": [await llm.ainvoke(messages)]} async def tools_node(state: AgentState) -> dict[str, Any]: - """Run the tool calls the model requested. Runs inline in the workflow. - - A plain ``async`` function (not LangGraph's ``ToolNode``) — see the module - docstring for why a ``Runnable`` tools node can't be compiled here. - """ + """Run the tool calls the model requested. Runs inline in the workflow.""" last = state["messages"][-1] results: list[Any] = [] for call in getattr(last, "tool_calls", None) or []: tool = _TOOLS_BY_NAME.get(call["name"]) - # Mirror ToolNode: surface an unknown/hallucinated tool name as an error - # ToolMessage so the graph keeps running instead of crashing the node. if tool is None: output = f"Error: unknown tool {call['name']!r}. 
Available: {list(_TOOLS_BY_NAME)}" else: output = await tool.ainvoke(call["args"]) - results.append( - ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"]) - ) + results.append(ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"])) return {"messages": results} async def route_after_agent(state: AgentState) -> str: - """Go to the tools node if the model requested tools, else finish (async router).""" + """Go to the tools node if the model requested tools, else finish.""" last = state["messages"][-1] return "tools" if getattr(last, "tool_calls", None) else END diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py index 7040f560b..4b31bf396 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/run_worker.py @@ -5,8 +5,7 @@ The ``LangGraphPlugin`` is given the graph registry (``{ GRAPH_NAME: graph }``). At runtime it turns the graph's ``execute_in="activity"`` nodes into Temporal -activities and registers them on the worker automatically — so we don't have -to enumerate node activities by hand. +activities and registers them on the worker automatically. 
""" import asyncio @@ -14,7 +13,7 @@ from temporalio.contrib.langgraph import LangGraphPlugin from project.graph import GRAPH_NAME, build_graph -from project.workflow import At130LanggraphWorkflow +from project.workflow import AtHarnessLanggraphWorkflow from agentex.lib.utils.debug import setup_debug_if_enabled from agentex.lib.utils.logging import make_logger from agentex.lib.environment_variables import EnvironmentVariables @@ -32,9 +31,6 @@ async def main(): if task_queue_name is None: raise ValueError("WORKFLOW_TASK_QUEUE is not set") - # AgentexWorker runs workflows with an unsandboxed runner, so importing - # langchain/langgraph inside the workflow + nodes is fine. The LangGraph - # plugin registers the graph's activity-nodes for us. worker = AgentexWorker( task_queue=task_queue_name, plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})], @@ -42,9 +38,9 @@ async def main(): await worker.run( activities=get_all_activities(), - workflow=At130LanggraphWorkflow, + workflow=AtHarnessLanggraphWorkflow, ) if __name__ == "__main__": - asyncio.run(main()) \ No newline at end of file + asyncio.run(main()) diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py index 20b7185ee..e7220016e 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/tools.py @@ -1,20 +1,37 @@ -"""Tools for the LangGraph agent. +"""Tool definitions for the 130_langgraph temporal agent.""" -Tools are ``async`` so the in-workflow tool node can await them directly -(a sync tool would be offloaded via ``run_in_executor``, which Temporal's -workflow event loop does not allow). -""" +from langchain_core.tools import Tool -from __future__ import annotations -from langchain_core.tools import tool +def get_weather(city: str) -> str: + """Get the current weather for a city. 
+ Args: + city: The name of the city to get weather for. -@tool -async def get_weather(city: str) -> str: - """Get the current weather for a city.""" - # TODO: replace with a real weather API call. + Returns: + A string describing the weather conditions. + """ return f"The weather in {city} is sunny and 72°F" -TOOLS = [get_weather] +async def aget_weather(city: str) -> str: + """Native async tool entrypoint. + + ``tools_node`` runs inline in the Temporal workflow and invokes tools via + ``tool.ainvoke``. A sync-only tool forces LangChain to bridge through + ``run_in_executor`` (a thread pool), which the deterministic Temporal + workflow event loop forbids (``NotImplementedError``). Providing a real + coroutine keeps tool execution on the workflow loop. + """ + return get_weather(city) + + +weather_tool = Tool( + name="get_weather", + func=get_weather, + coroutine=aget_weather, + description="Get the current weather for a city. Input should be a city name.", +) + +TOOLS = [weather_tool] diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py b/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py index a50670251..b9224ca00 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/project/workflow.py @@ -1,4 +1,4 @@ -"""Temporal workflow for at130-langgraph — Temporal as the LangGraph runtime. +"""Temporal workflow for at130-langgraph. Each turn the workflow runs the LangGraph graph (``project/graph.py``) via the ``temporalio.contrib.langgraph`` plugin. 
The plugin runs the LLM ``agent`` node @@ -37,7 +37,7 @@ @workflow.defn(name=environment_variables.WORKFLOW_NAME) -class At130LanggraphWorkflow(BaseWorkflow): +class AtHarnessLanggraphWorkflow(BaseWorkflow): """Runs the LangGraph agent each turn; its nodes run as Temporal activities.""" def __init__(self) -> None: @@ -56,10 +56,7 @@ async def on_task_event_send(self, params: SendEventParams) -> None: result = await compiled.ainvoke({"messages": self._messages}) self._messages = result["messages"] - # Surface the messages this turn produced (tool calls, results, final - # text) to the AgentEx UI. The SDK helper does the LangGraph→AgentEx - # message conversion. - await emit_langgraph_messages(self._messages[self._emitted:], params.task.id) + await emit_langgraph_messages(self._messages[self._emitted :], params.task.id) self._emitted = len(self._messages) @workflow.signal diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml b/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml index e22905de4..6d2262761 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/pyproject.toml @@ -5,13 +5,11 @@ build-backend = "hatchling.build" [project] name = "at130-langgraph" version = "0.1.0" -description = "A Temporal-backed LangGraph agent whose nodes run as Temporal activities" +description = "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities" requires-python = ">=3.12" dependencies = [ "agentex-sdk", "scale-gp", - # Temporal with the LangGraph plugin (temporalio.contrib.langgraph), - # which runs LangGraph nodes as Temporal activities. Needs >=1.27.0. 
"temporalio[langgraph]>=1.27.0", "langchain-openai", "langchain-core", @@ -39,4 +37,4 @@ target-version = ['py312'] [tool.isort] profile = "black" -line_length = 88 \ No newline at end of file +line_length = 88 diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py index b798f568f..f2292389f 100644 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py +++ b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_agent.py @@ -1,4 +1,4 @@ -"""Integration tests for the Temporal + LangGraph agent (live agent required). +"""Integration tests for the Temporal harness LangGraph agent (live agent required). These drive a *running* agent over the AgentEx API and verify that: - the agent sends a welcome message on task creation, @@ -6,9 +6,6 @@ (proving the LLM node ran as a Temporal activity and the tool node ran), - the final answer reflects the tool output. -For fast, network-free coverage of the graph + human-in-the-loop logic, see -``test_graph_temporal.py``. - To run: 1. Start the agent (worker + ACP server): ``agentex agents run --manifest manifest.yaml`` 2. 
Set AGENTEX_API_BASE_URL if not using the default @@ -60,29 +57,18 @@ class TestNonStreamingEvents: @pytest.mark.asyncio async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): """Create a task, ask about weather, verify the tool round-trip.""" - task_response = await client.agents.create_task( - agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex) - ) + task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) task = task_response.result assert task is not None - # Wait for the welcome message from on_task_create task_creation_found = False - async for message in poll_messages( - client=client, task_id=task.id, timeout=30, sleep_interval=1.0 - ): + async for message in poll_messages(client=client, task_id=task.id, timeout=30, sleep_interval=1.0): assert isinstance(message, TaskMessage) - if ( - message.content - and message.content.type == "text" - and message.content.author == "agent" - ): + if message.content and message.content.type == "text" and message.content.author == "agent": task_creation_found = True break assert task_creation_found, "Task creation welcome message not found" - # Ask about weather — the agent (LangGraph node, as a Temporal activity) - # should call get_weather. 
seen_tool_request = False seen_tool_response = False final_message = None @@ -101,11 +87,7 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): if message.content and message.content.type == "tool_response": seen_tool_response = True - if ( - message.content - and message.content.type == "text" - and message.content.author == "agent" - ): + if message.content and message.content.type == "text" and message.content.author == "agent": final_message = message content_length = len(getattr(message.content, "content", "") or "") if getattr(message, "streaming_status", None) in (None, "DONE") and content_length > 0: @@ -115,11 +97,8 @@ async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): assert seen_tool_request, "Expected a tool_request (agent calling get_weather)" assert seen_tool_response, "Expected a tool_response (get_weather result)" assert final_message is not None, "Expected a final agent text message" - final_text = ( - getattr(final_message.content, "content", None) if final_message.content else None - ) + final_text = getattr(final_message.content, "content", None) if final_message.content else None assert isinstance(final_text, str) and len(final_text) > 0 - # get_weather always returns "72°F" — the response should mention it. assert "72" in final_text, "Expected weather response to mention 72°F" diff --git a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py b/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py deleted file mode 100644 index 485b896f6..000000000 --- a/examples/tutorials/10_async/10_temporal/130_langgraph/tests/test_graph_temporal.py +++ /dev/null @@ -1,105 +0,0 @@ -"""Tests for the Temporal + LangGraph agent's graph. - -Two layers: - -1. ``TestGraphLogic`` — hermetic, no network. 
Compiles the actual shipped - graph (``project/graph.py``) with a deterministic stub model and runs the - ReAct loop (agent → tools → agent) to completion. - -2. ``TestTemporalPlugin`` — end-to-end through the real Temporal LangGraph - plugin on a local Temporal server, proving the LLM node runs as an activity - and the tool node in the workflow. Needs a real model, so it is skipped - unless ``LITELLM_API_KEY`` (or ``OPENAI_API_KEY``) is set. - -Run from the agent's own (uv) environment: pytest tests/test_graph_temporal.py -v -""" - -from __future__ import annotations - -import os -import uuid - -import pytest - -pytest.importorskip("langgraph") -pytest.importorskip("temporalio.contrib.langgraph") - -import project.graph as graph_module -from temporalio import workflow -from project.graph import GRAPH_NAME, build_graph -from langchain_core.messages import AIMessage, ToolMessage -from temporalio.contrib.langgraph import graph as lg_graph - - -@workflow.defn -class _DriverWorkflow: - """Module-level driver workflow (Temporal forbids local workflow classes).""" - - @workflow.run - async def run(self, message: str) -> str: - compiled = lg_graph(GRAPH_NAME).compile() - result = await compiled.ainvoke({"messages": [{"role": "user", "content": message}]}) - return result["messages"][-1].content - - -class _StubModel: - """Deterministic stand-in for ``ChatOpenAI(...).bind_tools(...)``. - - First call → emit a tool call for ``get_weather``; once a ToolMessage is in - the history → emit a plain text answer. Drives the full ReAct loop offline. 
- """ - - def bind_tools(self, _tools): - return self - - async def ainvoke(self, messages): - if any(isinstance(m, ToolMessage) for m in messages): - return AIMessage(content="All done — the tool has run.") - return AIMessage( - content="", - tool_calls=[{"id": "call_1", "name": "get_weather", "args": {"city": "Denver"}}], - ) - - -class TestGraphLogic: - """Hermetic test of the ReAct loop, no network.""" - - @pytest.mark.asyncio - async def test_react_loop_runs_tool(self, monkeypatch): - monkeypatch.setattr(graph_module, "ChatOpenAI", lambda *_a, **_k: _StubModel()) - compiled = build_graph().compile() - result = await compiled.ainvoke({"messages": [{"role": "user", "content": "go"}]}) - - tool_outputs = [m.content for m in result["messages"] if isinstance(m, ToolMessage)] - assert any("sunny" in o for o in tool_outputs) - assert "done" in result["messages"][-1].content.lower() - - -@pytest.mark.skipif( - not (os.environ.get("LITELLM_API_KEY") or os.environ.get("OPENAI_API_KEY")), - reason="needs a real model (set LITELLM_API_KEY) for the live Temporal run", -) -class TestTemporalPlugin: - """End-to-end through the real Temporal LangGraph plugin on a local server.""" - - @pytest.mark.asyncio - async def test_nodes_run_as_activities_via_plugin(self): - from temporalio.worker import Worker, UnsandboxedWorkflowRunner - from temporalio.testing import WorkflowEnvironment - from temporalio.contrib.langgraph import LangGraphPlugin - - plugin = LangGraphPlugin(graphs={GRAPH_NAME: build_graph()}) - async with await WorkflowEnvironment.start_local(plugins=[plugin]) as env: - async with Worker( - env.client, - task_queue="tq", - workflows=[_DriverWorkflow], - workflow_runner=UnsandboxedWorkflowRunner(), - ): - out = await env.client.execute_workflow( - _DriverWorkflow.run, - "What's the weather in Denver? 
Use the get_weather tool.", - id=f"wf-{uuid.uuid4()}", - task_queue="tq", - ) - assert "denver" in out.lower() diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore b/examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore deleted file mode 100644 index c49489471..000000000 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/.dockerignore +++ /dev/null @@ -1,43 +0,0 @@ -# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg - -# Environments -.env** -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# IDE -.idea/ -.vscode/ -*.swp -*.swo - -# Git -.git -.gitignore - -# Misc -.DS_Store diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile b/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile deleted file mode 100644 index c107e3269..000000000 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -COPY 10_async/10_temporal/140_harness_openai/pyproject.toml /app/140_harness_openai/pyproject.toml -COPY 10_async/10_temporal/140_harness_openai/README.md /app/140_harness_openai/README.md - -WORKDIR /app/140_harness_openai - -COPY 10_async/10_temporal/140_harness_openai/project /app/140_harness_openai/project -COPY 10_async/10_temporal/140_harness_openai/tests /app/140_harness_openai/tests -COPY test_utils 
/app/test_utils - -RUN uv pip install --system .[dev] - -ENV PYTHONPATH=/app - -ENV AGENT_NAME=at140-harness-openai - -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] - -# When we deploy the worker, we will replace the CMD with the following -# CMD ["python", "-m", "run_worker"] diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml b/examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml deleted file mode 100644 index f90511911..000000000 --- a/examples/tutorials/10_async/10_temporal/140_harness_openai/environments.yaml +++ /dev/null @@ -1,64 +0,0 @@ -# Agent Environment Configuration -# ------------------------------ -# This file defines environment-specific settings for your agent. -# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. - -# ********** EXAMPLE ********** -# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI -# environments: -# dev: -# auth: -# principal: -# user_id: "1234567890" -# user_name: "John Doe" -# user_email: "john.doe@example.com" -# user_role: "admin" -# user_permissions: "read, write, delete" -# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts -# replicas: 3 -# resources: -# requests: -# cpu: "1000m" -# memory: "2Gi" -# limits: -# cpu: "2000m" -# memory: "4Gi" -# env: -# - name: LOG_LEVEL -# value: "DEBUG" -# - name: ENVIRONMENT -# value: "staging" -# -# kubernetes: -# # OPTIONAL - Otherwise it will be derived from separately. However, this can be used to override the derived -# # namespace and deploy it with in the same namespace that already exists for a separate agent. 
-# namespace: "team-example-tutorial" -# ********** END EXAMPLE ********** - -schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI -environments: - dev: - auth: - principal: - user_id: # TODO: Fill in - account_id: # TODO: Fill in - helm_overrides: - # This is used to override the global helm values.yaml file in the agentex-agent helm charts - replicaCount: 2 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" - temporal-worker: - enabled: true - replicaCount: 2 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" \ No newline at end of file diff --git a/examples/tutorials/10_async/10_temporal/140_harness_openai/project/__init__.py b/examples/tutorials/10_async/10_temporal/140_harness_openai/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/00_base/harness_pydantic_ai/.dockerignore b/examples/tutorials/10_async/10_temporal/150_codex/.dockerignore similarity index 100% rename from examples/tutorials/10_async/00_base/harness_pydantic_ai/.dockerignore rename to examples/tutorials/10_async/10_temporal/150_codex/.dockerignore diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile b/examples/tutorials/10_async/10_temporal/150_codex/Dockerfile similarity index 66% rename from examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile rename to examples/tutorials/10_async/10_temporal/150_codex/Dockerfile index e2f8807fd..9561548c4 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/Dockerfile +++ b/examples/tutorials/10_async/10_temporal/150_codex/Dockerfile @@ -22,19 +22,19 @@ RUN uv pip install --system --upgrade pip setuptools wheel ENV UV_HTTP_TIMEOUT=1000 -COPY 10_async/10_temporal/harness_codex/pyproject.toml /app/harness_codex/pyproject.toml -COPY 10_async/10_temporal/harness_codex/README.md /app/harness_codex/README.md +COPY 
10_async/10_temporal/150_codex/pyproject.toml /app/150_codex/pyproject.toml +COPY 10_async/10_temporal/150_codex/README.md /app/150_codex/README.md -WORKDIR /app/harness_codex +WORKDIR /app/150_codex -COPY 10_async/10_temporal/harness_codex/project /app/harness_codex/project -COPY 10_async/10_temporal/harness_codex/tests /app/harness_codex/tests +COPY 10_async/10_temporal/150_codex/project /app/150_codex/project +COPY 10_async/10_temporal/150_codex/tests /app/150_codex/tests COPY test_utils /app/test_utils RUN uv pip install --system .[dev] ENV PYTHONPATH=/app -ENV AGENT_NAME=at-harness-codex +ENV AGENT_NAME=at150-codex CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/README.md b/examples/tutorials/10_async/10_temporal/150_codex/README.md similarity index 95% rename from examples/tutorials/10_async/10_temporal/harness_codex/README.md rename to examples/tutorials/10_async/10_temporal/150_codex/README.md index 4f9b76955..498b81374 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/README.md +++ b/examples/tutorials/10_async/10_temporal/150_codex/README.md @@ -1,4 +1,4 @@ -# harness_codex (Temporal) +# 150_codex (Temporal) Tutorial agent demonstrating the `convert_codex_to_agentex_events` tap, `CodexTurn`, and `UnifiedEmitter` for a **Temporal-durable** async ACP agent. 
@@ -36,7 +36,7 @@ Live runs require: ```bash cd /path/to/scale-agentex-python -uv run --all-packages --all-extras pytest examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py -q +uv run --all-packages --all-extras pytest examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py -q ``` ## Running live integration tests diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/conftest.py b/examples/tutorials/10_async/10_temporal/150_codex/conftest.py similarity index 72% rename from examples/tutorials/10_async/10_temporal/harness_codex/conftest.py rename to examples/tutorials/10_async/10_temporal/150_codex/conftest.py index 4ae6ce61a..6370f278d 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/conftest.py +++ b/examples/tutorials/10_async/10_temporal/150_codex/conftest.py @@ -11,7 +11,7 @@ # AGENT_NAME must match the manifest's agent name: the live test queries the # server by this name, and project.workflow reads it at import time. 
-os.environ.setdefault("AGENT_NAME", "at-harness-codex") +os.environ.setdefault("AGENT_NAME", "at150-codex") os.environ.setdefault("ACP_URL", "http://localhost:8000") -os.environ.setdefault("WORKFLOW_NAME", "at-harness-codex") -os.environ.setdefault("WORKFLOW_TASK_QUEUE", "at_harness_codex_queue") +os.environ.setdefault("WORKFLOW_NAME", "at150-codex") +os.environ.setdefault("WORKFLOW_TASK_QUEUE", "at150_codex_queue") diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml b/examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml similarity index 80% rename from examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml rename to examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml index 3bc21dccc..d64bdfad0 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/manifest.yaml +++ b/examples/tutorials/10_async/10_temporal/150_codex/manifest.yaml @@ -2,10 +2,10 @@ build: context: root: ../../../ include_paths: - - 10_async/10_temporal/harness_codex + - 10_async/10_temporal/150_codex - test_utils - dockerfile: 10_async/10_temporal/harness_codex/Dockerfile - dockerignore: 10_async/10_temporal/harness_codex/.dockerignore + dockerfile: 10_async/10_temporal/150_codex/Dockerfile + dockerignore: 10_async/10_temporal/150_codex/.dockerignore local_development: agent: @@ -17,14 +17,14 @@ local_development: agent: acp_type: async - name: at-harness-codex + name: at150-codex description: Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess temporal: enabled: true workflows: - - name: at-harness-codex - queue_name: at_harness_codex_queue + - name: at150-codex + queue_name: at150_codex_queue credentials: - env_var_name: OPENAI_API_KEY @@ -50,7 +50,7 @@ deployment: global: agent: - name: "at-harness-codex" + name: "at150-codex" description: "Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess" replicaCount: 1 resources: diff --git 
a/examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/__init__.py b/examples/tutorials/10_async/10_temporal/150_codex/project/__init__.py similarity index 100% rename from examples/tutorials/10_async/00_base/120_openai_agents_local_sandbox/project/__init__.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/__init__.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/acp.py b/examples/tutorials/10_async/10_temporal/150_codex/project/acp.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/harness_codex/project/acp.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/acp.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/activities.py b/examples/tutorials/10_async/10_temporal/150_codex/project/activities.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/harness_codex/project/activities.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/activities.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/run_worker.py b/examples/tutorials/10_async/10_temporal/150_codex/project/run_worker.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/harness_codex/project/run_worker.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/run_worker.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/workflow.py b/examples/tutorials/10_async/10_temporal/150_codex/project/workflow.py similarity index 100% rename from examples/tutorials/10_async/10_temporal/harness_codex/project/workflow.py rename to examples/tutorials/10_async/10_temporal/150_codex/project/workflow.py diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml b/examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml similarity index 96% rename from examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml 
rename to examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml index c4d67d285..7e1d6250f 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/pyproject.toml +++ b/examples/tutorials/10_async/10_temporal/150_codex/pyproject.toml @@ -3,7 +3,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [project] -name = "at-harness-codex" +name = "at150-codex" version = "0.1.0" description = "Temporal tutorial agent driving the unified harness surface via local codex CLI subprocess" readme = "README.md" diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py similarity index 99% rename from examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py rename to examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py index 2066b35b1..fa6c66083 100644 --- a/examples/tutorials/10_async/10_temporal/harness_codex/tests/test_agent.py +++ b/examples/tutorials/10_async/10_temporal/150_codex/tests/test_agent.py @@ -213,7 +213,7 @@ async def _auto_send(_self, turn, *_a, **_kw): LIVE = os.environ.get("CODEX_LIVE_TESTS", "") == "1" AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-codex") +AGENT_NAME = os.environ.get("AGENT_NAME", "at150-codex") @pytest.mark.skipif( diff --git a/examples/tutorials/10_async/10_temporal/harness_codex/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_codex/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md b/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md deleted file mode 100644 index 4df6969f1..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# Tutorial: Temporal Harness LangGraph Agent - -This 
tutorial demonstrates how to build a **Temporal-backed** LangGraph agent on -AgentEx, following the ``130_langgraph`` pattern. The agent's LLM node runs as a -durable Temporal activity; the tools node runs inline in the workflow. - -This agent is named ``at-harness-langgraph`` to distinguish it from -``at130-langgraph`` (the bespoke reference). The graph and workflow structure are -identical; only the agent name changes. - -## Key Concepts - -### Temporal + LangGraph - -The ``LangGraphPlugin`` from ``temporalio.contrib.langgraph`` turns annotated graph -nodes into Temporal activities or inline workflow callables: - -- `agent` node: `execute_in="activity"` (durable, retryable LLM call) -- `tools` node: `execute_in="workflow"` (inline, fast tool execution) - -### Message surfacing - -After each turn, ``emit_langgraph_messages`` converts the new LangGraph messages -(tool requests, tool responses, final text) into AgentEx ``TaskMessage`` objects -and posts them to the task's message stream. - -This is the Temporal-specific path. The non-Temporal async/sync channels use -``UnifiedEmitter.auto_send_turn`` / ``UnifiedEmitter.yield_turn`` with -``LangGraphTurn`` instead. 
- -## Files - -| File | Description | -|------|-------------| -| `project/acp.py` | ACP server (Temporal config, LangGraphPlugin) | -| `project/graph.py` | LangGraph graph (agent + tools nodes) | -| `project/workflow.py` | Temporal workflow (signal handlers, emit_langgraph_messages) | -| `project/run_worker.py` | Temporal worker runner | -| `project/tools.py` | Tool definitions (weather example) | -| `tests/test_agent.py` | Integration tests | -| `manifest.yaml` | Agent configuration (name: at-harness-langgraph) | - -## Running Locally - -```bash -agentex agents run -``` - -## Running Tests - -```bash -pytest tests/test_agent.py -v -``` diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml b/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml deleted file mode 100644 index 596d38eb4..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/manifest.yaml +++ /dev/null @@ -1,51 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/10_temporal/harness_langgraph - - test_utils - dockerfile: 10_async/10_temporal/harness_langgraph/Dockerfile - dockerignore: 10_async/10_temporal/harness_langgraph/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - worker: project/run_worker.py - -agent: - acp_type: async - name: at-harness-langgraph - description: "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities" - - temporal: - enabled: true - workflows: - - name: at-harness-langgraph - queue_name: at_harness_langgraph_queue - - credentials: - - env_var_name: REDIS_URL - secret_name: redis-url-secret - secret_key: url - - env: {} - -deployment: - image: - repository: "" - tag: "latest" - - imagePullSecrets: [] - - global: - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git 
a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py deleted file mode 100644 index 7af9c5e68..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/acp.py +++ /dev/null @@ -1,34 +0,0 @@ -"""ACP server for the Temporal harness LangGraph agent. - -Follows the ``130_langgraph`` pattern: the Temporal ``LangGraphPlugin`` runs -graph nodes as Temporal activities. The agent logic lives in ``workflow.py`` -(the runtime) and ``graph.py`` (the LangGraph graph), executed by the Temporal -worker (``run_worker.py``), not by this HTTP process. - -The workflow uses ``emit_langgraph_messages`` to surface turn messages to -AgentEx. That helper is Temporal-specific and is not replaced by the unified -harness here (``UnifiedEmitter`` targets the non-Temporal async/sync channels). 
-""" - -from __future__ import annotations - -import os - -from dotenv import load_dotenv - -load_dotenv() - -from temporalio.contrib.langgraph import LangGraphPlugin - -from project.graph import GRAPH_NAME, build_graph -from agentex.lib.types.fastacp import TemporalACPConfig -from agentex.lib.sdk.fastacp.fastacp import FastACP - -acp = FastACP.create( - acp_type="async", - config=TemporalACPConfig( - type="temporal", - temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), - plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})], - ), -) diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py deleted file mode 100644 index ce9c2b520..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/graph.py +++ /dev/null @@ -1,85 +0,0 @@ -"""LangGraph graph for at-harness-langgraph — nodes run as Temporal activities. - -Identical in structure to ``130_langgraph/project/graph.py``. The graph -definition is not affected by the harness migration; only the agent naming -changes. The LLM ``agent`` node runs as a durable Temporal activity; -the ``tools`` node runs inline in the workflow. -""" - -from __future__ import annotations - -import os -from typing import Any, Annotated -from datetime import datetime, timedelta - -_litellm_key = os.environ.get("LITELLM_API_KEY") -if _litellm_key: - os.environ.setdefault("OPENAI_API_KEY", _litellm_key) - -from typing_extensions import TypedDict - -from langgraph.graph import END, START, StateGraph -from langchain_openai import ChatOpenAI -from langchain_core.messages import ToolMessage, SystemMessage -from langgraph.graph.message import add_messages - -from project.tools import TOOLS - -_TOOLS_BY_NAME = {tool.name: tool for tool in TOOLS} - -GRAPH_NAME = "at-harness-langgraph" -MODEL_NAME = "gpt-4o" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. 
- -Current date and time: {timestamp} - -Be concise and use tools when they help answer the question.""" - - -class AgentState(TypedDict): - messages: Annotated[list[Any], add_messages] - - -async def agent_node(state: AgentState) -> dict[str, Any]: - """The 'agent' node — one LLM call. Runs as a durable Temporal activity.""" - llm = ChatOpenAI(model=MODEL_NAME).bind_tools(TOOLS) - messages = state["messages"] - if not messages or not isinstance(messages[0], SystemMessage): - system = SystemMessage(content=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"))) - messages = [system, *messages] - return {"messages": [await llm.ainvoke(messages)]} - - -async def tools_node(state: AgentState) -> dict[str, Any]: - """Run the tool calls the model requested. Runs inline in the workflow.""" - last = state["messages"][-1] - results: list[Any] = [] - for call in getattr(last, "tool_calls", None) or []: - tool = _TOOLS_BY_NAME.get(call["name"]) - if tool is None: - output = f"Error: unknown tool {call['name']!r}. 
Available: {list(_TOOLS_BY_NAME)}" - else: - output = await tool.ainvoke(call["args"]) - results.append(ToolMessage(content=str(output), tool_call_id=call["id"], name=call["name"])) - return {"messages": results} - - -async def route_after_agent(state: AgentState) -> str: - """Go to the tools node if the model requested tools, else finish.""" - last = state["messages"][-1] - return "tools" if getattr(last, "tool_calls", None) else END - - -def build_graph() -> StateGraph: - """Build the agent graph; the LLM node runs as an activity, tools in the workflow.""" - builder = StateGraph(AgentState) - builder.add_node( - "agent", - agent_node, - metadata={"execute_in": "activity", "start_to_close_timeout": timedelta(minutes=5)}, - ) - builder.add_node("tools", tools_node, metadata={"execute_in": "workflow"}) - builder.add_edge(START, "agent") - builder.add_conditional_edges("agent", route_after_agent, {"tools": "tools", END: END}) - builder.add_edge("tools", "agent") - return builder diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/run_worker.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/run_worker.py deleted file mode 100644 index ca64464fc..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/run_worker.py +++ /dev/null @@ -1,46 +0,0 @@ -"""Temporal worker for at-harness-langgraph. - -Run as a separate long-lived process alongside the ACP HTTP server. The -worker polls Temporal for workflow + activity tasks and executes them. - -The ``LangGraphPlugin`` is given the graph registry (``{ GRAPH_NAME: graph }``). -At runtime it turns the graph's ``execute_in="activity"`` nodes into Temporal -activities and registers them on the worker automatically. 
-""" - -import asyncio - -from temporalio.contrib.langgraph import LangGraphPlugin - -from project.graph import GRAPH_NAME, build_graph -from project.workflow import AtHarnessLanggraphWorkflow -from agentex.lib.utils.debug import setup_debug_if_enabled -from agentex.lib.utils.logging import make_logger -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.activities import get_all_activities -from agentex.lib.core.temporal.workers.worker import AgentexWorker - -environment_variables = EnvironmentVariables.refresh() -logger = make_logger(__name__) - - -async def main(): - setup_debug_if_enabled() - - task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE - if task_queue_name is None: - raise ValueError("WORKFLOW_TASK_QUEUE is not set") - - worker = AgentexWorker( - task_queue=task_queue_name, - plugins=[LangGraphPlugin(graphs={GRAPH_NAME: build_graph()})], - ) - - await worker.run( - activities=get_all_activities(), - workflow=AtHarnessLanggraphWorkflow, - ) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py deleted file mode 100644 index 10943c9d2..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/tools.py +++ /dev/null @@ -1,37 +0,0 @@ -"""Tool definitions for the harness_langgraph temporal agent.""" - -from langchain_core.tools import Tool - - -def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. - """ - return f"The weather in {city} is sunny and 72°F" - - -async def aget_weather(city: str) -> str: - """Native async tool entrypoint. - - ``tools_node`` runs inline in the Temporal workflow and invokes tools via - ``tool.ainvoke``. 
A sync-only tool forces LangChain to bridge through - ``run_in_executor`` (a thread pool), which the deterministic Temporal - workflow event loop forbids (``NotImplementedError``). Providing a real - coroutine keeps tool execution on the workflow loop. - """ - return get_weather(city) - - -weather_tool = Tool( - name="get_weather", - func=get_weather, - coroutine=aget_weather, - description="Get the current weather for a city. Input should be a city name.", -) - -TOOLS = [weather_tool] diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py deleted file mode 100644 index 4125dca39..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/project/workflow.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Temporal workflow for at-harness-langgraph. - -Each turn the workflow runs the LangGraph graph (``project/graph.py``) via the -``temporalio.contrib.langgraph`` plugin. The plugin runs the LLM ``agent`` node -as a durable Temporal activity and the ``tools`` node inline in the workflow. - -Multi-turn memory is kept on the workflow instance (``self._messages``) — it's -durable and replay-safe for free, so no checkpoint database is needed. 
-""" - -from __future__ import annotations - -import json -from typing import Any - -from temporalio import workflow -from temporalio.contrib.langgraph import graph as lg_graph - -from agentex.lib import adk -from project.graph import GRAPH_NAME -from agentex.lib.adk import emit_langgraph_messages -from agentex.protocol.acp import SendEventParams, CreateTaskParams -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.types.workflow import SignalName -from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow - -environment_variables = EnvironmentVariables.refresh() - -if environment_variables.WORKFLOW_NAME is None: - raise ValueError("Environment variable WORKFLOW_NAME is not set") -if environment_variables.AGENT_NAME is None: - raise ValueError("Environment variable AGENT_NAME is not set") - -logger = make_logger(__name__) - - -@workflow.defn(name=environment_variables.WORKFLOW_NAME) -class AtHarnessLanggraphWorkflow(BaseWorkflow): - """Runs the LangGraph agent each turn; its nodes run as Temporal activities.""" - - def __init__(self) -> None: - super().__init__(display_name=environment_variables.AGENT_NAME) - self._complete_task = False - self._messages: list[Any] = [] - self._emitted = 0 - - @workflow.signal(name=SignalName.RECEIVE_EVENT) - async def on_task_event_send(self, params: SendEventParams) -> None: - """Echo the user's message, run the graph, surface the new messages.""" - await adk.messages.create(task_id=params.task.id, content=params.event.content) - self._messages.append({"role": "user", "content": params.event.content.content}) - - compiled = lg_graph(GRAPH_NAME).compile() - result = await compiled.ainvoke({"messages": self._messages}) - self._messages = result["messages"] - - await emit_langgraph_messages(self._messages[self._emitted :], params.task.id) - self._emitted = 
len(self._messages) - - @workflow.signal - async def complete_task_signal(self) -> None: - self._complete_task = True - - @workflow.run - async def on_task_create(self, params: CreateTaskParams) -> str: - await adk.messages.create( - task_id=params.task.id, - content=TextContent( - author="agent", - content=( - f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n\n" - "Send me a message and I'll respond using a LangGraph agent whose nodes " - "run as durable Temporal activities." - ), - ), - ) - await workflow.wait_condition(lambda: self._complete_task, timeout=None) - return "Task completed" diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml b/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml deleted file mode 100644 index 897f54dd6..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_langgraph/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "at-harness-langgraph" -version = "0.1.0" -description = "A Temporal-backed LangGraph agent (harness variant) whose nodes run as Temporal activities" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "temporalio[langgraph]>=1.27.0", - "langchain-openai", - "langchain-core", - "grandalf", - "python-dotenv", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", - "debugpy>=1.8.15", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git a/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py deleted file mode 100644 index 05d9ffa01..000000000 --- 
a/examples/tutorials/10_async/10_temporal/harness_langgraph/tests/test_agent.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Integration tests for the Temporal harness LangGraph agent (live agent required). - -These drive a *running* agent over the AgentEx API and verify that: -- the agent sends a welcome message on task creation, -- a weather question triggers a tool_request / tool_response round-trip - (proving the LLM node ran as a Temporal activity and the tool node ran), -- the final answer reflects the tool output. - -To run: -1. Start the agent (worker + ACP server): ``agentex agents run --manifest manifest.yaml`` -2. Set AGENTEX_API_BASE_URL if not using the default -3. ``pytest tests/test_agent.py -v`` -""" - -import os -import uuid - -import pytest -import pytest_asyncio -from test_utils.async_utils import ( - poll_messages, - send_event_and_poll_yielding, -) - -from agentex import AsyncAgentex -from agentex.types.task_message import TaskMessage -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-langgraph") - - -@pytest_asyncio.fixture -async def client(): - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingEvents: - """The Temporal-backed LangGraph agent responds and uses tools.""" - - @pytest.mark.asyncio - async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): - """Create a task, ask about weather, verify the tool round-trip.""" - task_response = await client.agents.create_task(agent_id, 
params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - task_creation_found = False - async for message in poll_messages(client=client, task_id=task.id, timeout=30, sleep_interval=1.0): - assert isinstance(message, TaskMessage) - if message.content and message.content.type == "text" and message.content.author == "agent": - task_creation_found = True - break - assert task_creation_found, "Task creation welcome message not found" - - seen_tool_request = False - seen_tool_response = False - final_message = None - async for message in send_event_and_poll_yielding( - client=client, - agent_id=agent_id, - task_id=task.id, - user_message="What is the weather in San Francisco? Use your tool.", - timeout=60, - sleep_interval=1.0, - ): - assert isinstance(message, TaskMessage) - - if message.content and message.content.type == "tool_request": - seen_tool_request = True - if message.content and message.content.type == "tool_response": - seen_tool_response = True - - if message.content and message.content.type == "text" and message.content.author == "agent": - final_message = message - content_length = len(getattr(message.content, "content", "") or "") - if getattr(message, "streaming_status", None) in (None, "DONE") and content_length > 0: - if seen_tool_response: - break - - assert seen_tool_request, "Expected a tool_request (agent calling get_weather)" - assert seen_tool_response, "Expected a tool_response (get_weather result)" - assert final_message is not None, "Expected a final agent text message" - final_text = getattr(final_message.content, "content", None) if final_message.content else None - assert isinstance(final_text, str) and len(final_text) > 0 - assert "72" in final_text, "Expected weather response to mention 72°F" - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore 
b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore deleted file mode 100644 index c49489471..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/.dockerignore +++ /dev/null @@ -1,43 +0,0 @@ -# Python -__pycache__/ -*.py[cod] -*$py.class -*.so -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg - -# Environments -.env** -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# IDE -.idea/ -.vscode/ -*.swp -*.swo - -# Git -.git -.gitignore - -# Misc -.DS_Store diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile deleted file mode 100644 index 98c74c6e8..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# syntax=docker/dockerfile:1.3 -FROM python:3.12-slim -COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ - -# Install system dependencies -RUN apt-get update && apt-get install -y \ - htop \ - vim \ - curl \ - tar \ - python3-dev \ - postgresql-client \ - build-essential \ - libpq-dev \ - gcc \ - cmake \ - netcat-openbsd \ - && apt-get clean \ - && rm -rf /var/lib/apt/lists/* - -RUN uv pip install --system --upgrade pip setuptools wheel - -ENV UV_HTTP_TIMEOUT=1000 - -COPY 10_async/10_temporal/harness_pydantic_ai/pyproject.toml /app/harness_pydantic_ai/pyproject.toml -COPY 10_async/10_temporal/harness_pydantic_ai/README.md /app/harness_pydantic_ai/README.md - -WORKDIR /app/harness_pydantic_ai - -COPY 10_async/10_temporal/harness_pydantic_ai/project /app/harness_pydantic_ai/project -COPY 10_async/10_temporal/harness_pydantic_ai/tests /app/harness_pydantic_ai/tests -COPY test_utils /app/test_utils - -RUN uv pip install --system .[dev] - -ENV PYTHONPATH=/app - -ENV AGENT_NAME=at-harness-pydantic-ai - -CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", 
"--port", "8000"] - -# When we deploy the worker, we will replace the CMD with the following -# CMD ["python", "-m", "run_worker"] diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md deleted file mode 100644 index 3e5fef4c6..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/README.md +++ /dev/null @@ -1,61 +0,0 @@ -# Temporal Pydantic AI Harness Test Agent - -A minimal **Temporal-backed** Pydantic AI agent that drives the **unified -harness surface** (`UnifiedEmitter.auto_send_turn` + `PydanticAITurn`) from -inside the model activity's `event_stream_handler`. - -## Why this agent exists - -The `10_async/10_temporal/110_pydantic_ai` tutorial streams via the -`stream_pydantic_ai_events` helper (which uses the unified surface internally). -This harness test agent calls `emitter.auto_send_turn(...)` **explicitly** inside -the `event_stream_handler`, making the unified-surface wiring visible and giving -the temporal channel direct coverage. - -## How it wires the unified surface - -In `project/agent.py`, the `event_stream_handler` runs inside the model activity -and constructs a `UnifiedEmitter` from `RunContext.deps`: - -```python -async def event_handler(run_context, events): - emitter = UnifiedEmitter( - task_id=run_context.deps.task_id, - trace_id=run_context.deps.task_id, - parent_span_id=run_context.deps.parent_span_id, - ) - turn = PydanticAITurn(events, model=MODEL_NAME, coalesce_tool_requests=True) - await emitter.auto_send_turn(turn) -``` - -- The handler runs inside a Temporal activity, so it can freely make - non-deterministic Redis + tracing writes. -- `coalesce_tool_requests=True` is required on the auto_send path until - AGX1-377 lands. -- `deps` (set by `project/workflow.py`) threads the `task_id` and the per-turn - `parent_span_id` into the handler so tool spans nest under the workflow's turn - span. 
- -## Structure - -- `project/acp.py` — thin ACP server; FastACP auto-wires HTTP routes to the - workflow when `TemporalACPConfig` is used. -- `project/agent.py` — base `Agent` + `TemporalAgent` + the unified-surface - `event_stream_handler`. -- `project/workflow.py` — durable workflow; each turn delegates to - `temporal_agent.run(...)`. -- `project/run_worker.py` — Temporal worker entry point. -- `project/tools.py` — async `get_weather(city)` returning a constant. -- `tests/test_agent.py` — live integration test (requires Temporal + Redis + - ACP server + worker). - -## Tools - -- `get_weather(city: str) -> str` (async): returns a fixed "sunny and 72°F" - string. Each tool call becomes its own Temporal activity. - -## Offline coverage - -Offline integration tests for the same wiring (pydantic-ai `TestModel` + fake -streaming/tracing, no Temporal server) live in the SDK repo at -`tests/lib/core/harness/test_harness_pydantic_ai_temporal.py`. diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml deleted file mode 100644 index 9efbff918..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/manifest.yaml +++ /dev/null @@ -1,62 +0,0 @@ -build: - context: - root: ../../../ - include_paths: - - 10_async/10_temporal/harness_pydantic_ai - - test_utils - dockerfile: 10_async/10_temporal/harness_pydantic_ai/Dockerfile - dockerignore: 10_async/10_temporal/harness_pydantic_ai/.dockerignore - -local_development: - agent: - port: 8000 - host_address: host.docker.internal - paths: - acp: project/acp.py - worker: project/run_worker.py - -agent: - acp_type: async - name: at-harness-pydantic-ai - description: A Temporal-backed Pydantic AI harness test agent using the unified emitter surface - - temporal: - enabled: true - workflows: - - name: at-harness-pydantic-ai - queue_name: at_harness_pydantic_ai_queue - - credentials: - - env_var_name: REDIS_URL - 
secret_name: redis-url-secret - secret_key: url - - env_var_name: OPENAI_API_KEY - secret_name: openai-api-key - secret_key: api-key - - env_var_name: SGP_API_KEY - secret_name: sgp-api-key - secret_key: api-key - - env_var_name: SGP_ACCOUNT_ID - secret_name: sgp-account-id - secret_key: account-id - - env_var_name: SGP_CLIENT_BASE_URL - secret_name: sgp-client-base-url - secret_key: url - -deployment: - image: - repository: "" - tag: "latest" - - global: - agent: - name: "at-harness-pydantic-ai" - description: "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface" - replicaCount: 1 - resources: - requests: - cpu: "500m" - memory: "1Gi" - limits: - cpu: "1000m" - memory: "2Gi" diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/__init__.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py deleted file mode 100644 index c142dcf70..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/acp.py +++ /dev/null @@ -1,35 +0,0 @@ -"""ACP server for the Temporal harness Pydantic AI test agent. - -This file is intentionally thin. When ``acp_type="async"`` is combined with -``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: - - HTTP task/create → @workflow.run on the workflow class - HTTP task/event/send → @workflow.signal(SignalName.RECEIVE_EVENT) - HTTP task/cancel → workflow cancellation via the Temporal client - -so we don't define any handlers here. The actual agent code lives in -``project/workflow.py`` and is executed by the Temporal worker -(``project/run_worker.py``), not by this HTTP process. 
-""" - -from __future__ import annotations - -import os - -from dotenv import load_dotenv - -load_dotenv() - -from pydantic_ai.durable_exec.temporal import PydanticAIPlugin - -from agentex.lib.types.fastacp import TemporalACPConfig -from agentex.lib.sdk.fastacp.fastacp import FastACP - -acp = FastACP.create( - acp_type="async", - config=TemporalACPConfig( - type="temporal", - temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), - plugins=[PydanticAIPlugin()], - ), -) diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py deleted file mode 100644 index 5e8697264..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/agent.py +++ /dev/null @@ -1,111 +0,0 @@ -"""Pydantic AI agent definition for the Temporal harness test agent. - -This module constructs the base ``pydantic_ai.Agent`` once at import time, -registers tools on it, and wraps it in ``TemporalAgent`` from -``pydantic_ai.durable_exec.temporal``. - -The ``TemporalAgent`` wrapper makes every model call and every tool call run as -a Temporal activity automatically. The workflow stays deterministic; the -non-deterministic work (LLM HTTP calls, tool execution) moves into recorded -activities. - -Streaming back to Agentex happens via ``event_stream_handler``, which receives -Pydantic AI ``AgentStreamEvent``s from inside the model activity and forwards -them through the UNIFIED HARNESS SURFACE (``UnifiedEmitter.auto_send_turn`` + -``PydanticAITurn``) — called directly rather than via ``stream_pydantic_ai_events``. -The ``task_id`` and per-turn ``parent_span_id`` are threaded into the handler -via ``deps``. 
-""" - -from __future__ import annotations - -from datetime import datetime -from collections.abc import AsyncIterable - -from pydantic import BaseModel -from pydantic_ai import Agent, RunContext -from pydantic_ai.messages import AgentStreamEvent -from pydantic_ai.durable_exec.temporal import TemporalAgent - -from project.tools import get_weather -from agentex.lib.core.harness import UnifiedEmitter -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - -__all__ = ["TaskDeps", "temporal_agent", "base_agent", "MODEL_NAME"] - -MODEL_NAME = "openai:gpt-4o-mini" -SYSTEM_PROMPT = """You are a helpful AI assistant with access to tools. - -Current date and time: {timestamp} - -Guidelines: -- Be concise and helpful -- Use tools when they would help answer the user's question -- If you're unsure, ask clarifying questions -- Always provide accurate information -""" - - -class TaskDeps(BaseModel): - """Per-run dependencies passed into the agent via ``deps=``. - - Pydantic AI's ``RunContext.deps`` is the canonical place to thread - request-scoped data (like the Agentex task_id) into tools and event - handlers — including code that runs inside Temporal activities. - """ - - task_id: str - # When set, the event handler nests per-tool-call spans under this span. - # Typically the ID of the per-turn span opened by the workflow. - parent_span_id: str | None = None - - -def _build_base_agent() -> Agent[TaskDeps, str]: - """Build the underlying Pydantic AI agent with tools registered. - - Tools must be registered BEFORE the agent is wrapped in TemporalAgent; - changes to tool registration after wrapping are not reflected. 
- """ - agent: Agent[TaskDeps, str] = Agent( - MODEL_NAME, - deps_type=TaskDeps, - system_prompt=SYSTEM_PROMPT.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), - ) - agent.tool_plain(get_weather) - return agent - - -async def event_handler( - run_context: RunContext[TaskDeps], - events: AsyncIterable[AgentStreamEvent], -) -> None: - """Stream Pydantic AI events to Agentex via the unified surface. - - Pydantic AI calls this with the live event stream as soon as the model - activity begins emitting parts. Because the handler runs inside the activity - (not the workflow), it can freely make non-deterministic Redis + tracing - writes. - - The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id), - so tool spans nest under the workflow's per-turn span and messages auto-send - to the task stream. The auto_send path delivers streamed tool requests - natively, so no coalescing workaround is needed. - """ - emitter = UnifiedEmitter( - task_id=run_context.deps.task_id, - trace_id=run_context.deps.task_id, - parent_span_id=run_context.deps.parent_span_id, - ) - turn = PydanticAITurn(events, model=MODEL_NAME) - await emitter.auto_send_turn(turn) - - -# Construct the durable agent at module load time so that the PydanticAIPlugin -# can auto-discover its activities via the workflow's ``__pydantic_ai_agents__`` -# attribute. -base_agent = _build_base_agent() -temporal_agent: TemporalAgent[TaskDeps, str] = TemporalAgent( - base_agent, - name="harness_pydantic_ai_agent", - event_stream_handler=event_handler, -) diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py deleted file mode 100644 index 4b4d43d19..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/run_worker.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Temporal worker for the harness Pydantic AI test agent. 
- -Run as a separate long-lived process alongside the ACP HTTP server. The worker -polls Temporal for workflow + activity tasks and executes them. - -The ``PydanticAIPlugin`` reads ``__pydantic_ai_agents__`` off the workflow class -and registers every model/tool activity the TemporalAgent needs — so we don't -have to enumerate activities by hand here. -""" - -import asyncio - -from pydantic_ai.durable_exec.temporal import PydanticAIPlugin - -from project.workflow import HarnessPydanticAiWorkflow -from agentex.lib.utils.debug import setup_debug_if_enabled -from agentex.lib.utils.logging import make_logger -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.activities import get_all_activities -from agentex.lib.core.temporal.workers.worker import AgentexWorker - -environment_variables = EnvironmentVariables.refresh() -logger = make_logger(__name__) - - -async def main(): - setup_debug_if_enabled() - - task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE - if task_queue_name is None: - raise ValueError("WORKFLOW_TASK_QUEUE is not set") - - # get_all_activities() returns the built-in Agentex activities (state, - # messages, streaming, tracing). Pydantic AI's TemporalAgent activities are - # auto-registered by PydanticAIPlugin via __pydantic_ai_agents__. - worker = AgentexWorker( - task_queue=task_queue_name, - plugins=[PydanticAIPlugin()], - ) - - await worker.run( - activities=get_all_activities(), - workflow=HarnessPydanticAiWorkflow, - ) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py deleted file mode 100644 index bbd6c5200..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/tools.py +++ /dev/null @@ -1,24 +0,0 @@ -"""Tool definitions for the Temporal harness Pydantic AI agent. 
- -These functions are registered on the base Pydantic AI agent. When the agent -is wrapped in ``TemporalAgent``, each tool call becomes its own Temporal -activity automatically — independently retryable and observable. - -Tools must be ``async`` because Pydantic AI's Temporal integration requires -it: non-async tools would run in threads, which is non-deterministic and -unsafe for Temporal replay. -""" - -from __future__ import annotations - - -async def get_weather(city: str) -> str: - """Get the current weather for a city. - - Args: - city: The name of the city to get weather for. - - Returns: - A string describing the weather conditions. - """ - return f"The weather in {city} is sunny and 72°F" diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py deleted file mode 100644 index 9a01be7de..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/project/workflow.py +++ /dev/null @@ -1,137 +0,0 @@ -"""Temporal workflow for the harness Pydantic AI test agent. - -The workflow holds task state durably across crashes. Its signal handler -delegates the actual agent run to ``temporal_agent.run(...)`` — which internally -schedules model and tool activities, each independently durable. The -``event_stream_handler`` registered on ``temporal_agent`` (see project.agent) -pushes streaming deltas through the unified harness surface while the model -activity runs. - -Multi-turn memory is kept on the workflow instance itself -(``self._message_history``). Temporal's workflow state is already durable and -replay-safe, so unlike the async-base agent we don't need an external -``adk.state`` round-trip. 
-""" - -from __future__ import annotations - -import os -import json -from typing import TYPE_CHECKING - -from temporalio import workflow - -from agentex.lib import adk -from project.agent import TaskDeps, temporal_agent -from agentex.lib.types.acp import SendEventParams, CreateTaskParams -from agentex.lib.types.tracing import SGPTracingProcessorConfig -from agentex.lib.utils.logging import make_logger -from agentex.types.text_content import TextContent -from agentex.lib.environment_variables import EnvironmentVariables -from agentex.lib.core.temporal.types.workflow import SignalName -from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow -from agentex.lib.core.tracing.tracing_processor_manager import ( - add_tracing_processor_config, -) - -if TYPE_CHECKING: - from pydantic_ai.messages import ModelMessage - -add_tracing_processor_config( - SGPTracingProcessorConfig( - sgp_api_key=os.environ.get("SGP_API_KEY", ""), - sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), - sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), - ) -) - -environment_variables = EnvironmentVariables.refresh() - -if environment_variables.WORKFLOW_NAME is None: - raise ValueError("Environment variable WORKFLOW_NAME is not set") -if environment_variables.AGENT_NAME is None: - raise ValueError("Environment variable AGENT_NAME is not set") - -logger = make_logger(__name__) - - -@workflow.defn(name=environment_variables.WORKFLOW_NAME) -class HarnessPydanticAiWorkflow(BaseWorkflow): - """Long-running Temporal workflow that delegates each turn to a Pydantic AI TemporalAgent. - - The ``__pydantic_ai_agents__`` attribute is the marker the - ``PydanticAIPlugin`` looks for at worker startup: it pulls - ``temporal_agent.temporal_activities`` off this list and registers them on - the worker automatically — so we don't have to list activities by hand in - ``run_worker.py``. 
- """ - - __pydantic_ai_agents__ = [temporal_agent] - - def __init__(self): - super().__init__(display_name=environment_variables.AGENT_NAME) - self._complete_task = False - self._turn_number = 0 - # Conversation history accumulated across turns. Each entry is a - # pydantic-ai ``ModelMessage``. Temporal replays the activity that - # produced these messages, so the list is rebuilt deterministically if - # the workflow ever recovers from a crash. - self._message_history: list["ModelMessage"] = [] - - @workflow.signal(name=SignalName.RECEIVE_EVENT) - async def on_task_event_send(self, params: SendEventParams) -> None: - """Handle a new user message: echo it, then run the agent durably.""" - logger.info(f"Received task event: {params.task.id}") - self._turn_number += 1 - - # Echo the user's message so it shows up in the UI as a chat bubble. - await adk.messages.create(task_id=params.task.id, content=params.event.content) - - async with adk.tracing.span( - trace_id=params.task.id, - task_id=params.task.id, - name=f"Turn {self._turn_number}", - input={"message": params.event.content.content}, - ) as span: - # temporal_agent.run() schedules a model activity, per-tool - # activities, and the event_stream_handler activity (which pushes - # deltas through the unified surface). Passing ``message_history`` - # makes the run remember prior turns. - result = await temporal_agent.run( - params.event.content.content, - message_history=self._message_history, - deps=TaskDeps( - task_id=params.task.id, - parent_span_id=span.id if span else None, - ), - ) - # Persist the new full history (user + assistant + any tool rounds) - # so the next turn picks up from here. 
- self._message_history = list(result.all_messages()) - if span: - span.output = {"final_output": result.output} - - @workflow.run - async def on_task_create(self, params: CreateTaskParams) -> str: - """Workflow entry point — keep the conversation alive for incoming signals.""" - logger.info(f"Task created: {params.task.id}") - - await adk.messages.create( - task_id=params.task.id, - content=TextContent( - author="agent", - content=( - f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n" - f"Send me a message and I'll respond using a Pydantic AI agent backed by Temporal." - ), - ), - ) - - await workflow.wait_condition(lambda: self._complete_task, timeout=None) - return "Task completed" - - @workflow.signal - async def complete_task_signal(self) -> None: - """Graceful workflow shutdown signal.""" - logger.info("Received complete_task signal") - self._complete_task = True diff --git a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml deleted file mode 100644 index 4d9039640..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/pyproject.toml +++ /dev/null @@ -1,38 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "at-harness-pydantic-ai" -version = "0.1.0" -description = "A Temporal-backed Pydantic AI harness test agent using the unified emitter surface" -readme = "README.md" -requires-python = ">=3.12" -dependencies = [ - "agentex-sdk", - "scale-gp", - "temporalio>=1.18.2", - "pydantic-ai-slim[openai]>=1.0,<2", -] - -[project.optional-dependencies] -dev = [ - "pytest", - "pytest-asyncio", - "httpx", - "black", - "isort", - "flake8", - "debugpy>=1.8.15", -] - -[tool.hatch.build.targets.wheel] -packages = ["project"] - -[tool.black] -line-length = 88 -target-version = ['py312'] - -[tool.isort] -profile = "black" -line_length = 88 diff --git 
a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py b/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py deleted file mode 100644 index a5b90ca34..000000000 --- a/examples/tutorials/10_async/10_temporal/harness_pydantic_ai/tests/test_agent.py +++ /dev/null @@ -1,114 +0,0 @@ -"""Live tests for the Temporal harness Pydantic AI agent. - -These tests require a running agent (Temporal + Redis + ACP server + worker) and -exercise the unified-surface event_stream_handler end-to-end over the wire. They -mirror the ``at110`` temporal tutorial tests but target this harness agent. - -Offline coverage of the same wiring (TestModel + fake streaming/tracing) lives -in ``tests/lib/core/harness/test_harness_pydantic_ai_temporal.py`` in the SDK repo. - -To run these tests: -1. Make sure the agent is running (worker + ACP server) -2. Set AGENTEX_API_BASE_URL if not using the default -3. Run: pytest tests/test_agent.py -v -""" - -import os -import uuid - -import pytest -import pytest_asyncio -from test_utils.async_utils import poll_messages, send_event_and_poll_yielding - -from agentex import AsyncAgentex -from agentex.types.task_message import TaskMessage -from agentex.types.agent_rpc_params import ParamsCreateTaskRequest - -AGENTEX_API_BASE_URL = os.environ.get("AGENTEX_API_BASE_URL", "http://localhost:5003") -AGENT_NAME = os.environ.get("AGENT_NAME", "at-harness-pydantic-ai") - - -@pytest_asyncio.fixture -async def client(): - client = AsyncAgentex(base_url=AGENTEX_API_BASE_URL) - yield client - await client.close() - - -@pytest.fixture -def agent_name(): - return AGENT_NAME - - -@pytest_asyncio.fixture -async def agent_id(client, agent_name): - agents = await client.agents.list() - for agent in agents: - if agent.name == agent_name: - return agent.id - raise ValueError(f"Agent with name {agent_name} not found.") - - -class TestNonStreamingEvents: - """Test that the Temporal-backed harness agent responds and uses tools.""" 
- - @pytest.mark.asyncio - async def test_send_event_and_poll(self, client: AsyncAgentex, agent_id: str): - """Drive a full turn: create task, send a weather question, verify tool round-trip.""" - task_response = await client.agents.create_task(agent_id, params=ParamsCreateTaskRequest(name=uuid.uuid1().hex)) - task = task_response.result - assert task is not None - - # Wait for the welcome message from on_task_create - task_creation_found = False - async for message in poll_messages( - client=client, - task_id=task.id, - timeout=30, - sleep_interval=1.0, - ): - assert isinstance(message, TaskMessage) - if message.content and message.content.type == "text" and message.content.author == "agent": - task_creation_found = True - break - assert task_creation_found, "Task creation welcome message not found" - - # Ask about weather — the agent should call get_weather - seen_tool_request = False - seen_tool_response = False - final_message = None - async for message in send_event_and_poll_yielding( - client=client, - agent_id=agent_id, - task_id=task.id, - user_message="What is the weather in San Francisco?", - timeout=60, - sleep_interval=1.0, - ): - assert isinstance(message, TaskMessage) - - if message.content and message.content.type == "tool_request": - seen_tool_request = True - if message.content and message.content.type == "tool_response": - seen_tool_response = True - if final_message and getattr(final_message, "streaming_status", None) == "DONE": - break - - if message.content and message.content.type == "text" and message.content.author == "agent": - final_message = message - content_length = len(getattr(message.content, "content", "") or "") - if message.streaming_status == "DONE" and content_length > 0: - if not seen_tool_request or seen_tool_response: - break - - assert seen_tool_request, "Expected a tool_request (agent calling get_weather)" - assert seen_tool_response, "Expected a tool_response (get_weather result)" - assert final_message is not None, 
"Expected a final agent text message" - final_text = getattr(final_message.content, "content", None) if final_message.content else None - assert isinstance(final_text, str) and len(final_text) > 0 - # The get_weather tool always returns "72°F" — the response should mention it. - assert "72" in final_text, "Expected weather response to mention 72°F" - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) From 2d0e70976cd9cf1619d41566a9112c0811c306dd Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 19:39:34 -0400 Subject: [PATCH 05/12] refactor(harness)!: remove deprecated tracing handlers; migrate CLI templates to the unified surface BREAKING CHANGE: removes create_langgraph_tracing_handler / create_pydantic_ai_tracing_handler and their handler classes (AgentexLangGraphTracingHandler / AgentexPydanticAITracingHandler) from the public adk surface. Span tracing is now derived from the canonical stream by UnifiedEmitter. Migrates the five sync-/default-/temporal- pydantic-ai and langgraph CLI templates onto UnifiedEmitter + the per-harness Turn wrappers (mirroring the migrated tutorials), drops the now-dead tracing_handler parameter from the pydantic-ai sync/async/turn modules, deletes the deprecated-path tests, and trims the resolved AGX1-377/378 workaround markers to plain current-contract comments. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/agentex/lib/adk/__init__.py | 4 - .../lib/adk/_modules/_langgraph_async.py | 18 +- .../lib/adk/_modules/_langgraph_sync.py | 4 +- .../lib/adk/_modules/_langgraph_tracing.py | 273 ---------------- .../lib/adk/_modules/_langgraph_turn.py | 12 +- .../lib/adk/_modules/_pydantic_ai_async.py | 19 +- .../lib/adk/_modules/_pydantic_ai_sync.py | 38 +-- .../lib/adk/_modules/_pydantic_ai_tracing.py | 221 ------------- .../lib/adk/_modules/_pydantic_ai_turn.py | 12 +- .../default-langgraph/project/acp.py.j2 | 24 +- .../default-pydantic-ai/project/acp.py.j2 | 23 +- .../sync-langgraph/project/acp.py.j2 | 22 +- .../sync-pydantic-ai/project/acp.py.j2 | 23 +- .../temporal-pydantic-ai/project/agent.py.j2 | 27 +- src/agentex/lib/core/harness/auto_send.py | 4 +- src/agentex/lib/core/harness/tracer.py | 2 +- .../lib/core/services/adk/providers/openai.py | 9 +- .../adk/providers/test_openai_activities.py | 2 +- tests/lib/adk/test_langgraph_sync.py | 22 -- tests/lib/adk/test_langgraph_sync_unified.py | 7 +- tests/lib/adk/test_pydantic_ai_async.py | 292 +----------------- tests/lib/adk/test_pydantic_ai_sync.py | 84 ----- tests/lib/adk/test_pydantic_ai_turn.py | 4 +- tests/lib/core/harness/conformance/runner.py | 19 +- .../harness/conformance/test_conformance.py | 13 +- .../test_pydantic_ai_conformance.py | 12 +- tests/lib/core/harness/test_auto_send.py | 6 +- .../harness/test_harness_langgraph_async.py | 8 +- .../harness/test_harness_langgraph_sync.py | 2 +- .../harness/test_harness_pydantic_ai_async.py | 8 +- 30 files changed, 128 insertions(+), 1086 deletions(-) delete mode 100644 src/agentex/lib/adk/_modules/_langgraph_tracing.py delete mode 100644 src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py diff --git a/src/agentex/lib/adk/__init__.py b/src/agentex/lib/adk/__init__.py index fedd52f7a..36918af30 100644 --- a/src/agentex/lib/adk/__init__.py +++ b/src/agentex/lib/adk/__init__.py @@ -6,13 +6,11 @@ from 
agentex.lib.adk._modules.agents import AgentsModule from agentex.lib.adk._modules.agent_task_tracker import AgentTaskTrackerModule from agentex.lib.adk._modules.checkpointer import create_checkpointer -from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events from agentex.lib.adk._modules._langgraph_messages import emit_langgraph_messages from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events -from agentex.lib.adk._modules._pydantic_ai_tracing import create_pydantic_ai_tracing_handler from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events from agentex.lib.adk._modules._claude_code_turn import ( ClaudeCodeTurn, @@ -66,14 +64,12 @@ "agent_task_tracker", # Checkpointing / LangGraph "create_checkpointer", - "create_langgraph_tracing_handler", "stream_langgraph_events", "emit_langgraph_messages", "convert_langgraph_to_agentex_events", # Pydantic AI "stream_pydantic_ai_events", "convert_pydantic_ai_to_agentex_events", - "create_pydantic_ai_tracing_handler", # Claude Code "convert_claude_code_to_agentex_events", "ClaudeCodeTurn", diff --git a/src/agentex/lib/adk/_modules/_langgraph_async.py b/src/agentex/lib/adk/_modules/_langgraph_async.py index 02ef059eb..4d95fc177 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_async.py +++ b/src/agentex/lib/adk/_modules/_langgraph_async.py @@ -11,9 +11,9 @@ harness adapter (pydantic-ai, openai-agents, etc.). The public signature and return type are preserved identically. -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events -(from "updates" events), NOT Start+Delta+Done like pydantic-ai. 
``auto_send`` -handles Full events correctly; no coalescing wrapper is needed. +LangGraph emits tool requests as ``StreamTaskMessageFull`` events (from +"updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles +Full events correctly; no coalescing wrapper is needed. """ from agentex.lib.utils.temporal import workflow_now_if_in_workflow @@ -35,11 +35,11 @@ async def stream_langgraph_events(stream, task_id: str) -> str: cross-harness consistency. Behavior is identical to the previous bespoke implementation (verified by characterization tests in test_langgraph_async.py). - AGX1-377 note: LangGraph emits tool requests as ``Full`` events (from "updates"), - NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events + LangGraph emits tool requests as ``Full`` events (from "updates"), NOT + Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events correctly; no coalescing wrapper is needed. - AGX1-378 note: ``created_at`` is set from ``workflow.now()`` when called inside a + ``created_at`` is set from ``workflow.now()`` when called inside a Temporal workflow, matching the pattern used by the openai/litellm providers. Outside a workflow (plain async activities, sync agents) it is ``None`` and the server's wall clock is used. @@ -54,10 +54,10 @@ async def stream_langgraph_events(stream, task_id: str) -> str: from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - # AGX1-377 note: LangGraph emits tool requests as Full events (from "updates"), - # NOT Start+Delta+Done like pydantic-ai. auto_send handles Full events correctly; + # LangGraph emits tool requests as Full events (from "updates"), NOT + # Start+Delta+Done like pydantic-ai. auto_send handles Full events correctly; # no coalescing wrapper is needed. 
- # AGX1-378: stamp messages with workflow.now() inside Temporal for deterministic + # Stamp messages with workflow.now() inside Temporal for deterministic # created_at ordering; falls back to None (server wall clock) outside a workflow. turn = LangGraphTurn(stream, model=None) emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None) diff --git a/src/agentex/lib/adk/_modules/_langgraph_sync.py b/src/agentex/lib/adk/_modules/_langgraph_sync.py index 48231a87d..a1744304b 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_sync.py +++ b/src/agentex/lib/adk/_modules/_langgraph_sync.py @@ -48,8 +48,8 @@ async def convert_langgraph_to_agentex_events( Supports both regular models (chunk.content is a str) and reasoning models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks). - AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` (from - "updates" events), NOT Start+Delta+Done like pydantic-ai. No coalesce_tool_requests + LangGraph emits tool requests as ``StreamTaskMessageFull`` (from "updates" + events), NOT Start+Delta+Done like pydantic-ai. No coalesce_tool_requests option is needed for LangGraph. Args: diff --git a/src/agentex/lib/adk/_modules/_langgraph_tracing.py b/src/agentex/lib/adk/_modules/_langgraph_tracing.py deleted file mode 100644 index 2162201e1..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_tracing.py +++ /dev/null @@ -1,273 +0,0 @@ -"""LangChain callback handler that creates Agentex spans for LLM calls and tool executions. - -.. deprecated:: - ``AgentexLangGraphTracingHandler`` and ``create_langgraph_tracing_handler`` are - superseded by the unified harness surface (``LangGraphTurn`` + - ``UnifiedEmitter``), which derives spans automatically from the canonical - event stream without requiring a LangChain callback handler. - - They remain importable and functional for backward compatibility, but new - agents should use the unified path instead. 
-""" -# ruff: noqa: ARG002 -# Callback methods must accept all arguments defined by LangChain's AsyncCallbackHandler interface. - -from __future__ import annotations - -from uuid import UUID -from typing import Any, override - -from langchain_core.outputs import LLMResult -from langchain_core.messages import BaseMessage -from langchain_core.callbacks import AsyncCallbackHandler - -from agentex.types.span import Span -from agentex.lib.utils.logging import make_logger -from agentex.lib.adk._modules.tracing import TracingModule - -logger = make_logger(__name__) - - -class AgentexLangGraphTracingHandler(AsyncCallbackHandler): - """Async LangChain callback handler that records Agentex tracing spans. - - Creates child spans under a parent span for each LLM call and tool execution. - Designed to be passed via ``config={"callbacks": [handler]}`` to LangGraph's - ``graph.astream()`` or ``graph.ainvoke()``. - - Span hierarchy produced:: - - (e.g. "message" turn-level span) - ├── llm: (LLM call) - ├── tool: (tool execution) - └── llm: (LLM call) - - .. deprecated:: - Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. The unified - harness derives equivalent spans from the canonical event stream, - removing the need for a LangChain callback handler entirely. - """ - - def __init__( - self, - trace_id: str, - parent_span_id: str | None = None, - tracing: TracingModule | None = None, - ) -> None: - super().__init__() - self._trace_id = trace_id - self._parent_span_id = parent_span_id - # Lazily initialise TracingModule so the httpx client is created - # inside the *running* event-loop (not at import/construction time). 
- self._tracing_eager = tracing - self._tracing_lazy: TracingModule | None = None - # Map run_id → Span for in-flight spans - self._spans: dict[UUID, Span] = {} - - @property - def _tracing(self) -> TracingModule: - if self._tracing_eager is not None: - return self._tracing_eager - if self._tracing_lazy is None: - self._tracing_lazy = TracingModule() - return self._tracing_lazy - - # ------------------------------------------------------------------ - # LLM lifecycle - # ------------------------------------------------------------------ - - @override - async def on_chat_model_start( - self, - serialized: dict[str, Any], - messages: list[list[BaseMessage]], - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - **kwargs: Any, - ) -> None: - model_name = (metadata or {}).get("ls_model_name", "") or _extract_model_name(serialized) - span = await self._tracing.start_span( - trace_id=self._trace_id, - name=f"llm:{model_name}" if model_name else "llm", - input=_serialize_messages(messages), - parent_id=self._parent_span_id, - data={"__span_type__": "COMPLETION"}, - ) - if span: - self._spans[run_id] = span - - @override - async def on_llm_end( - self, - response: LLMResult, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = _serialize_llm_result(response) - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - @override - async def on_llm_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"error": str(error)} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - # ------------------------------------------------------------------ - # Tool lifecycle - # 
------------------------------------------------------------------ - - @override - async def on_tool_start( - self, - serialized: dict[str, Any], - input_str: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - tags: list[str] | None = None, - metadata: dict[str, Any] | None = None, - inputs: dict[str, Any] | None = None, - **kwargs: Any, - ) -> None: - tool_name = serialized.get("name", "") or serialized.get("id", [""])[-1] - span = await self._tracing.start_span( - trace_id=self._trace_id, - name=f"tool:{tool_name}" if tool_name else "tool", - input={"input": input_str}, - parent_id=self._parent_span_id, - data={"__span_type__": "CUSTOM"}, - ) - if span: - self._spans[run_id] = span - - @override - async def on_tool_end( - self, - output: str, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"output": output} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - @override - async def on_tool_error( - self, - error: BaseException, - *, - run_id: UUID, - parent_run_id: UUID | None = None, - **kwargs: Any, - ) -> None: - span = self._spans.pop(run_id, None) - if span is None: - return - span.output = {"error": str(error)} - await self._tracing.end_span(trace_id=self._trace_id, span=span) - - -# ------------------------------------------------------------------ -# Helpers -# ------------------------------------------------------------------ - - -def _extract_model_name(serialized: dict[str, Any]) -> str: - """Best-effort model name extraction from the serialized callback dict.""" - kwargs = serialized.get("kwargs", {}) - return kwargs.get("model_name", "") or kwargs.get("model", "") - - -def _serialize_messages(messages: list[list[BaseMessage]]) -> dict[str, Any]: - """Serialize LangChain messages into a JSON-safe dict for the span input.""" - result: list[dict[str, Any]] = [] - for batch in messages: - for msg in 
batch: - entry: dict[str, Any] = {"type": msg.type, "content": msg.content} - tool_calls = getattr(msg, "tool_calls", None) - if tool_calls: - entry["tool_calls"] = tool_calls - result.append(entry) - return {"messages": result} - - -def _serialize_llm_result(response: LLMResult) -> dict[str, Any]: - """Serialize an LLMResult into a JSON-safe dict for the span output.""" - output: dict[str, Any] = {} - if response.generations: - last_gen = response.generations[-1] - if last_gen: - gen = last_gen[-1] - msg = getattr(gen, "message", None) - - # For reasoning models, content is a list of typed blocks. - # Extract text from the blocks instead of relying on gen.text. - if msg and isinstance(msg.content, list): - text_parts: list[str] = [] - for block in msg.content: - if isinstance(block, dict): - if block.get("type") == "text": - text_parts.append(block.get("text", "")) - output["content"] = "".join(text_parts) if text_parts else gen.text - else: - output["content"] = gen.text - - if msg and hasattr(msg, "tool_calls") and msg.tool_calls: - output["tool_calls"] = [{"name": tc["name"], "args": tc["args"]} for tc in msg.tool_calls] - return output - - -def create_langgraph_tracing_handler( - trace_id: str, - parent_span_id: str | None = None, -) -> AgentexLangGraphTracingHandler: - """Create a LangChain callback handler that records Agentex tracing spans. - - Pass the returned handler to LangGraph via ``config={"callbacks": [handler]}``. - - Args: - trace_id: The trace ID (typically the task/thread ID). - parent_span_id: Optional parent span ID to nest LLM/tool spans under. - - Returns: - An ``AgentexLangGraphTracingHandler`` instance ready to use as a LangChain callback. - - .. deprecated:: - Use ``LangGraphTurn`` with ``UnifiedEmitter`` instead. 
The unified harness - derives equivalent spans from the canonical event stream automatically, with - no LangChain callback required:: - - from agentex.lib.core.harness.emitter import UnifiedEmitter - from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - - turn = LangGraphTurn(stream) - emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=span_id) - result = await emitter.auto_send_turn(turn) - - This function remains available for backward compatibility. - """ - return AgentexLangGraphTracingHandler( - trace_id=trace_id, - parent_span_id=parent_span_id, - ) diff --git a/src/agentex/lib/adk/_modules/_langgraph_turn.py b/src/agentex/lib/adk/_modules/_langgraph_turn.py index da8ff0e7c..84c5c7838 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_turn.py +++ b/src/agentex/lib/adk/_modules/_langgraph_turn.py @@ -4,9 +4,9 @@ ``langgraph_usage_to_turn_usage`` helper that maps LangGraph's ``AIMessage.usage_metadata`` onto the framework-agnostic ``TurnUsage`` model. -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` events -(from "updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` -handles Full events correctly; no coalescing wrapper is needed. +LangGraph emits tool requests as ``StreamTaskMessageFull`` events (from +"updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles +Full events correctly; no coalescing wrapper is needed. """ from __future__ import annotations @@ -111,9 +111,9 @@ class LangGraphTurn: # Async / temporal result = await emitter.auto_send_turn(turn) - AGX1-377 note: LangGraph tool requests are ``StreamTaskMessageFull`` (from - "updates"), NOT Start+Delta+Done like pydantic-ai. No ``coalesce_tool_requests`` - option is needed. + LangGraph tool requests are ``StreamTaskMessageFull`` (from "updates"), NOT + Start+Delta+Done like pydantic-ai. No ``coalesce_tool_requests`` option is + needed. 
Usage data is captured lazily via the ``on_final_ai_message`` callback and is only valid after ``events`` has been fully consumed. Multi-step turns diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py b/src/agentex/lib/adk/_modules/_pydantic_ai_async.py index 85abfb845..5f9514f36 100644 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_async.py +++ b/src/agentex/lib/adk/_modules/_pydantic_ai_async.py @@ -11,36 +11,22 @@ when a context is closed without deltas). This matches the ``auto_send`` convention used by all other async/Temporal harnesses. -Tracing is opt-in via a ``tracing_handler`` parameter — see -``create_pydantic_ai_tracing_handler`` in -``agentex.lib.adk._modules._pydantic_ai_tracing``. +Tracing is derived automatically from the event stream by the emitter when +a ``trace_id`` is provided to the ``UnifiedEmitter``. """ from __future__ import annotations -from typing import TYPE_CHECKING - -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - async def stream_pydantic_ai_events( stream, task_id: str, - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, ) -> str: """Stream Pydantic AI events to Agentex via Redis. Args: stream: Async iterator yielded by ``agent.run_stream_events(...)``. task_id: The Agentex task ID to stream messages to. - tracing_handler: Optional handler from - ``create_pydantic_ai_tracing_handler(...)``. When provided, each - tool call in the run is also recorded as an Agentex child span - beneath the handler's configured ``parent_span_id``. Streaming - behavior is unchanged when omitted. Returns: The accumulated text content of the **last** text part in the run. 
@@ -54,7 +40,6 @@ async def stream_pydantic_ai_events( turn = PydanticAITurn( stream, model=None, - tracing_handler=tracing_handler, ) emitter = UnifiedEmitter( task_id=task_id, diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py index e4ac31e7e..0f9aaeb55 100644 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py +++ b/src/agentex/lib/adk/_modules/_pydantic_ai_sync.py @@ -41,14 +41,9 @@ async def handle_message_send(params): import json import inspect -from typing import TYPE_CHECKING, Any, Callable, AsyncIterator +from typing import Any, Callable, AsyncIterator from pydantic_ai.run import AgentRunResultEvent - -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) from pydantic_ai.messages import ( TextPart, PartEndEvent, @@ -124,7 +119,6 @@ def _tool_return_content(result: ToolReturnPart | Any) -> Any: async def convert_pydantic_ai_to_agentex_events( stream_response: AsyncIterator[Any], - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, on_result: Callable[[AgentRunResultEvent], Any] | None = None, ) -> AsyncIterator[StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone]: """Convert a Pydantic AI agent event stream into Agentex stream events. @@ -148,11 +142,6 @@ async def convert_pydantic_ai_to_agentex_events( stream_response: The async iterator yielded by Pydantic AI's ``agent.run_stream_events(...)`` context manager (or a stream of ``AgentStreamEvent`` items received in an ``event_stream_handler``). - tracing_handler: Optional handler from - ``create_pydantic_ai_tracing_handler(...)``. When provided, each - tool call in the run is also recorded as an Agentex child span - beneath the handler's configured ``parent_span_id``. Streaming - behavior is unchanged when omitted. on_result: Optional callback invoked with the terminal ``AgentRunResultEvent`` when the run completes. 
Both sync and async callables are accepted. No ``StreamTaskMessage*`` events are @@ -306,26 +295,6 @@ async def convert_pydantic_ai_to_agentex_events( if message_index is None: continue yield StreamTaskMessageDone(type="done", index=message_index) - # Tool-call parts end with the model's full args known. Open a - # tracing child span for the tool execution now; close it when - # FunctionToolResultEvent arrives below. - if tracing_handler is not None and isinstance(event.part, ToolCallPart) and event.part.tool_call_id: - args: dict[str, Any] | str | None - raw_args = event.part.args - if isinstance(raw_args, dict): - args = dict(raw_args) - elif isinstance(raw_args, str): - try: - args = json.loads(raw_args) if raw_args else {} - except json.JSONDecodeError: - args = {"_raw": raw_args} - else: - args = {} - await tracing_handler.on_tool_start( - tool_call_id=event.part.tool_call_id, - tool_name=event.part.tool_name, - arguments=args, - ) elif isinstance(event, FunctionToolResultEvent): result = event.part @@ -345,11 +314,6 @@ async def convert_pydantic_ai_to_agentex_events( content=content_payload, ), ) - if tracing_handler is not None and tool_call_id: - await tracing_handler.on_tool_end( - tool_call_id=tool_call_id, - result=content_payload, - ) elif isinstance(event, (FunctionToolCallEvent, FinalResultEvent, AgentRunResultEvent)): # Already covered by PartStart/PartDelta/PartEnd events above, or diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py b/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py deleted file mode 100644 index e199d0a8c..000000000 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py +++ /dev/null @@ -1,221 +0,0 @@ -"""Tracing handler that records Agentex spans for tool calls in a pydantic-ai agent run. - -.. deprecated:: - ``AgentexPydanticAITracingHandler`` and ``create_pydantic_ai_tracing_handler`` - are superseded by the unified harness surface (``UnifiedEmitter`` in - ``agentex.lib.core.harness``). 
The unified surface derives tool and - reasoning spans directly from the canonical ``StreamTaskMessage*`` stream, - so no separate handler is required. Both symbols remain fully importable - and functional; they will be removed in a future release. New code should - construct a ``UnifiedEmitter`` with a ``trace_id`` instead: - - from agentex.lib.core.harness import UnifiedEmitter - from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - - emitter = UnifiedEmitter(task_id=task_id, trace_id=trace_id, parent_span_id=parent_span_id) - turn = PydanticAITurn(agent.run_stream_events(prompt), model="openai:gpt-4o") - async for event in emitter.yield_turn(turn): - yield event - -# NOTE: A runtime ``warnings.warn(..., DeprecationWarning)`` is intentionally -# omitted here. The repo's pyproject ``filterwarnings = ["error"]`` would turn -# it into a test/caller failure, and the async helper (``stream_pydantic_ai_events``) -# still threads this handler through for existing callers that lack a ``trace_id`` -# on the async path. The runtime warning and caller migration are deferred until -# ``trace_id`` threading lands on the async helper in a future API-versioning change. - -Mirrors the LangGraph tracing handler pattern: the caller creates a handler -bound to a ``trace_id`` and a ``parent_span_id``, then hands it to -``stream_pydantic_ai_events(..., tracing_handler=handler)``. The streamer -calls ``on_tool_start`` / ``on_tool_end`` as it observes the corresponding -events in the agent stream, and the handler records one Agentex child span -per tool call. - -Why a handler-on-the-streamer rather than an OpenTelemetry bridge: -pydantic-ai exposes its stream of ``AgentStreamEvent`` directly, and that -stream already contains every signal we need to record tool spans. Going -through an OTel processor would require setting up an OTel ``TracerProvider`` -plus a bridge processor — that's a much larger investment, and orthogonal -to the streaming path we already own. 
This handler hooks into the same -event stream the UI-streaming helper consumes, so a single pass over the -events produces both: live deltas on Redis and child spans on the AgentEx -tracing pipeline. - -Why span IDs are derived from ``tool_call_id`` instead of held in a dict: -pydantic-ai's ``TemporalAgent`` splits the agent run across one or more -Temporal activities. The ``event_stream_handler`` is invoked once per -activity, with a fresh handler instance each time. So ``on_tool_start`` -(emitted inside the model activity that issued the tool call) and -``on_tool_end`` (emitted inside the next model activity, after the tool -runs) land in different handler instances — an in-memory dict can't pair -them. Deriving the span ID deterministically from ``(trace_id, -tool_call_id)`` makes the open/close pairing stateless: ``on_tool_end`` -re-derives the same ID and PATCHes the existing span directly. - -Span hierarchy produced:: - - (e.g. "Turn N", created by the caller) - ├── tool: (one child span per tool call) - └── tool: -""" - -from __future__ import annotations - -import uuid -from typing import Any -from datetime import UTC, datetime - -from agentex import AsyncAgentex -from agentex.lib.utils.logging import make_logger -from agentex.lib.adk._modules.tracing import TracingModule -from agentex.lib.adk.utils._modules.client import create_async_agentex_client - -logger = make_logger(__name__) - - -# Stable namespace for deriving tool-call span IDs. The exact UUID value is -# arbitrary; it just needs to be a constant so the same (trace_id, tool_call_id) -# always maps to the same span ID across handler invocations. 
-_TOOL_SPAN_NAMESPACE = uuid.UUID("8c2f9a2b-3e4d-4b5a-9c1f-0a1b2c3d4e5f") - - -def _tool_span_id(trace_id: str, tool_call_id: str) -> str: - """Deterministic span ID for a given tool call within a trace.""" - return str(uuid.uuid5(_TOOL_SPAN_NAMESPACE, f"{trace_id}:{tool_call_id}")) - - -class AgentexPydanticAITracingHandler: - """Records Agentex tracing spans for tool calls observed in a pydantic-ai event stream. - - .. deprecated:: - Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which - derives tool and reasoning spans from the canonical ``StreamTaskMessage*`` - stream automatically when ``trace_id`` is provided. This class remains - fully functional but will be removed in a future release. New code should - use ``UnifiedEmitter`` with a trace context instead of constructing this - handler directly. - - Pass an instance to ``stream_pydantic_ai_events(..., tracing_handler=...)`` - or call ``on_tool_start`` / ``on_tool_end`` yourself if you're consuming - the event stream by hand. - """ - - def __init__( - self, - trace_id: str, - parent_span_id: str | None = None, - task_id: str | None = None, - tracing: TracingModule | None = None, - client: AsyncAgentex | None = None, - ) -> None: - self._trace_id = trace_id - self._parent_span_id = parent_span_id - # task_id on the span record (separate from trace_id) is what the - # AgentEx UI's per-task spans dropdown filters by. If you want your - # tool spans visible in that dropdown, set this to the task ID. - self._task_id = task_id - # ``_tracing`` is retained for callers / tests that want to inject a - # mocked TracingModule, even though the on_tool_* methods now go - # direct to the AgentEx client (see module docstring for why). - self._tracing_eager = tracing - self._tracing_lazy: TracingModule | None = None - # Defer client construction until first use so httpx binds to the - # running event loop (matches the TracingModule pattern). 
- self._client_eager = client - self._client_lazy: AsyncAgentex | None = None - - @property - def _tracing(self) -> TracingModule: - if self._tracing_eager is not None: - return self._tracing_eager - if self._tracing_lazy is None: - self._tracing_lazy = TracingModule() - return self._tracing_lazy - - @property - def _client(self) -> AsyncAgentex: - if self._client_eager is not None: - return self._client_eager - if self._client_lazy is None: - self._client_lazy = create_async_agentex_client() - return self._client_lazy - - async def on_tool_start( - self, - tool_call_id: str, - tool_name: str, - arguments: dict[str, Any] | str | None, - ) -> None: - """Open a child span for a tool call. - - Uses a deterministic span ID derived from ``tool_call_id`` so that - ``on_tool_end`` — which may run inside a different handler instance - when pydantic-ai splits the run across Temporal activities — can - close the same span without needing in-memory state. - """ - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.create( - id=span_id, - trace_id=self._trace_id, - task_id=self._task_id, - parent_id=self._parent_span_id, - name=f"tool:{tool_name}" if tool_name else "tool", - start_time=datetime.now(UTC), - input={"arguments": arguments}, - data={"__span_type__": "CUSTOM"}, - ) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - """Close a child span by PATCHing its end_time and output. - - Re-derives the deterministic span ID from ``tool_call_id`` and updates - the existing span record directly. No in-memory span lookup, so this - works even when ``on_tool_start`` ran inside a different handler - instance (e.g. across pydantic-ai TemporalAgent activity boundaries). 
- """ - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.update( - span_id, - end_time=datetime.now(UTC), - output={"result": result}, - ) - - async def on_tool_error(self, tool_call_id: str, error: BaseException | str) -> None: - """Close a child span with an error payload as output.""" - span_id = _tool_span_id(self._trace_id, tool_call_id) - await self._client.spans.update( - span_id, - end_time=datetime.now(UTC), - output={"error": str(error)}, - ) - - -def create_pydantic_ai_tracing_handler( - trace_id: str, - parent_span_id: str | None = None, - task_id: str | None = None, -) -> AgentexPydanticAITracingHandler: - """Create a tracing handler that records Agentex spans for pydantic-ai tool calls. - - .. deprecated:: - Superseded by ``UnifiedEmitter`` (``agentex.lib.core.harness``), which - derives tool and reasoning spans from the canonical ``StreamTaskMessage*`` - stream automatically when ``trace_id`` is provided. This function remains - fully functional but will be removed in a future release. New code should - construct a ``UnifiedEmitter`` with a trace context instead. - - Args: - trace_id: The trace ID. Typically the Agentex task ID. - parent_span_id: Optional parent span ID to nest tool spans under. If - omitted, the tool spans become trace-root spans. - task_id: Optional task ID stamped onto each span. Required for the - AgentEx UI's per-task spans dropdown to display the spans. - - Returns: - A handler suitable for passing to ``stream_pydantic_ai_events(..., tracing_handler=...)``. 
- """ - return AgentexPydanticAITracingHandler( - trace_id=trace_id, - parent_span_id=parent_span_id, - task_id=task_id, - ) diff --git a/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py index b06172e7f..472652f5c 100644 --- a/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py +++ b/src/agentex/lib/adk/_modules/_pydantic_ai_turn.py @@ -15,7 +15,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, AsyncIterator +from typing import Any, AsyncIterator from pydantic_ai.run import AgentRunResultEvent @@ -28,9 +28,6 @@ ) from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events -if TYPE_CHECKING: - from agentex.lib.adk._modules._pydantic_ai_tracing import AgentexPydanticAITracingHandler - StreamTaskMessage = StreamTaskMessageStart | StreamTaskMessageDelta | StreamTaskMessageFull | StreamTaskMessageDone @@ -83,19 +80,17 @@ class PydanticAITurn: ``events`` is identical to the bare ``convert_pydantic_ai_to_agentex_events`` output (tool calls stream as ``Start + ToolRequestDelta + Done``, preserving argument-token streaming on the sync/yield channel). The foundation - ``auto_send`` delivers the streamed tool-request shape natively (AGX1-377), - so no coalescing is needed on either channel. + ``auto_send`` delivers the streamed tool-request shape natively, so no + coalescing is needed on either channel. 
""" def __init__( self, stream: AsyncIterator[Any], model: str | None = None, - tracing_handler: "AgentexPydanticAITracingHandler | None" = None, ) -> None: self._stream = stream self._model = model - self._tracing_handler = tracing_handler self._usage = TurnUsage(model=model) @property @@ -119,7 +114,6 @@ def _capture(result_event: AgentRunResultEvent) -> None: raw_stream = convert_pydantic_ai_to_agentex_events( self._stream, - tracing_handler=self._tracing_handler, on_result=_capture, ) async for ev in raw_stream: diff --git a/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 index 3309dc07e..38d393b09 100644 --- a/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/default-langgraph/project/acp.py.j2 @@ -15,13 +15,14 @@ if _litellm_key: os.environ["OPENAI_API_KEY"] = _litellm_key import agentex.lib.adk as adk -from agentex.lib.adk import create_langgraph_tracing_handler, stream_langgraph_events +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.protocol.acp import SendEventParams, CancelTaskParams, CreateTaskParams from agentex.lib.types.fastacp import AsyncACPConfig from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn from project.graph import create_graph @@ -67,24 +68,23 @@ async def handle_task_event_send(params: SendEventParams): input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - callback = create_langgraph_tracing_handler( - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - stream = graph.astream( {"messages": [{"role": "user", "content": user_message}]}, - config={ - 
"configurable": {"thread_id": task_id}, - "callbacks": [callback], - }, + config={"configurable": {"thread_id": task_id}}, stream_mode=["messages", "updates"], ) - final_output = await stream_langgraph_events(stream, task_id) + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + + result = await emitter.auto_send_turn(turn) if turn_span: - turn_span.output = {"final_output": final_output} + turn_span.output = {"final_output": result.final_text} @acp.on_task_create diff --git a/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 index 5692396b2..e5eabb20d 100644 --- a/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/default-pydantic-ai/project/acp.py.j2 @@ -19,21 +19,19 @@ from dotenv import load_dotenv load_dotenv() -from project.agent import create_agent +from project.agent import MODEL_NAME, create_agent from pydantic_ai.run import AgentRunResultEvent from pydantic_ai.messages import ModelMessagesTypeAdapter import agentex.lib.adk as adk -from agentex.lib.adk import ( - stream_pydantic_ai_events, - create_pydantic_ai_tracing_handler, -) from agentex.protocol.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.fastacp import AsyncACPConfig from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.utils.model_utils import BaseModel from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -125,15 +123,17 @@ async def handle_task_event_send(params: SendEventParams): 
input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - tracing_handler = create_pydantic_ai_tracing_handler( + # Construct the UnifiedEmitter from the ACP context so tracing is + # automatic and messages are auto-sent to the task stream (Redis). + emitter = UnifiedEmitter( + task_id=task_id, trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, - task_id=task_id, ) # Wrap the pydantic-ai event stream so we can capture the final # AgentRunResultEvent (which carries the full message list for the - # next turn) without changing the streaming-helper's signature. + # next turn) before forwarding events to the emitter. captured_messages: list[Any] = [] async def tee_messages(upstream) -> AsyncIterator[Any]: @@ -143,9 +143,8 @@ async def handle_task_event_send(params: SendEventParams): yield event async with agent.run_stream_events(user_message, message_history=previous_messages) as stream: - final_output = await stream_pydantic_ai_events( - tee_messages(stream), task_id, tracing_handler=tracing_handler - ) + turn = PydanticAITurn(tee_messages(stream), model=MODEL_NAME) + result = await emitter.auto_send_turn(turn) # Save the updated message history so the next turn picks up here. if captured_messages: @@ -158,7 +157,7 @@ async def handle_task_event_send(params: SendEventParams): ) if turn_span: - turn_span.output = {"final_output": final_output} + turn_span.output = {"final_output": result.final_text} @acp.on_task_cancel diff --git a/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 index 54538d0c9..2b8233b5d 100644 --- a/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 +++ b/src/agentex/lib/cli/templates/sync-langgraph/project/acp.py.j2 @@ -8,12 +8,13 @@ tokens and tool calls from the LangGraph graph to the Agentex frontend. 
from typing import AsyncGenerator import agentex.lib.adk as adk -from agentex.lib.adk import create_langgraph_tracing_handler, convert_langgraph_to_agentex_events +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.protocol.acp import SendMessageParams from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn from agentex.types.task_message_content import TaskMessageContent from agentex.types.task_message_delta import TextDelta from agentex.types.task_message_update import TaskMessageUpdate @@ -72,22 +73,21 @@ async def handle_message_send( input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - callback = create_langgraph_tracing_handler( - trace_id=thread_id, - parent_span_id=turn_span.id if turn_span else None, - ) - stream = graph.astream( {"messages": [{"role": "user", "content": user_message}]}, - config={ - "configurable": {"thread_id": thread_id}, - "callbacks": [callback], - }, + config={"configurable": {"thread_id": thread_id}}, stream_mode=["messages", "updates"], ) + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter( + task_id=thread_id, + trace_id=thread_id, + parent_span_id=turn_span.id if turn_span else None, + ) + final_text = "" - async for event in convert_langgraph_to_agentex_events(stream): + async for event in emitter.yield_turn(turn): # Accumulate text deltas for span output delta = getattr(event, "delta", None) if isinstance(delta, TextDelta) and delta.text_delta: diff --git a/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 index 4925e847f..f82dadcb6 100644 --- a/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 
+++ b/src/agentex/lib/cli/templates/sync-pydantic-ai/project/acp.py.j2 @@ -15,19 +15,17 @@ from dotenv import load_dotenv load_dotenv() -from project.agent import create_agent +from project.agent import MODEL_NAME, create_agent import agentex.lib.adk as adk -from agentex.lib.adk import ( - create_pydantic_ai_tracing_handler, - convert_pydantic_ai_to_agentex_events, -) from agentex.protocol.acp import SendMessageParams +from agentex.lib.core.harness import UnifiedEmitter from agentex.lib.types.tracing import SGPTracingProcessorConfig from agentex.lib.utils.logging import make_logger from agentex.lib.sdk.fastacp.fastacp import FastACP from agentex.types.task_message_update import TaskMessageUpdate from agentex.types.task_message_content import TaskMessageContent +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config logger = make_logger(__name__) @@ -73,7 +71,7 @@ async def handle_message_send( logger.info(f"Processing message for task {task_id}") # Open a per-message turn span. Tool calls below nest underneath this - # span via the tracing handler's parent_span_id wiring. + # span via the emitter's parent_span_id wiring. async with adk.tracing.span( trace_id=task_id, task_id=task_id, @@ -81,13 +79,14 @@ async def handle_message_send( input={"message": user_message}, data={"__span_type__": "AGENT_WORKFLOW"}, ) as turn_span: - tracing_handler = create_pydantic_ai_tracing_handler( + # Construct the UnifiedEmitter from the ACP/streaming context so tracing + # is automatic: tool spans nest under this turn's span. 
+ emitter = UnifiedEmitter( + task_id=task_id, trace_id=task_id, parent_span_id=turn_span.id if turn_span else None, - task_id=task_id, ) async with agent.run_stream_events(user_message) as stream: - async for event in convert_pydantic_ai_to_agentex_events( - stream, tracing_handler=tracing_handler - ): - yield event + turn = PydanticAITurn(stream, model=MODEL_NAME) + async for ev in emitter.yield_turn(turn): + yield ev diff --git a/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 b/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 index 0aa958118..82b1db269 100644 --- a/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 +++ b/src/agentex/lib/cli/templates/temporal-pydantic-ai/project/agent.py.j2 @@ -11,9 +11,9 @@ moves into recorded activities. Streaming back to Agentex happens via ``event_stream_handler``, which receives Pydantic AI ``AgentStreamEvent``s from inside the model activity -and forwards them to Redis using the ``stream_pydantic_ai_events`` helper. -The ``task_id`` and tracing parent span ID are threaded into the handler -via ``deps``. +and forwards them through the unified harness surface +(``UnifiedEmitter.auto_send_turn`` + ``PydanticAITurn``). The ``task_id`` and +tracing parent span ID are threaded into the handler via ``deps``. """ from __future__ import annotations @@ -27,10 +27,8 @@ from project.tools import get_weather from pydantic_ai.messages import AgentStreamEvent from pydantic_ai.durable_exec.temporal import TemporalAgent -from agentex.lib.adk import ( - stream_pydantic_ai_events, - create_pydantic_ai_tracing_handler, -) +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn # Swap this for any Pydantic AI-supported model identifier # (e.g. "anthropic:claude-3-5-sonnet-latest", "openai:gpt-4o"). 
@@ -92,17 +90,18 @@ async def event_handler( activity (not the workflow), it can freely make non-deterministic Redis writes — including the tracing HTTP calls that record per-tool-call spans under the workflow's per-turn span (when ``parent_span_id`` is set). + + The UnifiedEmitter is constructed from ``deps`` (task_id + parent_span_id), + so tool spans nest under the workflow's per-turn span and messages auto-send + to the task stream. """ - tracing_handler = create_pydantic_ai_tracing_handler( + emitter = UnifiedEmitter( + task_id=run_context.deps.task_id, trace_id=run_context.deps.task_id, parent_span_id=run_context.deps.parent_span_id, - task_id=run_context.deps.task_id, - ) - await stream_pydantic_ai_events( - events, - run_context.deps.task_id, - tracing_handler=tracing_handler, ) + turn = PydanticAITurn(events, model=MODEL_NAME) + await emitter.auto_send_turn(turn) # Construct the durable agent at module load time so that the diff --git a/src/agentex/lib/core/harness/auto_send.py b/src/agentex/lib/core/harness/auto_send.py index 2ecd6b583..6d3883fa6 100644 --- a/src/agentex/lib/core/harness/auto_send.py +++ b/src/agentex/lib/core/harness/auto_send.py @@ -52,8 +52,8 @@ async def auto_send( final_text_parts so that multi-step turns return the LAST text segment. Full(TextContent) also overwrites final_text_parts (same semantics). - AGX1-378: created_at is forwarded to every streaming_task_message_context - call so callers can back-date message timestamps. + created_at is forwarded to every streaming_task_message_context call so + callers can back-date message timestamps. 
Mirrors the open/close/stream_update pattern from src/agentex/lib/adk/_modules/_langgraph_async.py: diff --git a/src/agentex/lib/core/harness/tracer.py b/src/agentex/lib/core/harness/tracer.py index 4ca4d628b..0c6167b76 100644 --- a/src/agentex/lib/core/harness/tracer.py +++ b/src/agentex/lib/core/harness/tracer.py @@ -24,7 +24,7 @@ class SpanTracer: The real TracingModule.end_span does NOT accept an output kwarg — output is recorded by mutating span.output before calling end_span, matching the pattern - used throughout the codebase (see _langgraph_tracing.py on_tool_end etc.). + used throughout the codebase. Span-lifecycle contract: the `_open` dict (span key -> span object) is scoped to a single turn. Pairing is by `key`: diff --git a/src/agentex/lib/core/services/adk/providers/openai.py b/src/agentex/lib/core/services/adk/providers/openai.py index 1ae29589d..a2513ea01 100644 --- a/src/agentex/lib/core/services/adk/providers/openai.py +++ b/src/agentex/lib/core/services/adk/providers/openai.py @@ -742,11 +742,10 @@ async def run_agent_streamed_auto_send( ) as span: heartbeat_if_in_workflow("run agent streamed auto send") - # AGX1-378 restored: created_at is now threaded through - # UnifiedEmitter.auto_send_turn -> auto_send -> every - # streaming_task_message_context call, so the first agent message of - # the turn is stamped with the workflow-supplied timestamp (e.g. - # workflow.now()) just as the original inline loop did. + # created_at is threaded through UnifiedEmitter.auto_send_turn -> + # auto_send -> every streaming_task_message_context call, so the + # first agent message of the turn is stamped with the + # workflow-supplied timestamp (e.g. workflow.now()). # The dispenser is still used below for guardrail-rejection messages, # which open their own streaming contexts directly. 
_take_created_at = _make_created_at_dispenser(created_at) diff --git a/tests/lib/adk/providers/test_openai_activities.py b/tests/lib/adk/providers/test_openai_activities.py index 2f89308a9..964b24545 100644 --- a/tests/lib/adk/providers/test_openai_activities.py +++ b/tests/lib/adk/providers/test_openai_activities.py @@ -653,7 +653,7 @@ def _assert_tools_conversion(self, starting_agent, tools_case, _original_tools): @patch("agents.Runner.run_streamed") async def test_run_agent_streamed_auto_send_forwards_created_at(self, mock_runner_run_streamed): - """created_at is forwarded to every streaming context opened by auto_send_turn (AGX1-378).""" + """created_at is forwarded to every streaming context opened by auto_send_turn.""" from datetime import datetime, timezone from agentex.lib.core.temporal.activities.adk.providers.openai_activities import ( diff --git a/tests/lib/adk/test_langgraph_sync.py b/tests/lib/adk/test_langgraph_sync.py index 248d18f68..6b71a2264 100644 --- a/tests/lib/adk/test_langgraph_sync.py +++ b/tests/lib/adk/test_langgraph_sync.py @@ -3,8 +3,6 @@ Covers: - Basic text, tool call, and tool response emission - on_final_ai_message callback for usage capture -- create_langgraph_tracing_handler symbol is importable and functional - (runtime DeprecationWarning removed; deprecation is docstring-only) NOTE: langchain_core imports must be deferred to test-function scope because conftest.py stubs out ``langchain_core.messages`` with MagicMock for ADK @@ -225,23 +223,3 @@ def _cb(msg): # The tool call Full event is emitted before the callback fires assert yield_order.index("event") < yield_order.index("callback") - - -class TestLangGraphTracingHandlerBackwardCompat: - def test_create_langgraph_tracing_handler_no_runtime_warning(self): - """Deprecated symbol remains importable and emits no runtime DeprecationWarning. 
- - The runtime warnings.warn was removed (docstring-only deprecation) to - align with PR 4/6 and avoid breaking callers under warnings-as-errors. - Using ``warnings.simplefilter("error", DeprecationWarning)`` verifies - that calling the function is safe under -W error conditions. - """ - import warnings - - from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("error", DeprecationWarning) - create_langgraph_tracing_handler(trace_id="t1", parent_span_id="p1") - - assert w == [], "create_langgraph_tracing_handler must NOT emit a runtime DeprecationWarning" diff --git a/tests/lib/adk/test_langgraph_sync_unified.py b/tests/lib/adk/test_langgraph_sync_unified.py index cfd522828..7c0eba58e 100644 --- a/tests/lib/adk/test_langgraph_sync_unified.py +++ b/tests/lib/adk/test_langgraph_sync_unified.py @@ -150,12 +150,11 @@ def fake_tracer(self): return tracer, backend async def test_tool_span_derived_from_full_events(self, fake_tracer): - """AGX1-377: SpanDeriver now handles Full tool events for LangGraph. + """SpanDeriver handles Full tool events for LangGraph. Full(ToolRequestContent) opens a tool span keyed by tool_call_id; - Full(ToolResponseContent) closes it. This bridges the previous gap where - LangGraph's Full-event path produced no spans, aligning it with - Start+Done harnesses (pydantic-ai, openai-agents). + Full(ToolResponseContent) closes it, aligning LangGraph's Full-event + path with the Start+Done harnesses (pydantic-ai, openai-agents). 
""" from langchain_core.messages import AIMessage, ToolMessage diff --git a/tests/lib/adk/test_pydantic_ai_async.py b/tests/lib/adk/test_pydantic_ai_async.py index 49cb6054c..737c07f1f 100644 --- a/tests/lib/adk/test_pydantic_ai_async.py +++ b/tests/lib/adk/test_pydantic_ai_async.py @@ -262,8 +262,8 @@ async def test_tool_call_opens_streaming_context_with_identity( ) -> None: """Tool requests are delivered as a streaming context (Start+Delta+Done). - AGX1-377 fix: auto_send now delivers streamed tool-request messages - natively (Start+ToolRequestDelta+Done). The streaming context is opened + auto_send delivers streamed tool-request messages natively + (Start+ToolRequestDelta+Done). The streaming context is opened at the Start event with the initial ToolRequestContent (tool_call_id + name + empty arguments), argument tokens are streamed as deltas, and the context is closed on Done. @@ -304,7 +304,7 @@ async def test_tool_call_opens_streaming_context_with_identity( assert content.tool_call_id == "c1" assert content.name == "get_weather" assert content.author == "agent" - # AGX1-377 streamed shape: initial_content has empty args (args come via delta) + # Streamed shape: initial_content has empty args (args come via delta) assert content.arguments == {} # The arg delta is delivered as a stream_update assert len(ctx.updates) == 1 @@ -657,292 +657,6 @@ async def test_part_delta_without_matching_start_is_ignored( assert final == "" -class TestTracingHandler: - """Tracing handler hooks fire alongside streaming for each tool call.""" - - @dataclass - class _RecordingHandler: - starts: list[dict[str, Any]] = field(default_factory=list) - ends: list[dict[str, Any]] = field(default_factory=list) - - async def on_tool_start(self, tool_call_id: str, tool_name: str, arguments: Any) -> None: - self.starts.append({"tool_call_id": tool_call_id, "tool_name": tool_name, "arguments": arguments}) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - 
self.ends.append({"tool_call_id": tool_call_id, "result": result}) - - async def test_handler_records_start_and_end_for_each_tool_call( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - streaming, messages = fake_adk - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - - # AGX1-373: tool messages arrive via streaming_task_message_context. - # Tracing is still additive — both messages are delivered AND hooks fire. - assert messages.created == [] - assert len(streaming.contexts) == 2 - assert isinstance(streaming.contexts[0].initial_content, ToolRequestContent) - assert isinstance(streaming.contexts[1].initial_content, ToolResponseContent) - # And both lifecycle hooks fired exactly once with the right payload. 
- assert handler.starts == [ - { - "tool_call_id": "c1", - "tool_name": "get_weather", - "arguments": {"city": "Paris"}, - } - ] - assert handler.ends == [{"tool_call_id": "c1", "result": "Sunny"}] - - async def test_handler_not_called_when_no_tool_calls_in_stream( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - handler = self._RecordingHandler() - events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="Hello")), - PartEndEvent(index=0, part=TextPart(content="Hello")), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - assert handler.starts == [] - assert handler.ends == [] - - async def test_handler_records_each_tool_in_multi_tool_run( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - """A turn with two tool calls must produce two start/end pairs in order.""" - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args="{}", tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="lookup_city", args=None, tool_call_id="c2"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="lookup_city", args="{}", tool_call_id="c2"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="lookup_city", content="Paris, FR", tool_call_id="c2"), - ), - ] - await stream_pydantic_ai_events( - _aiter(events), - TASK_ID, - tracing_handler=handler, # type: ignore[arg-type] - ) - - assert [s["tool_call_id"] for s in handler.starts] == ["c1", "c2"] - assert [e["tool_call_id"] for e in handler.ends] == ["c1", "c2"] - assert 
handler.starts[0]["tool_name"] == "get_weather" - assert handler.starts[1]["tool_name"] == "lookup_city" - - async def test_omitting_handler_is_a_no_op_for_existing_behavior( - self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] - ) -> None: - """Regression: passing no tracing handler preserves streaming behavior. - - AGX1-373: tool messages arrive via streaming_task_message_context - regardless of whether tracing_handler is passed. - """ - streaming, messages = fake_adk - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args="{}", tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - await stream_pydantic_ai_events(_aiter(events), TASK_ID) - # AGX1-373: tool messages via streaming_task_message_context. - assert messages.created == [] - assert len(streaming.contexts) == 2 - content_types = [type(ctx.initial_content).__name__ for ctx in streaming.contexts] - assert content_types == ["ToolRequestContent", "ToolResponseContent"] - - -class TestPydanticAITracingHandlerDeterministicIds: - """Regression coverage for ``AgentexPydanticAITracingHandler``. - - pydantic-ai's ``TemporalAgent`` splits a single agent run across several - Temporal activities. The event_stream_handler is invoked once per - activity, with a fresh handler instance each time. So ``on_tool_start`` - (during the model activity that issued the tool call) and ``on_tool_end`` - (during the next model activity, after the tool ran) end up in DIFFERENT - handler instances — an in-memory dict can't pair them. - - The fix is deterministic span IDs derived from ``(trace_id, tool_call_id)``. - These tests lock that in. 
- """ - - class _RecordingClient: - """Stand-in for ``AsyncAgentex`` capturing spans.create / spans.update calls.""" - - def __init__(self) -> None: - self.creates: list[dict[str, Any]] = [] - self.updates: list[tuple[str, dict[str, Any]]] = [] - self.spans = self # so .spans.create / .spans.update resolve back here - - async def create(self, **kwargs: Any) -> Any: - self.creates.append(kwargs) - return None - - async def update(self, span_id: str, **kwargs: Any) -> Any: - self.updates.append((span_id, kwargs)) - return None - - async def test_same_tool_call_id_yields_same_span_id_across_handler_instances( - self, - ) -> None: - """The whole point of the design: two handler instances with the same - trace_id and tool_call_id resolve to the same span ID — otherwise - ``on_tool_end`` patches a different (non-existent) record and the span - in the DB never gets ``end_time`` / ``output``.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client_a = self._RecordingClient() - client_b = self._RecordingClient() - - # Two independent handler instances — simulates the cross-activity - # invocation pattern in TemporalAgent. 
- handler_a = AgentexPydanticAITracingHandler( - trace_id="trace-1", - parent_span_id="parent-1", - task_id="task-1", - client=client_a, # type: ignore[arg-type] - ) - handler_b = AgentexPydanticAITracingHandler( - trace_id="trace-1", - parent_span_id="parent-1", - task_id="task-1", - client=client_b, # type: ignore[arg-type] - ) - - await handler_a.on_tool_start(tool_call_id="call_abc", tool_name="get_weather", arguments={"city": "Paris"}) - await handler_b.on_tool_end(tool_call_id="call_abc", result="Sunny, 72F") - - assert len(client_a.creates) == 1 - assert len(client_b.updates) == 1 - - created_span_id = client_a.creates[0]["id"] - updated_span_id = client_b.updates[0][0] - assert created_span_id == updated_span_id, ( - "on_tool_start and on_tool_end must address the same span across handler " - "instances; mismatch means tool spans will be left open and the AgentEx UI " - "will hide their trace." - ) - - async def test_different_tool_call_ids_yield_different_span_ids(self) -> None: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler( - trace_id="trace-1", - client=client, # type: ignore[arg-type] - ) - - await handler.on_tool_start("call_a", "get_weather", {"city": "Paris"}) - await handler.on_tool_start("call_b", "get_weather", {"city": "Tokyo"}) - - ids = {c["id"] for c in client.creates} - assert len(ids) == 2, "Distinct tool_call_ids must map to distinct span IDs" - - async def test_same_tool_call_id_in_different_traces_yields_different_span_ids( - self, - ) -> None: - """Span IDs are namespaced by trace_id so two unrelated runs with the - same provider-issued tool_call_id don't collide.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler_t1 = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: 
ignore[arg-type] - handler_t2 = AgentexPydanticAITracingHandler(trace_id="trace-2", client=client) # type: ignore[arg-type] - - await handler_t1.on_tool_start("call_abc", "t", None) - await handler_t2.on_tool_start("call_abc", "t", None) - - ids = {c["id"] for c in client.creates} - assert len(ids) == 2 - - async def test_on_tool_end_patches_only_end_time_and_output(self) -> None: - """Don't overwrite start_time, name, parent_id, etc. on close — only patch - the fields we have new values for. Sending start_time again could clobber - what was set at create time.""" - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: ignore[arg-type] - - await handler.on_tool_end("call_abc", "Sunny") - - assert len(client.updates) == 1 - _, patch_kwargs = client.updates[0] - assert set(patch_kwargs.keys()) == {"end_time", "output"}, ( - f"Unexpected fields in tool span PATCH: {set(patch_kwargs.keys())}" - ) - assert patch_kwargs["output"] == {"result": "Sunny"} - - async def test_on_tool_error_patches_error_output(self) -> None: - from agentex.lib.adk._modules._pydantic_ai_tracing import ( - AgentexPydanticAITracingHandler, - ) - - client = self._RecordingClient() - handler = AgentexPydanticAITracingHandler(trace_id="trace-1", client=client) # type: ignore[arg-type] - - await handler.on_tool_error("call_abc", RuntimeError("boom")) - - assert len(client.updates) == 1 - _, patch_kwargs = client.updates[0] - assert "error" in patch_kwargs["output"] - assert "boom" in patch_kwargs["output"]["error"] - - class TestCleanupOnException: async def test_open_contexts_are_closed_on_iterator_failure( self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule] diff --git a/tests/lib/adk/test_pydantic_ai_sync.py b/tests/lib/adk/test_pydantic_ai_sync.py index 080bc5be8..3d6d7beba 100644 --- 
a/tests/lib/adk/test_pydantic_ai_sync.py +++ b/tests/lib/adk/test_pydantic_ai_sync.py @@ -290,90 +290,6 @@ async def test_tool_retry_prompt_surfaces_as_response(self): assert out[0].content.content == "bad arguments" -class TestTracingHandlerSync: - """The sync converter has the same opt-in tracing-handler contract as the - async streamer: pass a handler and the converter calls ``on_tool_start`` / - ``on_tool_end`` for each tool call. Streaming yields are unchanged when - omitted.""" - - class _RecordingHandler: - def __init__(self) -> None: - self.starts: list[dict[str, Any]] = [] - self.ends: list[dict[str, Any]] = [] - - async def on_tool_start(self, tool_call_id: str, tool_name: str, arguments: Any) -> None: - self.starts.append({"tool_call_id": tool_call_id, "tool_name": tool_name, "arguments": arguments}) - - async def on_tool_end(self, tool_call_id: str, result: Any) -> None: - self.ends.append({"tool_call_id": tool_call_id, "result": result}) - - async def test_handler_records_start_and_end_for_a_tool_call(self): - handler = self._RecordingHandler() - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args=None, tool_call_id="c1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="get_weather", args='{"city":"Paris"}', tool_call_id="c1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="get_weather", content="Sunny", tool_call_id="c1"), - ), - ] - out = await _collect( - convert_pydantic_ai_to_agentex_events(_aiter(events), tracing_handler=handler) # type: ignore[arg-type] - ) - - # Streaming output is unchanged. 
- assert any(isinstance(e, StreamTaskMessageStart) for e in out) - assert any(isinstance(e, StreamTaskMessageFull) for e in out) - - assert handler.starts == [ - { - "tool_call_id": "c1", - "tool_name": "get_weather", - "arguments": {"city": "Paris"}, - } - ] - assert handler.ends == [{"tool_call_id": "c1", "result": "Sunny"}] - - async def test_handler_not_called_when_no_tool_calls(self): - handler = self._RecordingHandler() - events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hi")), - PartEndEvent(index=0, part=TextPart(content="hi")), - ] - await _collect( - convert_pydantic_ai_to_agentex_events(_aiter(events), tracing_handler=handler) # type: ignore[arg-type] - ) - assert handler.starts == [] - assert handler.ends == [] - - async def test_omitting_handler_preserves_pre_tracing_behavior(self): - events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="t", args=None, tool_call_id="c"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="t", args="{}", tool_call_id="c"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="t", content="ok", tool_call_id="c"), - ), - ] - out = await _collect(convert_pydantic_ai_to_agentex_events(_aiter(events))) - # Same emit shape as before: Start, Done, Full - types = [type(e).__name__ for e in out] - assert "StreamTaskMessageStart" in types - assert "StreamTaskMessageDone" in types - assert "StreamTaskMessageFull" in types - - class TestMultiStepRun: async def test_text_then_tool_then_text_assigns_distinct_indices(self): """A multi-step run: model emits text + tool call → tool runs → model emits more text. 
diff --git a/tests/lib/adk/test_pydantic_ai_turn.py b/tests/lib/adk/test_pydantic_ai_turn.py index 46bf247a3..c57251db6 100644 --- a/tests/lib/adk/test_pydantic_ai_turn.py +++ b/tests/lib/adk/test_pydantic_ai_turn.py @@ -233,8 +233,8 @@ async def test_no_usage_event_leaves_default_usage(self): class TestToolRequestStreaming: """PydanticAITurn.events equals the bare converter output unconditionally. - The foundation auto_send delivers Start+ToolRequestDelta+Done natively - (AGX1-377), so no coalescing is needed on either channel. + The foundation auto_send delivers Start+ToolRequestDelta+Done natively, so + no coalescing is needed on either channel. """ async def test_events_match_bare_converter_for_streamed_tool_call(self): diff --git a/tests/lib/core/harness/conformance/runner.py b/tests/lib/core/harness/conformance/runner.py index a88c73e05..ffe680c10 100644 --- a/tests/lib/core/harness/conformance/runner.py +++ b/tests/lib/core/harness/conformance/runner.py @@ -53,12 +53,9 @@ identical because both adapters drive the same SpanDeriver.observe() call sequence and forward every signal to their tracer. -AGX1-377 fix: auto_send now DELIVERS streamed tool-request messages (Start+Done) -instead of dropping them. The conformance normaliser previously suppressed the -delivery for Start(tool_request)+Done on the yield channel to match auto_send's -old drop behaviour. That suppression is now removed: both channels produce a -LogicalDelivery for a streamed tool_request, and the cross-channel assertion -verifies it is delivered on both. +auto_send DELIVERS streamed tool-request messages (Start+Done): both channels +produce a LogicalDelivery for a streamed tool_request, and the cross-channel +assertion verifies it is delivered on both. 
""" from __future__ import annotations @@ -145,8 +142,8 @@ def _yield_logical_deliveries(events: list[StreamTaskMessage]) -> list[LogicalDe - reasoning: initial_content.summary joined (from Start) prepended to accumulated reasoning-content deltas (this catches a channel that drops the summary) - - tool_request: JSON-sorted arguments from the Start content (AGX1-377: now - delivered on both channels, no longer suppressed) + - tool_request: JSON-sorted arguments from the Start content (delivered on + both channels) - tool_response: str(content) from Full event """ from agentex.types.text_content import TextContent @@ -191,9 +188,9 @@ def _yield_logical_deliveries(events: list[StreamTaskMessage]) -> list[LogicalDe ) ) elif ctype == "tool_request" and isinstance(content, ToolRequestContent): - # AGX1-377 fix: auto_send now delivers streamed tool-request - # messages. Emit a delivery here so the cross-channel - # assertion verifies it is present on both channels. + # auto_send delivers streamed tool-request messages. Emit a + # delivery here so the cross-channel assertion verifies it is + # present on both channels. deliveries.append( LogicalDelivery( content_type=ctype, diff --git a/tests/lib/core/harness/conformance/test_conformance.py b/tests/lib/core/harness/conformance/test_conformance.py index 6d5f8ca66..a296a6ae0 100644 --- a/tests/lib/core/harness/conformance/test_conformance.py +++ b/tests/lib/core/harness/conformance/test_conformance.py @@ -24,11 +24,9 @@ Full vs Start+Done envelope difference is a documented, acceptable choice in auto_send — see runner.py for the rationale). -AGX1-377 fix: auto_send now delivers streamed tool-request messages. The -suppression that previously prevented the yield normaliser from emitting a -LogicalDelivery for Start(tool_request)+Done is removed. Both channels now -produce a delivery for streamed tool_request, verified by the -"streamed-tool-request" fixture. 
+auto_send delivers streamed tool-request messages: both channels produce a +delivery for streamed tool_request, verified by the "streamed-tool-request" +fixture. """ from __future__ import annotations @@ -134,9 +132,8 @@ StreamTaskMessageDone(type="done", index=0), ], ), - # fixture 4: streamed tool_request (AGX1-377 fix) — tool_request delivered - # via Start+Done (no Full). auto_send now delivers this instead of dropping - # it. Both channels must produce a LogicalDelivery for this fixture. + # fixture 4: streamed tool_request — tool_request delivered via Start+Done + # (no Full). Both channels must produce a LogicalDelivery for this fixture. Fixture( name="streamed-tool-request", events=[ diff --git a/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py index feac188e4..3594de474 100644 --- a/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py +++ b/tests/lib/core/harness/conformance/test_pydantic_ai_conformance.py @@ -8,8 +8,8 @@ Streamed tool requests ---------------------- The pydantic-ai stream emits a tool REQUEST as Start + ToolRequestDelta + Done (not a -Full event). AGX1-377 has landed: both the conformance runner and auto_send now deliver -the Start+Delta+Done(tool_request) shape, so the cross-channel test asserts full +Full event). Both the conformance runner and auto_send deliver the +Start+Delta+Done(tool_request) shape, so the cross-channel test asserts full delivery-equivalence for streamed tool requests. The fixtures below retain the ToolRequestDelta events as the streamed tool-request inputs. """ @@ -77,8 +77,8 @@ def _build_fixtures() -> list[Fixture]: # ------------------------------------------------------------------ # # 2. Single tool call + tool response. # The canonical stream emits Start+ToolRequestDelta+Done for the request - # and Full(ToolResponseContent) for the response. 
See AGX1-377 note above - # for why the request delivery is not yet asserted cross-channel. + # and Full(ToolResponseContent) for the response. Both are asserted + # delivery-equivalent cross-channel (see the module docstring). # ------------------------------------------------------------------ # tool_call_pydantic = [ PartStartEvent( @@ -169,8 +169,8 @@ async def test_cross_channel_equivalence(fixture: Fixture) -> None: """Assert that yield_events and auto_send produce equivalent logical deliveries and identical span signals for each pydantic-ai fixture. - See runner.py for the full contract. The AGX1-377 note at the top of this - module explains why streamed-tool-request delivery is not yet asserted. + See runner.py for the full contract, including streamed-tool-request + delivery equivalence. """ yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture) diff --git a/tests/lib/core/harness/test_auto_send.py b/tests/lib/core/harness/test_auto_send.py index b599f2503..3fd68873e 100644 --- a/tests/lib/core/harness/test_auto_send.py +++ b/tests/lib/core/harness/test_auto_send.py @@ -289,13 +289,13 @@ async def _exploding_gen(): # --------------------------------------------------------------------------- -# Test 6: streamed tool_request delivered (AGX1-377 core) +# Test 6: streamed tool_request delivered # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_auto_send_streams_tool_request(): - """A Start(ToolRequestContent) MUST open a streaming context (AGX1-377).""" + """A Start(ToolRequestContent) MUST open a streaming context.""" streaming = _FakeStreaming() events = [ StreamTaskMessageStart( @@ -445,7 +445,7 @@ async def test_auto_send_full_text_content_sets_final_text(): # --------------------------------------------------------------------------- -# Test 10: created_at is forwarded to streaming context (AGX1-378) +# Test 10: created_at is forwarded to 
streaming context # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_langgraph_async.py b/tests/lib/core/harness/test_harness_langgraph_async.py index 32369fa52..f94eb75ff 100644 --- a/tests/lib/core/harness/test_harness_langgraph_async.py +++ b/tests/lib/core/harness/test_harness_langgraph_async.py @@ -13,10 +13,10 @@ -------------- - The async handler pushes the correct sequence of messages to the fake streaming backend: Full(ToolRequest) + Full(ToolResponse) + text Start/Delta/Done. -- final_text accumulates all text (not just last segment — AGX1-377 unified behavior). +- final_text accumulates all text (not just last segment — unified behavior). - Tool messages go through streaming_task_message_context (not messages.create). -- With a SpanTracer, no tool spans are produced (AGX1-377: Full events are not - handled by SpanDeriver today). +- With a SpanTracer, Full tool events produce tool spans (request opens, response + closes), aligning LangGraph tracing with the Start+Done harnesses. What is NOT covered without live infrastructure ----------------------------------------------- @@ -252,7 +252,7 @@ async def test_turn_usage_populated_after_events_consumed(self): assert usage.total_tokens == 15 async def test_tracer_produces_tool_spans_for_full_events(self): - """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes). + """SpanDeriver handles Full tool events (request opens, response closes). Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it. This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents). 
diff --git a/tests/lib/core/harness/test_harness_langgraph_sync.py b/tests/lib/core/harness/test_harness_langgraph_sync.py index 89c4d406b..dac6966fe 100644 --- a/tests/lib/core/harness/test_harness_langgraph_sync.py +++ b/tests/lib/core/harness/test_harness_langgraph_sync.py @@ -166,7 +166,7 @@ async def test_empty_stream_yields_nothing(self): assert out == [] async def test_tracer_produces_tool_spans_for_full_events(self): - """AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes). + """SpanDeriver handles Full tool events (request opens, response closes). Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it. This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents). diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_async.py b/tests/lib/core/harness/test_harness_pydantic_ai_async.py index e9b73e687..a5781fb23 100644 --- a/tests/lib/core/harness/test_harness_pydantic_ai_async.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_async.py @@ -12,7 +12,7 @@ The async path uses the bare PydanticAITurn (no coalescing): the foundation auto_send delivers streamed tool-request Start+ToolRequestDelta+Done messages -natively (AGX1-377 fix), so no coalescing wrapper is needed. +natively, so no coalescing wrapper is needed. What is tested -------------- @@ -272,9 +272,9 @@ async def test_context_lifecycle_open_then_close(self) -> None: class TestAsyncAutoSendSpanDerivation: """Span derivation on the async path now works for streamed tool requests. - The foundation auto_send delivers Start+ToolRequestDelta+Done natively - (AGX1-377 fix). The SpanDeriver opens a tool span on Done(tool_request), - so the async path now derives spans just like the sync path. + The foundation auto_send delivers Start+ToolRequestDelta+Done natively. + The SpanDeriver opens a tool span on Done(tool_request), so the async path + derives spans just like the sync path. 
""" async def test_tool_span_derived_on_async_path(self) -> None: From 6027d53dc924187526b731d0b5b2661012e8ad7d Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 19:56:03 -0400 Subject: [PATCH 06/12] refactor(harness)!: consolidate each harness to __sync.py + __turn.py Folds the pydantic-ai/langgraph _async + _langgraph_messages helpers into their turn/sync modules (stream_*_events -> __turn.py, emit_langgraph_messages -> _langgraph_sync.py); public facade names are unchanged. Relocates the OpenAI harness Turn + convert_openai_to_agentex_events tap into _modules/_openai_turn.py / _modules/_openai_sync.py, leaving back-compat shims at providers/_modules/{openai_turn, sync_provider}.py so the adk.providers namespace + CLI template keep working (the larger openai.py Temporal/MCP provider stays under adk.providers). Merges the duplicate _sync / _sync_unified test modules into one per harness. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/agentex/lib/adk/__init__.py | 10 +- .../lib/adk/_modules/_langgraph_async.py | 65 ---- .../lib/adk/_modules/_langgraph_messages.py | 85 ----- .../lib/adk/_modules/_langgraph_sync.py | 79 ++++ .../lib/adk/_modules/_langgraph_turn.py | 48 +++ src/agentex/lib/adk/_modules/_openai_sync.py | 358 ++++++++++++++++++ src/agentex/lib/adk/_modules/_openai_turn.py | 134 +++++++ .../lib/adk/_modules/_pydantic_ai_async.py | 50 --- .../lib/adk/_modules/_pydantic_ai_turn.py | 45 +++ .../lib/adk/providers/_modules/openai_turn.py | 136 +------ .../adk/providers/_modules/sync_provider.py | 346 +---------------- src/agentex/lib/core/harness/auto_send.py | 6 +- tests/lib/adk/providers/test_openai_turn.py | 4 +- tests/lib/adk/test_langgraph_async.py | 2 +- tests/lib/adk/test_langgraph_sync.py | 174 ++++++++- tests/lib/adk/test_langgraph_sync_unified.py | 213 ----------- tests/lib/adk/test_pydantic_ai_async.py | 2 +- tests/lib/adk/test_pydantic_ai_sync.py | 167 +++++++- .../lib/adk/test_pydantic_ai_sync_unified.py | 191 ---------- 
tests/lib/core/harness/conformance/runner.py | 4 +- .../test_harness_langgraph_temporal.py | 7 +- 21 files changed, 1030 insertions(+), 1096 deletions(-) delete mode 100644 src/agentex/lib/adk/_modules/_langgraph_async.py delete mode 100644 src/agentex/lib/adk/_modules/_langgraph_messages.py create mode 100644 src/agentex/lib/adk/_modules/_openai_sync.py create mode 100644 src/agentex/lib/adk/_modules/_openai_turn.py delete mode 100644 src/agentex/lib/adk/_modules/_pydantic_ai_async.py delete mode 100644 tests/lib/adk/test_langgraph_sync_unified.py delete mode 100644 tests/lib/adk/test_pydantic_ai_sync_unified.py diff --git a/src/agentex/lib/adk/__init__.py b/src/agentex/lib/adk/__init__.py index 36918af30..3287dc07d 100644 --- a/src/agentex/lib/adk/__init__.py +++ b/src/agentex/lib/adk/__init__.py @@ -6,10 +6,12 @@ from agentex.lib.adk._modules.agents import AgentsModule from agentex.lib.adk._modules.agent_task_tracker import AgentTaskTrackerModule from agentex.lib.adk._modules.checkpointer import create_checkpointer -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events -from agentex.lib.adk._modules._langgraph_messages import emit_langgraph_messages -from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events -from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events +from agentex.lib.adk._modules._langgraph_turn import stream_langgraph_events +from agentex.lib.adk._modules._langgraph_sync import ( + emit_langgraph_messages, + convert_langgraph_to_agentex_events, +) +from agentex.lib.adk._modules._pydantic_ai_turn import stream_pydantic_ai_events from agentex.lib.adk._modules._pydantic_ai_sync import convert_pydantic_ai_to_agentex_events from agentex.lib.adk._modules._claude_code_sync import convert_claude_code_to_agentex_events from agentex.lib.adk._modules._claude_code_turn import ( diff --git a/src/agentex/lib/adk/_modules/_langgraph_async.py 
b/src/agentex/lib/adk/_modules/_langgraph_async.py deleted file mode 100644 index 4d95fc177..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_async.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Async LangGraph streaming helper for Agentex. - -Converts LangGraph graph.astream() events into Agentex streaming updates -and pushes them to Redis via adk.streaming contexts. For use with async -ACP agents that stream via Redis rather than HTTP yields. - -Unified surface ---------------- -This module is now implemented on top of ``LangGraphTurn`` and -``UnifiedEmitter.auto_send_turn``, the same surface used by every other -harness adapter (pydantic-ai, openai-agents, etc.). The public signature -and return type are preserved identically. - -LangGraph emits tool requests as ``StreamTaskMessageFull`` events (from -"updates" events), NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles -Full events correctly; no coalescing wrapper is needed. -""" - -from agentex.lib.utils.temporal import workflow_now_if_in_workflow - - -async def stream_langgraph_events(stream, task_id: str) -> str: - """Stream LangGraph events to Agentex via Redis. - - Processes the stream from graph.astream() called with - stream_mode=["messages", "updates"] and pushes text, reasoning, - tool request, and tool response messages through Redis streaming - contexts. - - Supports both regular models (chunk.content is a str) and reasoning - models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks - in the Responses API responses/v1 format). - - Reimplemented on ``UnifiedEmitter.auto_send_turn(LangGraphTurn(...))`` for - cross-harness consistency. Behavior is identical to the previous bespoke - implementation (verified by characterization tests in test_langgraph_async.py). - - LangGraph emits tool requests as ``Full`` events (from "updates"), NOT - Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events - correctly; no coalescing wrapper is needed. 
- - ``created_at`` is set from ``workflow.now()`` when called inside a - Temporal workflow, matching the pattern used by the openai/litellm providers. - Outside a workflow (plain async activities, sync agents) it is ``None`` and the - server's wall clock is used. - - Args: - stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"]) - task_id: The Agentex task ID to stream messages to. - - Returns: - The accumulated final text output from the agent. - """ - from agentex.lib.core.harness.emitter import UnifiedEmitter - from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - - # LangGraph emits tool requests as Full events (from "updates"), NOT - # Start+Delta+Done like pydantic-ai. auto_send handles Full events correctly; - # no coalescing wrapper is needed. - # Stamp messages with workflow.now() inside Temporal for deterministic - # created_at ordering; falls back to None (server wall clock) outside a workflow. - turn = LangGraphTurn(stream, model=None) - emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None) - result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow()) - return result.final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_messages.py b/src/agentex/lib/adk/_modules/_langgraph_messages.py deleted file mode 100644 index c8856755b..000000000 --- a/src/agentex/lib/adk/_modules/_langgraph_messages.py +++ /dev/null @@ -1,85 +0,0 @@ -"""Emit finished LangGraph messages as Agentex task messages. - -This is the non-streaming counterpart to ``stream_langgraph_events``. Use it -when you run a LangGraph graph with ``ainvoke`` (for example a Temporal-backed -agent using the LangGraph plugin, where streaming deltas aren't available) and -want to surface the resulting messages to the Agentex UI after the fact. 
- -It maps LangGraph/LangChain message objects to Agentex content types: - -- ``AIMessage`` tool calls → ``ToolRequestContent`` (one per call) -- ``AIMessage`` text content → ``TextContent`` -- ``ToolMessage`` → ``ToolResponseContent`` - -Pass only the messages produced this turn (e.g. ``messages[already_emitted:]``) -so each message is surfaced exactly once across a multi-turn conversation. -""" - -from __future__ import annotations - -from typing import Any - - -async def emit_langgraph_messages(messages: list[Any], task_id: str) -> str: - """Create Agentex messages for a list of LangGraph messages. - - Args: - messages: LangGraph/LangChain message objects to surface — typically - the new messages a turn produced. - task_id: The Agentex task to create messages on. - - Returns: - The last assistant text emitted (useful as a span/turn output), or "". - """ - # Lazy imports so langchain isn't required at module load time. - from langchain_core.messages import AIMessage, ToolMessage - - from agentex.lib import adk - from agentex.types.text_content import TextContent - from agentex.types.tool_request_content import ToolRequestContent - from agentex.types.tool_response_content import ToolResponseContent - - final_text = "" - for message in messages: - if isinstance(message, AIMessage): - for tool_call in message.tool_calls or []: - await adk.messages.create( - task_id=task_id, - content=ToolRequestContent( - author="agent", - tool_call_id=tool_call["id"], - name=tool_call["name"], - arguments=tool_call["args"], - ), - ) - # ``content`` may be a plain string (OpenAI) or a list of content - # blocks (Anthropic/Claude via LangChain, e.g. - # ``[{"type": "text", "text": "..."}]``). Extract and join the text - # so the response is visible regardless of the underlying model. 
- if isinstance(message.content, str): - text = message.content - else: - text = "".join( - block.get("text", "") if isinstance(block, dict) else str(block) - for block in message.content - if not isinstance(block, dict) or block.get("type") == "text" - ) - if text: - final_text = text - await adk.messages.create( - task_id=task_id, - content=TextContent(author="agent", content=text, format="markdown"), - ) - elif isinstance(message, ToolMessage): - await adk.messages.create( - task_id=task_id, - content=ToolResponseContent( - author="agent", - tool_call_id=message.tool_call_id, - name=message.name or "unknown", - content=message.content - if isinstance(message.content, str) - else str(message.content), - ), - ) - return final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_sync.py b/src/agentex/lib/adk/_modules/_langgraph_sync.py index a1744304b..9d7b73847 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_sync.py +++ b/src/agentex/lib/adk/_modules/_langgraph_sync.py @@ -271,3 +271,82 @@ async def convert_langgraph_to_agentex_events( yield StreamTaskMessageDone(type="done", index=message_index) if reasoning_streaming: yield StreamTaskMessageDone(type="done", index=message_index) + + +async def emit_langgraph_messages(messages: list[Any], task_id: str) -> str: + """Create Agentex messages for a list of LangGraph messages. + + This is the non-streaming counterpart to ``stream_langgraph_events``. Use it + when you run a LangGraph graph with ``ainvoke`` (for example a Temporal-backed + agent using the LangGraph plugin, where streaming deltas aren't available) and + want to surface the resulting messages to the Agentex UI after the fact. + + It maps LangGraph/LangChain message objects to Agentex content types: + + - ``AIMessage`` tool calls -> ``ToolRequestContent`` (one per call) + - ``AIMessage`` text content -> ``TextContent`` + - ``ToolMessage`` -> ``ToolResponseContent`` + + Pass only the messages produced this turn (e.g. 
``messages[already_emitted:]``) + so each message is surfaced exactly once across a multi-turn conversation. + + Args: + messages: LangGraph/LangChain message objects to surface — typically + the new messages a turn produced. + task_id: The Agentex task to create messages on. + + Returns: + The last assistant text emitted (useful as a span/turn output), or "". + """ + # Lazy imports so langchain isn't required at module load time. + from langchain_core.messages import AIMessage, ToolMessage + + from agentex.lib import adk + from agentex.types.text_content import TextContent + from agentex.types.tool_request_content import ToolRequestContent + from agentex.types.tool_response_content import ToolResponseContent + + final_text = "" + for message in messages: + if isinstance(message, AIMessage): + for tool_call in message.tool_calls or []: + await adk.messages.create( + task_id=task_id, + content=ToolRequestContent( + author="agent", + tool_call_id=tool_call["id"], + name=tool_call["name"], + arguments=tool_call["args"], + ), + ) + # ``content`` may be a plain string (OpenAI) or a list of content + # blocks (Anthropic/Claude via LangChain, e.g. + # ``[{"type": "text", "text": "..."}]``). Extract and join the text + # so the response is visible regardless of the underlying model. 
+ if isinstance(message.content, str): + text = message.content + else: + text = "".join( + block.get("text", "") if isinstance(block, dict) else str(block) + for block in message.content + if not isinstance(block, dict) or block.get("type") == "text" + ) + if text: + final_text = text + await adk.messages.create( + task_id=task_id, + content=TextContent(author="agent", content=text, format="markdown"), + ) + elif isinstance(message, ToolMessage): + await adk.messages.create( + task_id=task_id, + content=ToolResponseContent( + author="agent", + tool_call_id=message.tool_call_id, + name=message.name or "unknown", + content=message.content + if isinstance(message.content, str) + else str(message.content), + ), + ) + return final_text diff --git a/src/agentex/lib/adk/_modules/_langgraph_turn.py b/src/agentex/lib/adk/_modules/_langgraph_turn.py index 84c5c7838..a6e290e1b 100644 --- a/src/agentex/lib/adk/_modules/_langgraph_turn.py +++ b/src/agentex/lib/adk/_modules/_langgraph_turn.py @@ -14,6 +14,7 @@ from typing import Any, AsyncIterator from collections.abc import AsyncGenerator +from agentex.lib.utils.temporal import workflow_now_if_in_workflow from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events @@ -150,3 +151,50 @@ def usage(self) -> TurnUsage: did not report usage. """ return self._usage + + +async def stream_langgraph_events(stream, task_id: str) -> str: + """Stream LangGraph events to Agentex via Redis. + + Converts LangGraph ``graph.astream()`` events into Agentex streaming + updates and pushes them to Redis via ``adk.streaming`` contexts. For use + with async ACP agents that stream via Redis rather than HTTP yields. + + Processes the stream from graph.astream() called with + stream_mode=["messages", "updates"] and pushes text, reasoning, + tool request, and tool response messages through Redis streaming + contexts. 
+ + Supports both regular models (chunk.content is a str) and reasoning + models like gpt-5/o1/o3 (chunk.content is a list of typed content blocks + in the Responses API responses/v1 format). + + Implemented on ``UnifiedEmitter.auto_send_turn(LangGraphTurn(...))`` for + cross-harness consistency, the same surface used by every other harness + adapter (pydantic-ai, openai-agents, etc.). The public signature and + return type are preserved identically. + + LangGraph emits tool requests as ``Full`` events (from "updates"), NOT + Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events + correctly; no coalescing wrapper is needed. + + ``created_at`` is set from ``workflow.now()`` when called inside a + Temporal workflow, matching the pattern used by the openai/litellm providers. + Outside a workflow (plain async activities, sync agents) it is ``None`` and the + server's wall clock is used. + + Args: + stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"]) + task_id: The Agentex task ID to stream messages to. + + Returns: + The accumulated final text output from the agent. + """ + from agentex.lib.core.harness.emitter import UnifiedEmitter + + # Stamp messages with workflow.now() inside Temporal for deterministic + # created_at ordering; falls back to None (server wall clock) outside a workflow. + turn = LangGraphTurn(stream, model=None) + emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None) + result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow()) + return result.final_text diff --git a/src/agentex/lib/adk/_modules/_openai_sync.py b/src/agentex/lib/adk/_modules/_openai_sync.py new file mode 100644 index 000000000..f16022200 --- /dev/null +++ b/src/agentex/lib/adk/_modules/_openai_sync.py @@ -0,0 +1,358 @@ +"""Sync OpenAI Agents SDK streaming tap for Agentex. 
+ +Converts an OpenAI Agents SDK streamed run (``Runner.run_streamed(...)`` +``stream_events()``) into Agentex ``StreamTaskMessage*`` events, including +reasoning content and reasoning summary deltas for reasoning models (o1/o3/gpt-5). + +This is the lower-level primitive used by ``OpenAITurn`` (in +``_openai_turn.py``). New OpenAI Agents integrations should prefer wrapping a +``Runner.run_streamed`` result in ``OpenAITurn`` and driving delivery + tracing +through ``UnifiedEmitter``. +""" + +from __future__ import annotations + +from typing import Any + +from openai.types.responses import ( + ResponseTextDeltaEvent, + ResponseFunctionToolCall, + ResponseFunctionWebSearch, + ResponseOutputItemDoneEvent, + ResponseOutputItemAddedEvent, + ResponseCodeInterpreterToolCall, + ResponseReasoningSummaryPartAddedEvent, + ResponseReasoningSummaryTextDeltaEvent, +) +from openai.types.responses.response_reasoning_text_done_event import ResponseReasoningTextDoneEvent +from openai.types.responses.response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent +from openai.types.responses.response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent + +from agentex.types.task_message_delta import TextDelta +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.task_message_content import TextContent +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.types.reasoning_content_delta import ReasoningContentDelta +from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta + + +def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, Any]]: + """ + Extract call_id, tool_name, and tool_arguments from a tool call item. 
+ Args: + tool_call_item: The tool call item to process + Returns: + A tuple of (call_id, tool_name, tool_arguments) + """ + # Generic handling for different tool call types + # Try 'call_id' first, then 'id', then generate placeholder + if hasattr(tool_call_item, "call_id"): + call_id = tool_call_item.call_id + elif hasattr(tool_call_item, "id"): + call_id = tool_call_item.id + else: + call_id = f"unknown_call_{id(tool_call_item)}" + + if isinstance(tool_call_item, ResponseFunctionWebSearch): + tool_name = "web_search" + tool_arguments = {"action": tool_call_item.action.model_dump(), "status": tool_call_item.status} + elif isinstance(tool_call_item, ResponseCodeInterpreterToolCall): + tool_name = "code_interpreter" + tool_arguments = {"code": tool_call_item.code, "status": tool_call_item.status} + elif isinstance(tool_call_item, ResponseFunctionToolCall): + # Handle standard function tool calls + tool_name = tool_call_item.name + # Handle the arguments field which might be a string or None + if tool_call_item.arguments: + if isinstance(tool_call_item.arguments, str): + import json + + tool_arguments = json.loads(tool_call_item.arguments) if tool_call_item.arguments else {} + else: + tool_arguments = tool_call_item.arguments + else: + tool_arguments = {} + else: + # Generic handling for any tool call type + tool_name = getattr(tool_call_item, "name", type(tool_call_item).__name__) + # Handle the arguments field which might be a string or None + if hasattr(tool_call_item, "arguments"): + arguments = tool_call_item.arguments + if isinstance(arguments, str): + import json + + tool_arguments = json.loads(arguments) if arguments else {} + elif arguments is None: + tool_arguments = {} + else: + tool_arguments = arguments + else: + tool_arguments = tool_call_item.model_dump() + + return call_id, tool_name, tool_arguments + + +def _extract_tool_response_info(tool_map: dict[str, Any], tool_output_item: Any) -> tuple[str, str, str]: + """ + Extract call_id, tool_name, and 
content from a tool output item. + Args: + tool_map: Dictionary mapping call_ids to tool names + tool_output_item: The tool output item to process + Returns: + A tuple of (call_id, tool_name, content) + """ + + # Handle different formats of tool_output_item + if isinstance(tool_output_item, dict): + call_id = tool_output_item.get("call_id", tool_output_item.get("id", f"unknown_call_{id(tool_output_item)}")) + content = tool_output_item.get("output", str(tool_output_item)) + else: + # Try to get call_id from attributes + if hasattr(tool_output_item, "call_id"): + call_id = tool_output_item.call_id + elif hasattr(tool_output_item, "id"): + call_id = tool_output_item.id + else: + call_id = f"unknown_call_{id(tool_output_item)}" + + # Get content + if hasattr(tool_output_item, "output"): + content = tool_output_item.output + else: + content = str(tool_output_item) + + # Get tool name from map + tool_name = tool_map.get(call_id, "unknown_tool") + + return call_id, tool_name, content + + +async def convert_openai_to_agentex_events(stream_response): + """Convert OpenAI streaming events to AgentEx TaskMessageUpdate events with reasoning support. 
+ + This is an enhanced version of the base converter that includes support for: + - Reasoning content deltas (for reasoning models, e.g. o1/o3/gpt-5) + - Reasoning summary deltas (for reasoning models, e.g. o1/o3/gpt-5) + + Args: + stream_response: An async iterator of OpenAI streaming events + Yields: + TaskMessageUpdate: AgentEx streaming events (StreamTaskMessageStart, StreamTaskMessageDelta, StreamTaskMessageFull, or StreamTaskMessageDone) + """ + + tool_map = {} + event_count = 0 + message_index = 0 # Track message index for proper sequencing + seen_tool_output = False # Track if we've seen tool output to know when final text starts + item_id_to_index = {} # Map item_id to message index + item_id_to_type = {} # Map item_id to content type (text, reasoning_content, reasoning_summary) + + async for event in stream_response: + event_count += 1 + + # Check for raw response events which contain the actual OpenAI streaming events + if hasattr(event, "type") and event.type == "raw_response_event": + if hasattr(event, "data"): + raw_event = event.data + + # Check for ResponseOutputItemAddedEvent which signals a new message starting + if isinstance(raw_event, ResponseOutputItemAddedEvent): + # Don't increment here - we'll increment when we see the actual text delta + # This is just a signal that a new message is starting + pass + + # Handle item completion - send done event to close the message + elif isinstance(raw_event, ResponseOutputItemDoneEvent): + item_id = raw_event.item.id + if item_id in item_id_to_index: + # Get the message type to decide whether to send done event + message_type = item_id_to_type.get(item_id, "text") + + # Don't send done events for reasoning content/summary + # They just end with their last delta + if message_type not in ("reasoning_content", "reasoning_summary"): + yield StreamTaskMessageDone( + type="done", + index=item_id_to_index[item_id], + ) + + # Skip reasoning summary part added events - we handle them on delta + elif isinstance(raw_event, ResponseReasoningSummaryPartAddedEvent): + pass + + # Handle
reasoning summary text delta events + elif isinstance(raw_event, ResponseReasoningSummaryTextDeltaEvent): + item_id = raw_event.item_id + summary_index = raw_event.summary_index + + # If this is a new item_id we haven't seen, create a new message + if item_id and item_id not in item_id_to_index: + message_index += 1 + item_id_to_index[item_id] = message_index + item_id_to_type[item_id] = "reasoning_summary" + + # Send a start event for this new reasoning summary message + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=TextContent( + type="text", + author="agent", + content="", # Start with empty content + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + # Yield reasoning summary delta + yield StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=ReasoningSummaryDelta( + type="reasoning_summary", + summary_index=summary_index, + summary_delta=raw_event.delta, + ), + ) + + # Handle reasoning summary text done events + elif isinstance(raw_event, ResponseReasoningSummaryTextDoneEvent): + # We do NOT close the streaming context here + # as there can be multiple reasoning summaries. 
+ # This converter emits no Done for reasoning items, even on + # ResponseOutputItemDoneEvent; they end with their last delta + pass + + # Handle reasoning content text delta events + elif isinstance(raw_event, ResponseReasoningTextDeltaEvent): + item_id = raw_event.item_id + content_index = raw_event.content_index + + # If this is a new item_id we haven't seen, create a new message + if item_id and item_id not in item_id_to_index: + message_index += 1 + item_id_to_index[item_id] = message_index + item_id_to_type[item_id] = "reasoning_content" + + # Send a start event for this new reasoning content message + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=TextContent( + type="text", + author="agent", + content="", # Start with empty content + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + # Yield reasoning content delta + yield StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=ReasoningContentDelta( + type="reasoning_content", + content_index=content_index, + content_delta=raw_event.delta, + ), + ) + + # Handle reasoning content text done events + elif isinstance(raw_event, ResponseReasoningTextDoneEvent): + # We do NOT close the streaming context here + # as there can be multiple reasoning content texts.
+ # This converter emits no Done for reasoning items, even on + # ResponseOutputItemDoneEvent; they end with their last delta + pass + + # Check if this is a text delta event from OpenAI + elif isinstance(raw_event, ResponseTextDeltaEvent): + # Check if this event has an item_id + item_id = getattr(raw_event, "item_id", None) + + # If this is a new item_id we haven't seen, it's a new message + if item_id and item_id not in item_id_to_index: + # Check if this is truly a NEW text message after tools + # We need to differentiate between the first text and the final text after tools + if seen_tool_output: + # This is the final text message after tool execution + message_index += 1 + item_id_to_index[item_id] = message_index + else: + item_id_to_index[item_id] = message_index + + item_id_to_type[item_id] = "text" + + # Send a start event with empty content for this new text message + yield StreamTaskMessageStart( + type="start", + index=item_id_to_index[item_id], + content=TextContent( + type="text", + author="agent", + content="", # Start with empty content, deltas will fill it + ), + ) + + # Use the index for this item_id + current_index = item_id_to_index.get(item_id, message_index) + + delta_message = StreamTaskMessageDelta( + type="delta", + index=current_index, + delta=TextDelta( + type="text", + text_delta=raw_event.delta, + ), + ) + yield delta_message + + elif hasattr(event, "type") and event.type == "run_item_stream_event": + # Skip reasoning_item events - they're handled via raw_response_event above + if hasattr(event, "item") and event.item.type == "reasoning_item": + continue + + # Check for tool_call_item type (this is when a tool is being called) + elif hasattr(event, "item") and event.item.type == "tool_call_item": + # Extract tool call information using the helper method + call_id, tool_name, tool_arguments = _extract_tool_call_info(event.item.raw_item) + tool_map[call_id] = tool_name + tool_request_content = ToolRequestContent( + tool_call_id=call_id, + name=tool_name, +
arguments=tool_arguments, + author="agent", + ) + message_index += 1 # Increment for new message + yield StreamTaskMessageFull( + index=message_index, + type="full", + content=tool_request_content, + ) + + # Check for tool_call_output_item type (this is when a tool returns output) + elif hasattr(event, "item") and event.item.type == "tool_call_output_item": + # Extract tool response information using the helper method + call_id, tool_name, content = _extract_tool_response_info(tool_map, event.item.raw_item) + tool_response_content = ToolResponseContent( + tool_call_id=call_id, + name=tool_name, + content=content, + author="agent", + ) + message_index += 1 # Increment for new message + seen_tool_output = True # Mark that we've seen tool output so next text gets new index + yield StreamTaskMessageFull( + type="full", + index=message_index, + content=tool_response_content, + ) diff --git a/src/agentex/lib/adk/_modules/_openai_turn.py b/src/agentex/lib/adk/_modules/_openai_turn.py new file mode 100644 index 000000000..cfb1ce22d --- /dev/null +++ b/src/agentex/lib/adk/_modules/_openai_turn.py @@ -0,0 +1,134 @@ +"""OpenAITurn: adapt an OpenAI Agents SDK streamed run onto the harness surface. + +A ``HarnessTurn`` exposes a single canonical ``StreamTaskMessage*`` stream plus +normalized usage. ``OpenAITurn`` wraps a ``RunResultStreaming`` (from +``Runner.run_streamed``), converts its native OpenAI events into the canonical +stream via ``convert_openai_to_agentex_events``, and after exhaustion reads the +run's ``raw_responses`` to aggregate usage into a provider-independent +``TurnUsage``. + +Delivery (yield vs auto-send) and tracing are owned by ``UnifiedEmitter``; this +module is purely the provider->canonical adapter. 
from __future__ import annotations

from typing import TYPE_CHECKING, Any, AsyncIterator

from agents.usage import Usage

from agentex.lib.utils.logging import make_logger
from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage
from agentex.lib.adk._modules._openai_sync import (
    convert_openai_to_agentex_events,
)

if TYPE_CHECKING:
    from agents import ModelResponse, RunResultStreaming

logger = make_logger(__name__)


def openai_usage_to_turn_usage(usage: Usage | None, model: str | None) -> TurnUsage:
    """Map an ``agents.Usage`` to a harness-independent ``TurnUsage``.

    All field access is defensive (``getattr(..., None)``): different model
    backends populate different subsets of the usage object, and real zeros are
    valid values (e.g. 0 output tokens on a pure cache hit), so we never coerce
    a present-but-zero value into ``None``.

    Args:
        usage: Usage to normalize, or ``None`` when the run reported none.
        model: Model name stamped onto the returned ``TurnUsage``.

    Returns:
        ``TurnUsage(model=model)`` when ``usage`` is ``None``; otherwise a
        ``TurnUsage`` whose token fields are whatever attributes ``usage``
        exposes (missing attributes become ``None``).
    """
    if usage is None:
        return TurnUsage(model=model)

    # The detail objects are optional; getattr on ``None`` below simply
    # yields ``None`` for the cached/reasoning counts.
    input_details = getattr(usage, "input_tokens_details", None)
    output_details = getattr(usage, "output_tokens_details", None)

    return TurnUsage(
        model=model,
        # The request count is the one field defaulted to 0 when absent.
        num_llm_calls=getattr(usage, "requests", None) or 0,
        input_tokens=getattr(usage, "input_tokens", None),
        cached_input_tokens=getattr(input_details, "cached_tokens", None),
        output_tokens=getattr(usage, "output_tokens", None),
        reasoning_tokens=getattr(output_details, "reasoning_tokens", None),
        total_tokens=getattr(usage, "total_tokens", None),
    )


def _aggregate_usage(raw_responses: list[ModelResponse]) -> Usage | None:
    """Sum the per-response ``Usage`` across a run's ``ModelResponse`` list.

    Returns ``None`` when no response carries usage so the caller can emit a
    usage object with only the model name set. ``Usage.add`` accumulates
    requests/tokens (including cached/reasoning detail fields).

    Args:
        raw_responses: The responses to scan; entries without a ``usage``
            attribute (or with ``usage=None``) are skipped.

    Returns:
        A fresh ``Usage`` holding the accumulated totals, or ``None`` if every
        response was skipped (including the empty-list case).
    """
    total: Usage | None = None
    for response in raw_responses:
        resp_usage = getattr(response, "usage", None)
        if resp_usage is None:
            continue
        # Create the accumulator lazily so "no usage at all" stays ``None``.
        if total is None:
            total = Usage()
        total.add(resp_usage)
    return total


class OpenAITurn:
    """A single OpenAI Agents SDK turn adapted to the ``HarnessTurn`` protocol.

    Construct with exactly one of:
    - ``result``: a ``RunResultStreaming`` from ``Runner.run_streamed``. Its
      ``stream_events()`` is converted to the canonical stream, and after the
      stream is exhausted ``raw_responses`` is read to compute usage.
    - ``stream``: a pre-built async iterator of canonical ``StreamTaskMessage``
      events (bypasses ``convert_openai_to_agentex_events``). Useful for tests
      and for callers that have already produced canonical events. Usage stays
      at ``TurnUsage(model=...)`` because there is no run to read usage from.

    Passing neither raises ``ValueError``. Passing both is accepted for
    backward compatibility, but ``stream`` takes precedence: ``result`` is
    never iterated and its usage is never read. A warning is logged so the
    dropped ``result`` does not go unnoticed.

    ``coalesce_tool_requests`` is accepted for API parity with other provider
    turns but is a no-op for OpenAI: the OpenAI converter already emits a single
    ``Full(ToolRequestContent)`` per tool call rather than streamed argument
    deltas, so there is nothing to coalesce.
    """

    def __init__(
        self,
        result: RunResultStreaming | None = None,
        model: str | None = None,
        stream: AsyncIterator[StreamTaskMessage] | None = None,
        coalesce_tool_requests: bool = False,  # noqa: ARG002 - API parity, no-op for OpenAI
    ) -> None:
        if result is None and stream is None:
            raise ValueError("OpenAITurn requires either `result` or `stream`")
        if result is not None and stream is not None:
            # Documented contract is "exactly one of"; keep accepting both so
            # existing callers do not break, but surface the ignored argument.
            logger.warning(
                "OpenAITurn received both `result` and `stream`; `stream` takes "
                "precedence and `result` (including its usage) is ignored"
            )
        self._result = result
        self._model = model
        self._stream = stream
        # Placeholder until the run-backed stream is exhausted.
        self._usage: TurnUsage = TurnUsage(model=model)

    @property
    def events(self) -> AsyncIterator[StreamTaskMessage]:
        """The canonical event stream; each access builds a new generator."""
        return self._iter_events()

    async def _iter_events(self) -> AsyncIterator[StreamTaskMessage]:
        """Yield canonical events, then aggregate usage for run-backed turns."""
        if self._stream is not None:
            # Pre-built canonical stream: pass through untouched, no usage.
            async for event in self._stream:
                yield event
            return

        result = self._result
        if result is None:
            # Unreachable via __init__; an explicit guard (rather than a bare
            # ``assert``) still holds when Python runs with ``-O``.
            raise RuntimeError("OpenAITurn has neither `result` nor `stream`")
        async for event in convert_openai_to_agentex_events(result.stream_events()):
            yield event

        # Stream is exhausted: the run has finished and raw_responses is now
        # populated, so usage can be aggregated and normalized.
        try:
            raw_responses: list[Any] = list(getattr(result, "raw_responses", None) or [])
            aggregated = _aggregate_usage(raw_responses)
            self._usage = openai_usage_to_turn_usage(aggregated, self._model)
        except Exception as exc:  # pragma: no cover - defensive: never break delivery on usage
            logger.warning(f"Failed to aggregate OpenAI usage: {exc}")
            self._usage = TurnUsage(model=self._model)

    def usage(self) -> TurnUsage:
        """Normalized turn usage. Valid only after ``events`` is exhausted."""
        return self._usage
- -Consumes a Pydantic AI ``agent.run_stream_events(...)`` async iterator and -pushes Agentex streaming updates to Redis via the ``adk.streaming`` -contexts. For use with async ACP agents that stream via Redis rather than -HTTP yields. - -Text and thinking tokens stream as deltas inside coalesced streaming -contexts. Tool requests and tool results are posted as open+close pairs -on a streaming context (the unified surface persists ``initial_content`` -when a context is closed without deltas). This matches the ``auto_send`` -convention used by all other async/Temporal harnesses. - -Tracing is derived automatically from the event stream by the emitter when -a ``trace_id`` is provided to the ``UnifiedEmitter``. -""" - -from __future__ import annotations - - -async def stream_pydantic_ai_events( - stream, - task_id: str, -) -> str: - """Stream Pydantic AI events to Agentex via Redis. - - Args: - stream: Async iterator yielded by ``agent.run_stream_events(...)``. - task_id: The Agentex task ID to stream messages to. - - Returns: - The accumulated text content of the **last** text part in the run. - Multi-step runs (where the model emits text, then a tool call, then - more text) return only the final text segment, matching the - ``stream_langgraph_events`` convention. 
async def stream_pydantic_ai_events(
    stream,
    task_id: str,
    *,
    model: "str | None" = None,
    trace_id: "str | None" = None,
    parent_span_id: "str | None" = None,
) -> str:
    """Stream Pydantic AI events to Agentex via Redis.

    Consumes a Pydantic AI ``agent.run_stream_events(...)`` async iterator and
    pushes Agentex streaming updates to Redis via the ``adk.streaming``
    contexts. For use with async ACP agents that stream via Redis rather than
    HTTP yields.

    Text and thinking tokens stream as deltas inside coalesced streaming
    contexts. Tool requests and tool results are posted as open+close pairs
    on a streaming context (the unified surface persists ``initial_content``
    when a context is closed without deltas). This matches the ``auto_send``
    convention used by all other async/Temporal harnesses.

    Tracing is derived automatically from the event stream by the emitter when
    a ``trace_id`` is provided to the ``UnifiedEmitter``. With the defaults
    this helper passes ``trace_id=None`` and ``parent_span_id=None``, exactly
    as it did before these keyword arguments existed.

    Args:
        stream: Async iterator yielded by ``agent.run_stream_events(...)``.
        task_id: The Agentex task ID to stream messages to.
        model: Optional model name forwarded to ``PydanticAITurn``.
        trace_id: Optional trace ID forwarded to ``UnifiedEmitter``.
        parent_span_id: Optional parent span ID forwarded to
            ``UnifiedEmitter``.

    Returns:
        The accumulated text content of the **last** text part in the run.
        Multi-step runs (where the model emits text, then a tool call, then
        more text) return only the final text segment, matching the
        ``stream_langgraph_events`` convention.
    """
    # Imported inside the function, as the original helper did.
    from agentex.lib.core.harness.emitter import UnifiedEmitter

    turn = PydanticAITurn(
        stream,
        model=model,
    )
    emitter = UnifiedEmitter(
        task_id=task_id,
        trace_id=trace_id,
        parent_span_id=parent_span_id,
    )
    result = await emitter.auto_send_turn(turn)
    return result.final_text
""" -from __future__ import annotations - -from typing import TYPE_CHECKING, Any, AsyncIterator - -from agents.usage import Usage - -from agentex.lib.utils.logging import make_logger -from agentex.lib.core.harness.types import TurnUsage, StreamTaskMessage -from agentex.lib.adk.providers._modules.sync_provider import ( - convert_openai_to_agentex_events, -) - -if TYPE_CHECKING: - from agents import ModelResponse, RunResultStreaming - -logger = make_logger(__name__) - - -def openai_usage_to_turn_usage(usage: Usage | None, model: str | None) -> TurnUsage: - """Map an ``agents.Usage`` to a harness-independent ``TurnUsage``. - - All field access is defensive (``getattr(..., None)``): different model - backends populate different subsets of the usage object, and real zeros are - valid values (e.g. 0 output tokens on a pure cache hit), so we never coerce - a present-but-zero value into ``None``. - """ - if usage is None: - return TurnUsage(model=model) - - input_details = getattr(usage, "input_tokens_details", None) - output_details = getattr(usage, "output_tokens_details", None) - - return TurnUsage( - model=model, - num_llm_calls=getattr(usage, "requests", None) or 0, - input_tokens=getattr(usage, "input_tokens", None), - cached_input_tokens=getattr(input_details, "cached_tokens", None), - output_tokens=getattr(usage, "output_tokens", None), - reasoning_tokens=getattr(output_details, "reasoning_tokens", None), - total_tokens=getattr(usage, "total_tokens", None), - ) - - -def _aggregate_usage(raw_responses: list[ModelResponse]) -> Usage | None: - """Sum the per-response ``Usage`` across a run's ``ModelResponse`` list. - - Returns ``None`` when no response carries usage so the caller can emit a - usage object with only the model name set. ``Usage.add`` accumulates - requests/tokens (including cached/reasoning detail fields). 
- """ - total: Usage | None = None - for response in raw_responses: - resp_usage = getattr(response, "usage", None) - if resp_usage is None: - continue - if total is None: - total = Usage() - total.add(resp_usage) - return total - - -class OpenAITurn: - """A single OpenAI Agents SDK turn adapted to the ``HarnessTurn`` protocol. - - Construct with exactly one of: - - ``result``: a ``RunResultStreaming`` from ``Runner.run_streamed``. Its - ``stream_events()`` is converted to the canonical stream, and after the - stream is exhausted ``raw_responses`` is read to compute usage. - - ``stream``: a pre-built async iterator of canonical ``StreamTaskMessage`` - events (bypasses ``convert_openai_to_agentex_events``). Useful for tests - and for callers that have already produced canonical events. Usage stays - at ``TurnUsage(model=...)`` because there is no run to read usage from. - - ``coalesce_tool_requests`` is accepted for API parity with other provider - turns but is a no-op for OpenAI: the OpenAI converter already emits a single - ``Full(ToolRequestContent)`` per tool call rather than streamed argument - deltas, so there is nothing to coalesce. 
- """ - - def __init__( - self, - result: RunResultStreaming | None = None, - model: str | None = None, - stream: AsyncIterator[StreamTaskMessage] | None = None, - coalesce_tool_requests: bool = False, # noqa: ARG002 - API parity, no-op for OpenAI - ) -> None: - if result is None and stream is None: - raise ValueError("OpenAITurn requires either `result` or `stream`") - self._result = result - self._model = model - self._stream = stream - self._usage: TurnUsage = TurnUsage(model=model) - - @property - def events(self) -> AsyncIterator[StreamTaskMessage]: - return self._iter_events() - - async def _iter_events(self) -> AsyncIterator[StreamTaskMessage]: - if self._stream is not None: - async for event in self._stream: - yield event - return - - result = self._result - assert result is not None # guaranteed by __init__ - async for event in convert_openai_to_agentex_events(result.stream_events()): - yield event - - # Stream is exhausted: the run has finished and raw_responses is now - # populated, so usage can be aggregated and normalized. - try: - raw_responses: list[Any] = list(getattr(result, "raw_responses", None) or []) - aggregated = _aggregate_usage(raw_responses) - self._usage = openai_usage_to_turn_usage(aggregated, self._model) - except Exception as exc: # pragma: no cover - defensive: never break delivery on usage - logger.warning(f"Failed to aggregate OpenAI usage: {exc}") - self._usage = TurnUsage(model=self._model) - - def usage(self) -> TurnUsage: - """Normalized turn usage. 
Valid only after ``events`` is exhausted.""" - return self._usage +from agentex.lib.adk._modules._openai_turn import OpenAITurn # noqa: F401 diff --git a/src/agentex/lib/adk/providers/_modules/sync_provider.py b/src/agentex/lib/adk/providers/_modules/sync_provider.py index 9996bf30d..3836b9e02 100644 --- a/src/agentex/lib/adk/providers/_modules/sync_provider.py +++ b/src/agentex/lib/adk/providers/_modules/sync_provider.py @@ -14,36 +14,11 @@ TResponseInputItem, AgentOutputSchemaBase, ) -from openai.types.responses import ( - ResponseTextDeltaEvent, - ResponseFunctionToolCall, - ResponseFunctionWebSearch, - ResponseOutputItemDoneEvent, - ResponseOutputItemAddedEvent, - ResponseCodeInterpreterToolCall, - ResponseReasoningSummaryPartAddedEvent, - ResponseReasoningSummaryTextDeltaEvent, -) from agents.models.openai_provider import OpenAIProvider -from openai.types.responses.response_reasoning_text_done_event import ResponseReasoningTextDoneEvent -from openai.types.responses.response_reasoning_text_delta_event import ResponseReasoningTextDeltaEvent -from openai.types.responses.response_reasoning_summary_text_done_event import ResponseReasoningSummaryTextDoneEvent from agentex import AsyncAgentex from agentex.lib.utils.logging import make_logger from agentex.lib.core.tracing.tracer import AsyncTracer -from agentex.types.task_message_delta import TextDelta -from agentex.types.task_message_update import ( - StreamTaskMessageDone, - StreamTaskMessageFull, - StreamTaskMessageDelta, - StreamTaskMessageStart, -) -from agentex.types.task_message_content import TextContent -from agentex.types.tool_request_content import ToolRequestContent -from agentex.types.tool_response_content import ToolResponseContent -from agentex.types.reasoning_content_delta import ReasoningContentDelta -from agentex.types.reasoning_summary_delta import ReasoningSummaryDelta logger = make_logger(__name__) @@ -94,7 +69,7 @@ class SyncStreamingModel(Model): .. 
deprecated:: Prefer the unified harness surface for new OpenAI Agents integrations: wrap a ``Runner.run_streamed`` result in - ``agentex.lib.adk.providers._modules.openai_turn.OpenAITurn`` and drive + ``agentex.lib.adk._modules._openai_turn.OpenAITurn`` and drive delivery + tracing through ``UnifiedEmitter`` (see the ``060_harness_openai`` / ``130_harness_openai`` / ``140_harness_openai`` tutorials). This per-model tracing wrapper predates the harness and is @@ -405,317 +380,8 @@ def get_model(self, model_name: Optional[str] = None) -> Model: return wrapped_model -def _extract_tool_call_info(tool_call_item: Any) -> tuple[str, str, dict[str, Any]]: - """ - Extract call_id, tool_name, and tool_arguments from a tool call item. - Args: - tool_call_item: The tool call item to process - Returns: - A tuple of (call_id, tool_name, tool_arguments) - """ - # Generic handling for different tool call types - # Try 'call_id' first, then 'id', then generate placeholder - if hasattr(tool_call_item, "call_id"): - call_id = tool_call_item.call_id - elif hasattr(tool_call_item, "id"): - call_id = tool_call_item.id - else: - call_id = f"unknown_call_{id(tool_call_item)}" - - if isinstance(tool_call_item, ResponseFunctionWebSearch): - tool_name = "web_search" - tool_arguments = {"action": tool_call_item.action.model_dump(), "status": tool_call_item.status} - elif isinstance(tool_call_item, ResponseCodeInterpreterToolCall): - tool_name = "code_interpreter" - tool_arguments = {"code": tool_call_item.code, "status": tool_call_item.status} - elif isinstance(tool_call_item, ResponseFunctionToolCall): - # Handle standard function tool calls - tool_name = tool_call_item.name - # Handle the arguments field which might be a string or None - if tool_call_item.arguments: - if isinstance(tool_call_item.arguments, str): - import json - - tool_arguments = json.loads(tool_call_item.arguments) if tool_call_item.arguments else {} - else: - tool_arguments = tool_call_item.arguments - else: - 
tool_arguments = {} - else: - # Generic handling for any tool call type - tool_name = getattr(tool_call_item, "name", type(tool_call_item).__name__) - # Handle the arguments field which might be a string or None - if hasattr(tool_call_item, "arguments"): - arguments = tool_call_item.arguments - if isinstance(arguments, str): - import json - - tool_arguments = json.loads(arguments) if arguments else {} - elif arguments is None: - tool_arguments = {} - else: - tool_arguments = arguments - else: - tool_arguments = tool_call_item.model_dump() - - return call_id, tool_name, tool_arguments - - -def _extract_tool_response_info(tool_map: dict[str, Any], tool_output_item: Any) -> tuple[str, str, str]: - """ - Extract call_id, tool_name, and content from a tool output item. - Args: - tool_map: Dictionary mapping call_ids to tool names - tool_output_item: The tool output item to process - Returns: - A tuple of (call_id, tool_name, content) - """ - - # Handle different formats of tool_output_item - if isinstance(tool_output_item, dict): - call_id = tool_output_item.get("call_id", tool_output_item.get("id", f"unknown_call_{id(tool_output_item)}")) - content = tool_output_item.get("output", str(tool_output_item)) - else: - # Try to get call_id from attributes - if hasattr(tool_output_item, "call_id"): - call_id = tool_output_item.call_id - elif hasattr(tool_output_item, "id"): - call_id = tool_output_item.id - else: - call_id = f"unknown_call_{id(tool_output_item)}" - - # Get content - if hasattr(tool_output_item, "output"): - content = tool_output_item.output - else: - content = str(tool_output_item) - - # Get tool name from map - tool_name = tool_map.get(call_id, "unknown_tool") - - return call_id, tool_name, content - - -async def convert_openai_to_agentex_events(stream_response): - """Convert OpenAI streaming events to AgentEx TaskMessageUpdate events with reasoning support. 
- - This is an enhanced version of the base converter that includes support for: - - Reasoning content deltas (for o1 models) - - Reasoning summary deltas (for o1 models) - - Args: - stream_response: An async iterator of OpenAI streaming events - Yields: - TaskMessageUpdate: AgentEx streaming events (StreamTaskMessageDelta, StreamTaskMessageFull, or StreamTaskMessageDone) - """ - - tool_map = {} - event_count = 0 - message_index = 0 # Track message index for proper sequencing - seen_tool_output = False # Track if we've seen tool output to know when final text starts - item_id_to_index = {} # Map item_id to message index - item_id_to_type = {} # Map item_id to content type (text, reasoning_content, reasoning_summary) - - async for event in stream_response: - event_count += 1 - - # Check for raw response events which contain the actual OpenAI streaming events - if hasattr(event, "type") and event.type == "raw_response_event": - if hasattr(event, "data"): - raw_event = event.data - - # Check for ResponseOutputItemAddedEvent which signals a new message starting - if isinstance(raw_event, ResponseOutputItemAddedEvent): - # Don't increment here - we'll increment when we see the actual text delta - # This is just a signal that a new message is starting - pass - - # Handle item completion - send done event to close the message - elif isinstance(raw_event, ResponseOutputItemDoneEvent): - item_id = raw_event.item.id - if item_id in item_id_to_index: - # Get the message type to decide whether to send done event - message_type = item_id_to_type.get(item_id, "text") - - # Don't send done events for reasoning content/summary - # They just end with their last delta - if message_type not in ("reasoning_content", "reasoning_summary"): - yield StreamTaskMessageDone( - type="done", - index=item_id_to_index[item_id], - ) - - # Skip reasoning summary part added events - we handle them on delta - elif isinstance(raw_event, ResponseReasoningSummaryPartAddedEvent): - pass - - # Handle 
reasoning summary text delta events - elif isinstance(raw_event, ResponseReasoningSummaryTextDeltaEvent): - item_id = raw_event.item_id - summary_index = raw_event.summary_index - - # If this is a new item_id we haven't seen, create a new message - if item_id and item_id not in item_id_to_index: - message_index += 1 - item_id_to_index[item_id] = message_index - item_id_to_type[item_id] = "reasoning_summary" - - # Send a start event for this new reasoning summary message - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=TextContent( - type="text", - author="agent", - content="", # Start with empty content - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - # Yield reasoning summary delta - yield StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=ReasoningSummaryDelta( - type="reasoning_summary", - summary_index=summary_index, - summary_delta=raw_event.delta, - ), - ) - - # Handle reasoning summary text done events - elif isinstance(raw_event, ResponseReasoningSummaryTextDoneEvent): - # We do NOT close the streaming context here - # as there can be multiple reasoning summaries. 
- # The context will be closed when the entire - # output item is done (ResponseOutputItemDoneEvent) - pass - - # Handle reasoning content text delta events - elif isinstance(raw_event, ResponseReasoningTextDeltaEvent): - item_id = raw_event.item_id - content_index = raw_event.content_index - - # If this is a new item_id we haven't seen, create a new message - if item_id and item_id not in item_id_to_index: - message_index += 1 - item_id_to_index[item_id] = message_index - item_id_to_type[item_id] = "reasoning_content" - - # Send a start event for this new reasoning content message - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=TextContent( - type="text", - author="agent", - content="", # Start with empty content - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - # Yield reasoning content delta - yield StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=ReasoningContentDelta( - type="reasoning_content", - content_index=content_index, - content_delta=raw_event.delta, - ), - ) - - # Handle reasoning content text done events - elif isinstance(raw_event, ResponseReasoningTextDoneEvent): - # We do NOT close the streaming context here - # as there can be multiple reasoning content texts. 
- # The context will be closed when the entire - # output item is done (ResponseOutputItemDoneEvent) - pass - - # Check if this is a text delta event from OpenAI - elif isinstance(raw_event, ResponseTextDeltaEvent): - # Check if this event has an item_id - item_id = getattr(raw_event, "item_id", None) - - # If this is a new item_id we haven't seen, it's a new message - if item_id and item_id not in item_id_to_index: - # Check if this is truly a NEW text message after tools - # We need to differentiate between the first text and the final text after tools - if seen_tool_output: - # This is the final text message after tool execution - message_index += 1 - item_id_to_index[item_id] = message_index - else: - item_id_to_index[item_id] = message_index - - item_id_to_type[item_id] = "text" - - # Send a start event with empty content for this new text message - yield StreamTaskMessageStart( - type="start", - index=item_id_to_index[item_id], - content=TextContent( - type="text", - author="agent", - content="", # Start with empty content, deltas will fill it - ), - ) - - # Use the index for this item_id - current_index = item_id_to_index.get(item_id, message_index) - - delta_message = StreamTaskMessageDelta( - type="delta", - index=current_index, - delta=TextDelta( - type="text", - text_delta=raw_event.delta, - ), - ) - yield delta_message - - elif hasattr(event, "type") and event.type == "run_item_stream_event": - # Skip reasoning_item events - they're handled via raw_response_event above - if hasattr(event, "item") and event.item.type == "reasoning_item": - continue - - # Check for tool_call_item type (this is when a tool is being called) - elif hasattr(event, "item") and event.item.type == "tool_call_item": - # Extract tool call information using the helper method - call_id, tool_name, tool_arguments = _extract_tool_call_info(event.item.raw_item) - tool_map[call_id] = tool_name - tool_request_content = ToolRequestContent( - tool_call_id=call_id, - name=tool_name, - 
arguments=tool_arguments, - author="agent", - ) - message_index += 1 # Increment for new message - yield StreamTaskMessageFull( - index=message_index, - type="full", - content=tool_request_content, - ) - - # Check for tool_call_output_item type (this is when a tool returns output) - elif hasattr(event, "item") and event.item.type == "tool_call_output_item": - # Extract tool response information using the helper method - call_id, tool_name, content = _extract_tool_response_info(tool_map, event.item.raw_item) - tool_response_content = ToolResponseContent( - tool_call_id=call_id, - name=tool_name, - content=content, - author="agent", - ) - message_index += 1 # Increment for new message - seen_tool_output = True # Mark that we've seen tool output so next text gets new index - yield StreamTaskMessageFull( - type="full", - index=message_index, - content=tool_response_content, - ) +# The OpenAI streaming tap ``convert_openai_to_agentex_events`` now lives in +# ``agentex.lib.adk._modules._openai_sync``; re-exported here for back-compat. +from agentex.lib.adk._modules._openai_sync import ( # noqa: E402 + convert_openai_to_agentex_events as convert_openai_to_agentex_events, +) diff --git a/src/agentex/lib/core/harness/auto_send.py b/src/agentex/lib/core/harness/auto_send.py index 6d3883fa6..b645a4aae 100644 --- a/src/agentex/lib/core/harness/auto_send.py +++ b/src/agentex/lib/core/harness/auto_send.py @@ -56,7 +56,7 @@ async def auto_send( callers can back-date message timestamps. 
Mirrors the open/close/stream_update pattern from - src/agentex/lib/adk/_modules/_langgraph_async.py: + src/agentex/lib/adk/_modules/_langgraph_turn.py: - context opened via streaming_task_message_context(...).__aenter__() - context closed via ctx.close() (not __aexit__) - deltas pushed as StreamTaskMessageDelta with parent_task_message set @@ -110,8 +110,8 @@ async def _close_all() -> None: ctx = ctx_map.get(event.index) if ctx is not None and event.delta is not None: # Reconstruct the delta with parent_task_message set from - # the context's task_message (mirrors _langgraph_async.py - # lines 72-78 and 117-127). + # the context's task_message (mirrors the legacy + # _langgraph_async streaming helper, now in _langgraph_turn.py). delta_with_parent = StreamTaskMessageDelta( parent_task_message=ctx.task_message, delta=event.delta, diff --git a/tests/lib/adk/providers/test_openai_turn.py b/tests/lib/adk/providers/test_openai_turn.py index 47a9ba9fe..d5ad2b5c8 100644 --- a/tests/lib/adk/providers/test_openai_turn.py +++ b/tests/lib/adk/providers/test_openai_turn.py @@ -25,7 +25,7 @@ def _import_target(): - from agentex.lib.adk.providers._modules.openai_turn import ( + from agentex.lib.adk._modules._openai_turn import ( OpenAITurn, _aggregate_usage, openai_usage_to_turn_usage, @@ -219,7 +219,7 @@ def stream_events(self): # monkeypatch that converter below so this can yield canonical events. 
return _canonical_stream(canonical) - import agentex.lib.adk.providers._modules.openai_turn as mod + import agentex.lib.adk._modules._openai_turn as mod async def _passthrough(stream): async for e in stream: diff --git a/tests/lib/adk/test_langgraph_async.py b/tests/lib/adk/test_langgraph_async.py index 682bd43bc..ebe215a15 100644 --- a/tests/lib/adk/test_langgraph_async.py +++ b/tests/lib/adk/test_langgraph_async.py @@ -26,7 +26,7 @@ from agentex.types.text_content import TextContent from agentex.types.task_message_delta import TextDelta from agentex.types.task_message_update import StreamTaskMessageDelta -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events +from agentex.lib.adk._modules._langgraph_turn import stream_langgraph_events TASK_ID = "task-test" diff --git a/tests/lib/adk/test_langgraph_sync.py b/tests/lib/adk/test_langgraph_sync.py index 6b71a2264..9e8c6e4f0 100644 --- a/tests/lib/adk/test_langgraph_sync.py +++ b/tests/lib/adk/test_langgraph_sync.py @@ -1,8 +1,12 @@ -"""Tests for the sync LangGraph -> Agentex stream event converter. +"""Tests for the sync LangGraph -> Agentex path. 
Covers: -- Basic text, tool call, and tool response emission -- on_final_ai_message callback for usage capture +- The bare converter ``convert_langgraph_to_agentex_events``: + * Basic text, tool call, and tool response emission + * on_final_ai_message callback for usage capture +- The unified sync (HTTP ACP) path ``UnifiedEmitter.yield_turn(LangGraphTurn(...))``: + * Passthrough: yield_turn events equal LangGraphTurn(stream).events + * Span derivation from Full tool events with a fake tracer NOTE: langchain_core imports must be deferred to test-function scope because conftest.py stubs out ``langchain_core.messages`` with MagicMock for ADK @@ -13,15 +17,20 @@ import sys from typing import Any, AsyncIterator +from datetime import datetime, timezone +from dataclasses import field, dataclass import pytest +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.task_message_update import ( StreamTaskMessageFull, ) from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_sync import convert_langgraph_to_agentex_events +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn # --------------------------------------------------------------------------- # Helpers @@ -223,3 +232,162 @@ def _cb(msg): # The tool call Full event is emitted before the callback fires assert yield_order.index("event") < yield_order.index("callback") + + +# --------------------------------------------------------------------------- +# Unified sync path: LangGraphTurn + UnifiedEmitter.yield_turn +# +# Verifies the sync (HTTP ACP) delivery surface: +# 1. Passthrough: events from emitter.yield_turn(LangGraphTurn(stream)) equal +# LangGraphTurn(stream).events collected directly. +# 2. Span derivation: with trace_id + fake tracer, tool spans are derived from +# the event stream. 
+# --------------------------------------------------------------------------- + + +@dataclass +class _FakeTracingBackend: + spans_started: list[dict[str, Any]] = field(default_factory=list) + spans_ended: list[str] = field(default_factory=list) + + async def start_span(self, **kw) -> Any: + from agentex.types.span import Span + + sp = Span( + id=f"span-{len(self.spans_started) + 1}", + trace_id=kw.get("trace_id", "trace1"), + name=kw.get("name", ""), + start_time=datetime.now(tz=timezone.utc), + ) + self.spans_started.append(kw) + return sp + + async def end_span(self, *, trace_id: str, span: Any) -> None: + self.spans_ended.append(span.id if span else "") + + +class TestUnifiedSyncPathPassthrough: + async def test_yield_turn_events_equal_direct_events(self): + """Events from emitter.yield_turn(LangGraphTurn(stream)) must equal + LangGraphTurn(stream).events collected directly — the emitter must not + add, drop, or reorder events in yield mode.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + chunk = AIMessageChunk(content="Hello!") + ai_msg = AIMessage(content="Hello!") + + events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + + direct = [e async for e in LangGraphTurn(_make_stream(events_raw)).events] + + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert len(direct) == len(via_emitter), "yield_turn must not add or drop events relative to direct iteration" + for a, b in zip(direct, via_emitter, strict=True): + assert type(a) == type(b), f"Event type mismatch: {type(a).__name__} vs {type(b).__name__}" + + async def test_yield_turn_passes_all_event_types(self): + """Start, Delta, Done, Full — each type is preserved.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + chunk = AIMessageChunk(content="hi") + tc = {"id": "c1", "name": "t", "args": {}} + 
ai_msg = AIMessage(content="hi", tool_calls=[tc]) + + events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + types = {type(e).__name__ for e in out} + # text chunk emits Start + Delta + assert "StreamTaskMessageStart" in types + assert "StreamTaskMessageDelta" in types + # tool call emits Full + assert "StreamTaskMessageFull" in types + + async def test_empty_stream_yields_no_events(self): + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream([])))] + assert out == [] + + +class TestUnifiedSyncPathSpanDerivation: + @pytest.fixture + def fake_tracer(self): + backend = _FakeTracingBackend() + tracer = SpanTracer( + trace_id="trace1", + parent_span_id=None, + task_id="t", + tracing=backend, # type: ignore[arg-type] + ) + return tracer, backend + + async def test_tool_span_derived_from_full_events(self, fake_tracer): + """SpanDeriver handles Full tool events for LangGraph. + + Full(ToolRequestContent) opens a tool span keyed by tool_call_id; + Full(ToolResponseContent) closes it, aligning LangGraph's Full-event + path with the Start+Done harnesses (pydantic-ai, openai-agents). 
+ """ + from langchain_core.messages import AIMessage, ToolMessage + + tracer, backend = fake_tracer + tc = {"id": "c1", "name": "get_weather", "args": {"city": "Paris"}} + ai_msg = AIMessage(content="", tool_calls=[tc]) + tool_msg = ToolMessage(content="Sunny", tool_call_id="c1", name="get_weather") + + events_raw = [ + ("updates", {"agent": {"messages": [ai_msg]}}), + ("updates", {"tools": {"messages": [tool_msg]}}), + ] + + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span" + started = backend.spans_started[0] + assert started["name"] == "get_weather" + assert started["input"] == {"city": "Paris"} + + async def test_no_spans_when_no_tool_calls(self, fake_tracer): + """yield_turn with tracer but no tool calls emits no spans.""" + from langchain_core.messages import AIMessage, AIMessageChunk + + tracer, backend = fake_tracer + chunk = AIMessageChunk(content="Hello!") + ai_msg = AIMessage(content="Hello!") + + events_raw = [ + ("messages", (chunk, {})), + ("updates", {"agent": {"messages": [ai_msg]}}), + ] + + emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + + assert backend.spans_started == [], "No tool spans when there are no tool calls" + + async def test_tracer_none_means_no_spans(self): + """With tracer=False, no spans should be emitted.""" + from langchain_core.messages import AIMessage, ToolMessage + + tc = {"id": "c1", "name": "t", "args": {}} + ai_msg = AIMessage(content="", tool_calls=[tc]) + tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t") + + events_raw = [ + ("updates", {"agent": {"messages": [ai_msg]}}), + ("updates", {"tools": {"messages": [tool_msg]}}), + ] + + emitter = 
UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=False) + _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] + # No assertion on spans since tracer=False means emitter.tracer is None + assert emitter.tracer is None diff --git a/tests/lib/adk/test_langgraph_sync_unified.py b/tests/lib/adk/test_langgraph_sync_unified.py deleted file mode 100644 index 7c0eba58e..000000000 --- a/tests/lib/adk/test_langgraph_sync_unified.py +++ /dev/null @@ -1,213 +0,0 @@ -"""Unified sync path tests for LangGraphTurn + UnifiedEmitter. - -Verifies: -1. Passthrough: events from emitter.yield_turn(LangGraphTurn(stream)) equal - LangGraphTurn(stream).events collected directly. -2. Span derivation: with trace_id + fake tracer, tool spans are derived from - the event stream. - -NOTE: langchain_core imports are deferred to test scope because conftest.py -stubs ``langchain_core.messages`` with MagicMock. -""" - -from __future__ import annotations - -import sys -from typing import Any -from datetime import datetime, timezone -from dataclasses import field, dataclass - -import pytest - -from agentex.lib.core.harness.tracer import SpanTracer -from agentex.lib.core.harness.emitter import UnifiedEmitter -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn - -# --------------------------------------------------------------------------- -# Remove conftest stubs so real langchain_core types are used -# --------------------------------------------------------------------------- - - -@pytest.fixture(autouse=True) -def _real_langchain_core(): - stub_keys = [k for k in sys.modules if k.startswith("langchain_core") or k.startswith("langgraph")] - saved = {k: sys.modules.pop(k) for k in stub_keys} - import importlib - - importlib.import_module("langchain_core.messages") - yield - sys.modules.update(saved) - - -# --------------------------------------------------------------------------- -# Helpers -# 
--------------------------------------------------------------------------- - - -def _make_stream(events: list[tuple[str, Any]]): - async def _gen(): - for e in events: - yield e - - return _gen() - - -# --------------------------------------------------------------------------- -# Fake SpanTracer -# --------------------------------------------------------------------------- - - -@dataclass -class _FakeTracingBackend: - spans_started: list[dict[str, Any]] = field(default_factory=list) - spans_ended: list[str] = field(default_factory=list) - - async def start_span(self, **kw) -> Any: - from agentex.types.span import Span - - sp = Span( - id=f"span-{len(self.spans_started) + 1}", - trace_id=kw.get("trace_id", "trace1"), - name=kw.get("name", ""), - start_time=datetime.now(tz=timezone.utc), - ) - self.spans_started.append(kw) - return sp - - async def end_span(self, *, trace_id: str, span: Any) -> None: - self.spans_ended.append(span.id if span else "") - - -# --------------------------------------------------------------------------- -# Tests -# --------------------------------------------------------------------------- - - -class TestPassthrough: - async def test_yield_turn_events_equal_direct_events(self): - """Events from emitter.yield_turn(LangGraphTurn(stream)) must equal - LangGraphTurn(stream).events collected directly — the emitter must not - add, drop, or reorder events in yield mode.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - chunk = AIMessageChunk(content="Hello!") - ai_msg = AIMessage(content="Hello!") - - # Build two identical streams - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - - # Direct collection - direct = [e async for e in LangGraphTurn(_make_stream(events_raw)).events] - - # Via emitter.yield_turn - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = [e async for e in 
emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert len(direct) == len(via_emitter), "yield_turn must not add or drop events relative to direct iteration" - for a, b in zip(direct, via_emitter, strict=True): - assert type(a) == type(b), f"Event type mismatch: {type(a).__name__} vs {type(b).__name__}" - - async def test_yield_turn_passes_all_event_types(self): - """Start, Delta, Done, Full — each type is preserved.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - chunk = AIMessageChunk(content="hi") - tc = {"id": "c1", "name": "t", "args": {}} - ai_msg = AIMessage(content="hi", tool_calls=[tc]) - - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - types = {type(e).__name__ for e in out} - # text chunk emits Start + Delta - assert "StreamTaskMessageStart" in types - assert "StreamTaskMessageDelta" in types - # tool call emits Full - assert "StreamTaskMessageFull" in types - - async def test_empty_stream_yields_no_events(self): - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - out = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream([])))] - assert out == [] - - -class TestSpanDerivation: - @pytest.fixture - def fake_tracer(self): - backend = _FakeTracingBackend() - tracer = SpanTracer( - trace_id="trace1", - parent_span_id=None, - task_id="t", - tracing=backend, # type: ignore[arg-type] - ) - return tracer, backend - - async def test_tool_span_derived_from_full_events(self, fake_tracer): - """SpanDeriver handles Full tool events for LangGraph. - - Full(ToolRequestContent) opens a tool span keyed by tool_call_id; - Full(ToolResponseContent) closes it, aligning LangGraph's Full-event - path with the Start+Done harnesses (pydantic-ai, openai-agents). 
- """ - from langchain_core.messages import AIMessage, ToolMessage - - tracer, backend = fake_tracer - tc = {"id": "c1", "name": "get_weather", "args": {"city": "Paris"}} - ai_msg = AIMessage(content="", tool_calls=[tc]) - tool_msg = ToolMessage(content="Sunny", tool_call_id="c1", name="get_weather") - - events_raw = [ - ("updates", {"agent": {"messages": [ai_msg]}}), - ("updates", {"tools": {"messages": [tool_msg]}}), - ] - - emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span" - started = backend.spans_started[0] - assert started["name"] == "get_weather" - assert started["input"] == {"city": "Paris"} - - async def test_no_spans_when_no_tool_calls(self, fake_tracer): - """yield_turn with tracer but no tool calls emits no spans.""" - from langchain_core.messages import AIMessage, AIMessageChunk - - tracer, backend = fake_tracer - chunk = AIMessageChunk(content="Hello!") - ai_msg = AIMessage(content="Hello!") - - events_raw = [ - ("messages", (chunk, {})), - ("updates", {"agent": {"messages": [ai_msg]}}), - ] - - emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - - assert backend.spans_started == [], "No tool spans when there are no tool calls" - - async def test_tracer_none_means_no_spans(self): - """With tracer=False, no spans should be emitted.""" - from langchain_core.messages import AIMessage, ToolMessage - - tc = {"id": "c1", "name": "t", "args": {}} - ai_msg = AIMessage(content="", tool_calls=[tc]) - tool_msg = ToolMessage(content="ok", tool_call_id="c1", name="t") - - events_raw = [ - ("updates", {"agent": {"messages": [ai_msg]}}), - ("updates", {"tools": {"messages": [tool_msg]}}), - ] - - emitter = 
UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=False) - _ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))] - # No assertion on spans since tracer=False means emitter.tracer is None - assert emitter.tracer is None diff --git a/tests/lib/adk/test_pydantic_ai_async.py b/tests/lib/adk/test_pydantic_ai_async.py index 737c07f1f..4ab468152 100644 --- a/tests/lib/adk/test_pydantic_ai_async.py +++ b/tests/lib/adk/test_pydantic_ai_async.py @@ -36,7 +36,7 @@ from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent from agentex.types.reasoning_content_delta import ReasoningContentDelta -from agentex.lib.adk._modules._pydantic_ai_async import stream_pydantic_ai_events +from agentex.lib.adk._modules._pydantic_ai_turn import stream_pydantic_ai_events TASK_ID = "task_test" diff --git a/tests/lib/adk/test_pydantic_ai_sync.py b/tests/lib/adk/test_pydantic_ai_sync.py index 3d6d7beba..be5cf4f15 100644 --- a/tests/lib/adk/test_pydantic_ai_sync.py +++ b/tests/lib/adk/test_pydantic_ai_sync.py @@ -1,4 +1,12 @@ -"""Tests for the Pydantic AI -> Agentex stream event converter.""" +"""Tests for the sync Pydantic AI -> Agentex path. + +Covers: +- The bare converter ``convert_pydantic_ai_to_agentex_events`` (text/thinking/ + tool-call streaming and arg-delta handling). 
+- The unified sync (HTTP ACP) path ``UnifiedEmitter.yield_turn(PydanticAITurn(...))``: + * Passthrough: yield_turn events equal PydanticAITurn(stream).events + * Span derivation (tool + reasoning) with a fake tracing backend +""" from __future__ import annotations @@ -25,6 +33,8 @@ FunctionToolResultEvent, ) +from agentex.lib.core.harness import UnifiedEmitter +from tests.lib.core.harness._fakes import FakeTracing from agentex.types.reasoning_content import ReasoningContent from agentex.types.task_message_delta import TextDelta from agentex.types.tool_request_delta import ToolRequestDelta @@ -42,6 +52,7 @@ _args_delta_to_str, convert_pydantic_ai_to_agentex_events, ) +from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn async def _aiter(events: list[Any]) -> AsyncIterator[Any]: @@ -471,3 +482,157 @@ async def on_result_async(event: AgentRunResultEvent) -> None: assert len(awaited) == 1 assert awaited[0].result.output == "async_output" + + +# --------------------------------------------------------------------------- +# Unified sync path: PydanticAITurn + UnifiedEmitter.yield_turn +# +# Exercises the path documented in _pydantic_ai_sync.py under +# "Recommended: unified surface": +# - events forwarded by yield_turn equal PydanticAITurn(stream).events (passthrough) +# - with a trace context + fake tracing backend, tool / reasoning spans are derived +# --------------------------------------------------------------------------- + + +class TestUnifiedSyncPathPassthrough: + """The events forwarded by yield_turn are identical to PydanticAITurn.events.""" + + async def test_text_stream_passthrough(self): + raw_events = [ + PartStartEvent(index=0, part=TextPart(content="")), + PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hello")), + PartEndEvent(index=0, part=TextPart(content="hello")), + ] + + turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + direct = await _collect(turn_a.events) + + turn_b = 
PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = await _collect(emitter.yield_turn(turn_b)) + + assert len(via_emitter) == len(direct) + for a, b in zip(via_emitter, direct): + assert type(a) is type(b) + assert a.model_dump() == b.model_dump() + + async def test_tool_call_stream_passthrough(self): + raw_events = [ + PartStartEvent(index=0, part=ToolCallPart(tool_name="Bash", args=None, tool_call_id="c1")), + PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"cmd":"ls"}')), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c1"), + ), + ] + + turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + direct = await _collect(turn_a.events) + + turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) + via_emitter = await _collect(emitter.yield_turn(turn_b)) + + assert len(via_emitter) == len(direct) + for a, b in zip(via_emitter, direct): + assert type(a) is type(b) + assert a.model_dump() == b.model_dump() + + +class TestUnifiedSyncPathSpanDerivation: + """With trace context + fake tracing, spans are derived from the stream.""" + + async def test_tool_span_opened_and_closed(self): + """A tool call produces start_span + end_span on the fake tracing backend.""" + tool_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="call_1"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="call_1"), + ), + FunctionToolResultEvent( + part=ToolReturnPart(tool_name="Bash", content="files", tool_call_id="call_1"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) + + events = 
await _collect(emitter.yield_turn(turn)) + + assert len(events) >= 2, "at least Start(tool) + Done + Full(response)" + assert len(fake.started) == 1, "one tool span opened" + assert len(fake.ended) == 1, "one tool span closed" + span_name, parent_id, span_input = fake.started[0] + assert span_name == "Bash" + assert parent_id == "p" + closed_name, closed_output = fake.ended[0] + assert closed_name == "Bash" + + async def test_reasoning_span_opened_and_closed(self): + """A thinking/reasoning block produces start_span + end_span.""" + reasoning_events = [ + PartStartEvent(index=0, part=ThinkingPart(content="")), + PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="let me think")), + PartEndEvent(index=0, part=ThinkingPart(content="let me think")), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert len(fake.started) == 1, "one reasoning span opened" + assert len(fake.ended) == 1, "one reasoning span closed" + span_name, parent_id, _ = fake.started[0] + assert span_name == "reasoning" + assert parent_id == "p" + + async def test_no_trace_id_means_no_spans(self): + """When trace_id is None, no spans are derived even with a fake tracing backend.""" + raw_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c2"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c2"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert fake.started == [], "no spans when trace_id is absent" + assert fake.ended == [] + + async def 
test_tracer_false_suppresses_spans_even_with_trace_id(self): + """tracer=False disables span derivation regardless of trace_id.""" + raw_events = [ + PartStartEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c3"), + ), + PartEndEvent( + index=0, + part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c3"), + ), + ] + + fake = FakeTracing() + turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") + emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake) + + await _collect(emitter.yield_turn(turn)) + + assert fake.started == [] + assert fake.ended == [] diff --git a/tests/lib/adk/test_pydantic_ai_sync_unified.py b/tests/lib/adk/test_pydantic_ai_sync_unified.py deleted file mode 100644 index 2e58ef5b2..000000000 --- a/tests/lib/adk/test_pydantic_ai_sync_unified.py +++ /dev/null @@ -1,191 +0,0 @@ -"""Tests for the unified sync (HTTP ACP) path: PydanticAITurn + UnifiedEmitter. - -Exercises the path documented in _pydantic_ai_sync.py under "Recommended: unified surface": -- events forwarded by yield_turn equal PydanticAITurn(stream).events (passthrough) -- with a trace context + fake tracing backend, tool spans are derived (start_span / end_span called) -- with a trace context + fake tracing backend, reasoning spans are derived -""" - -from __future__ import annotations - -from typing import Any, AsyncIterator - -from pydantic_ai.run import AgentRunResult, AgentRunResultEvent -from pydantic_ai.usage import RunUsage -from pydantic_ai.messages import ( - TextPart, - PartEndEvent, - ThinkingPart, - ToolCallPart, - TextPartDelta, - PartDeltaEvent, - PartStartEvent, - ThinkingPartDelta, - ToolCallPartDelta, -) - -from agentex.lib.core.harness import UnifiedEmitter -from tests.lib.core.harness._fakes import FakeTracing -from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn - - -async def _aiter(events: list[Any]) -> AsyncIterator[Any]: - for e in 
events: - yield e - - -async def _collect(stream: AsyncIterator[Any]) -> list[Any]: - return [e async for e in stream] - - -def _make_result_event(usage: RunUsage | None = None) -> AgentRunResultEvent: - result = AgentRunResult(output="done", _output_tool_name=None) - if usage is not None: - result._state.usage = usage - return AgentRunResultEvent(result=result) - - -class TestUnifiedSyncPathPassthrough: - """The events forwarded by yield_turn are identical to PydanticAITurn.events.""" - - async def test_text_stream_passthrough(self): - raw_events = [ - PartStartEvent(index=0, part=TextPart(content="")), - PartDeltaEvent(index=0, delta=TextPartDelta(content_delta="hello")), - PartEndEvent(index=0, part=TextPart(content="hello")), - ] - - turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - direct = await _collect(turn_a.events) - - turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = await _collect(emitter.yield_turn(turn_b)) - - assert len(via_emitter) == len(direct) - for a, b in zip(via_emitter, direct): - assert type(a) is type(b) - assert a.model_dump() == b.model_dump() - - async def test_tool_call_stream_passthrough(self): - raw_events = [ - PartStartEvent(index=0, part=ToolCallPart(tool_name="Bash", args=None, tool_call_id="c1")), - PartDeltaEvent(index=0, delta=ToolCallPartDelta(args_delta='{"cmd":"ls"}')), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c1"), - ), - ] - - turn_a = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - direct = await _collect(turn_a.events) - - turn_b = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None) - via_emitter = await _collect(emitter.yield_turn(turn_b)) - - assert len(via_emitter) == len(direct) - for a, b in zip(via_emitter, direct): - assert type(a) is 
type(b) - assert a.model_dump() == b.model_dump() - - -class TestUnifiedSyncPathSpanDerivation: - """With trace context + fake tracing, spans are derived from the stream.""" - - async def test_tool_span_opened_and_closed(self): - """A tool call produces start_span + end_span on the fake tracing backend.""" - from pydantic_ai.messages import ToolReturnPart, FunctionToolResultEvent - - tool_events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="call_1"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="call_1"), - ), - FunctionToolResultEvent( - part=ToolReturnPart(tool_name="Bash", content="files", tool_call_id="call_1"), - ), - ] - - fake = FakeTracing() - turn = PydanticAITurn(_aiter(tool_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) - - events = await _collect(emitter.yield_turn(turn)) - - assert len(events) >= 2, "at least Start(tool) + Done + Full(response)" - assert len(fake.started) == 1, "one tool span opened" - assert len(fake.ended) == 1, "one tool span closed" - span_name, parent_id, span_input = fake.started[0] - assert span_name == "Bash" - assert parent_id == "p" - closed_name, closed_output = fake.ended[0] - assert closed_name == "Bash" - - async def test_reasoning_span_opened_and_closed(self): - """A thinking/reasoning block produces start_span + end_span.""" - reasoning_events = [ - PartStartEvent(index=0, part=ThinkingPart(content="")), - PartDeltaEvent(index=0, delta=ThinkingPartDelta(content_delta="let me think")), - PartEndEvent(index=0, part=ThinkingPart(content="let me think")), - ] - - fake = FakeTracing() - turn = PydanticAITurn(_aiter(reasoning_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert len(fake.started) == 1, "one reasoning 
span opened" - assert len(fake.ended) == 1, "one reasoning span closed" - span_name, parent_id, _ = fake.started[0] - assert span_name == "reasoning" - assert parent_id == "p" - - async def test_no_trace_id_means_no_spans(self): - """When trace_id is None, no spans are derived even with a fake tracing backend.""" - raw_events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c2"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c2"), - ), - ] - - fake = FakeTracing() - turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id=None, parent_span_id=None, tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert fake.started == [], "no spans when trace_id is absent" - assert fake.ended == [] - - async def test_tracer_false_suppresses_spans_even_with_trace_id(self): - """tracer=False disables span derivation regardless of trace_id.""" - raw_events = [ - PartStartEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args={"cmd": "ls"}, tool_call_id="c3"), - ), - PartEndEvent( - index=0, - part=ToolCallPart(tool_name="Bash", args='{"cmd":"ls"}', tool_call_id="c3"), - ), - ] - - fake = FakeTracing() - turn = PydanticAITurn(_aiter(raw_events), model="openai:gpt-4o") - emitter = UnifiedEmitter(task_id="t", trace_id="tr", parent_span_id="p", tracer=False, tracing=fake) - - await _collect(emitter.yield_turn(turn)) - - assert fake.started == [] - assert fake.ended == [] diff --git a/tests/lib/core/harness/conformance/runner.py b/tests/lib/core/harness/conformance/runner.py index ffe680c10..6eb0007ce 100644 --- a/tests/lib/core/harness/conformance/runner.py +++ b/tests/lib/core/harness/conformance/runner.py @@ -43,8 +43,8 @@ because: - StreamingTaskMessageContext.close() persists initial_content when no deltas have been streamed, so the message IS correctly persisted. 
- - It mirrors the pattern already used by the real _langgraph_async.py harness, - keeping behavioural parity. + - It mirrors the pattern already used by the real langgraph streaming helper + (now in _langgraph_turn.py), keeping behavioural parity. - Switching to adk.messages.create would require an additional injectable dependency, adding surface area for no observable benefit. The conformance test treats this as an ACCEPTABLE envelope difference: at the diff --git a/tests/lib/core/harness/test_harness_langgraph_temporal.py b/tests/lib/core/harness/test_harness_langgraph_temporal.py index 1a094a33c..219e92229 100644 --- a/tests/lib/core/harness/test_harness_langgraph_temporal.py +++ b/tests/lib/core/harness/test_harness_langgraph_temporal.py @@ -1,7 +1,7 @@ """Integration test: Temporal channel with a LangGraph agent. -The Temporal LangGraph agent pattern uses ``emit_langgraph_messages`` (from -``_langgraph_messages.py``) inside a Temporal activity. That module is not +The Temporal LangGraph agent pattern uses ``emit_langgraph_messages`` (now in +``_langgraph_sync.py``) inside a Temporal activity. That helper is not yet unified onto the harness surface (it has its own Redis-streaming code). 
This test file verifies the LangGraph Temporal agent's streaming behavior using @@ -43,8 +43,7 @@ from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent -from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn -from agentex.lib.adk._modules._langgraph_async import stream_langgraph_events +from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn, stream_langgraph_events # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core types are used From df56e1fd9f91299af272f93ffc0c9a63a93b0fe7 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 20:06:53 -0400 Subject: [PATCH 07/12] test(harness): add sync/async/temporal integration suites for openai, claude_code, codex Brings integration-test parity across all five harnesses (was pydantic-ai + langgraph only): 9 new test_harness_<harness>_{sync,async,temporal}.py suites built on the shared _fakes, with native-stream shapes drawn from each harness's turn + conformance tests. Extends the harness-integration.yml live-matrix to all five harnesses and generalizes the trigger glob. Temporal suites assert the auto_send delivery + created_at threading (no harness has a separate temporal stream helper), documented per file. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/harness-integration.yml | 12 +- .../harness/test_harness_claude_code_async.py | 247 ++++++++++++++ .../harness/test_harness_claude_code_sync.py | 302 ++++++++++++++++ .../test_harness_claude_code_temporal.py | 183 ++++++++++ .../core/harness/test_harness_codex_async.py | 227 ++++++++++++ .../core/harness/test_harness_codex_sync.py | 277 +++++++++++++++ .../harness/test_harness_codex_temporal.py | 180 ++++++++++ .../core/harness/test_harness_openai_async.py | 304 +++++++++++++++++ .../core/harness/test_harness_openai_sync.py | 322 ++++++++++++++++++ .../harness/test_harness_openai_temporal.py | 195 +++++++++++ 10 files changed, 2243 insertions(+), 6 deletions(-) create mode 100644 tests/lib/core/harness/test_harness_claude_code_async.py create mode 100644 tests/lib/core/harness/test_harness_claude_code_sync.py create mode 100644 tests/lib/core/harness/test_harness_claude_code_temporal.py create mode 100644 tests/lib/core/harness/test_harness_codex_async.py create mode 100644 tests/lib/core/harness/test_harness_codex_sync.py create mode 100644 tests/lib/core/harness/test_harness_codex_temporal.py create mode 100644 tests/lib/core/harness/test_harness_openai_async.py create mode 100644 tests/lib/core/harness/test_harness_openai_sync.py create mode 100644 tests/lib/core/harness/test_harness_openai_temporal.py diff --git a/.github/workflows/harness-integration.yml b/.github/workflows/harness-integration.yml index 075ee5cf3..ab20929a8 100644 --- a/.github/workflows/harness-integration.yml +++ b/.github/workflows/harness-integration.yml @@ -7,8 +7,7 @@ on: paths: - "src/agentex/lib/core/harness/**" - "src/agentex/lib/adk/_modules/**" - - "tests/lib/core/harness/test_harness_pydantic_ai_*.py" - - "tests/lib/core/harness/test_harness_langgraph_*.py" + - "tests/lib/core/harness/test_harness_*.py" - ".github/workflows/harness-integration.yml" jobs: @@ -34,14 +33,15 @@ jobs: run: ./scripts/test 
tests/lib/core/harness/ -v # Offline harness integration tests (sync / async / temporal channels) for each - # migrated harness. These use fake streams / TestModel + fake streaming/tracing - # and require no live infrastructure. Future harness migration PRs (6-8) add - # their harness to the matrix below and their test paths to the triggers above. + # harness. These use fake streams / TestModel + fake streaming/tracing and + # require no live infrastructure. All five harnesses are now covered; the + # trigger above uses a `test_harness_*.py` glob so new suites are picked up + # automatically. live-matrix: runs-on: ubuntu-latest strategy: matrix: - harness: [pydantic_ai, langgraph] + harness: [pydantic_ai, langgraph, openai, claude_code, codex] channel: [sync, async, temporal] fail-fast: false name: ${{ matrix.harness }}-${{ matrix.channel }} diff --git a/tests/lib/core/harness/test_harness_claude_code_async.py b/tests/lib/core/harness/test_harness_claude_code_async.py new file mode 100644 index 000000000..7902971fb --- /dev/null +++ b/tests/lib/core/harness/test_harness_claude_code_async.py @@ -0,0 +1,247 @@ +"""Integration test: async (Redis-streaming) channel with a claude-code turn. + +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + ClaudeCodeTurn) +with hand-built claude-code ``stream-json`` envelopes and a fake streaming +backend so the test runs fully offline (no claude-code CLI subprocess, no +Redis, no Agentex server). + +Native envelope shapes are copied verbatim from the claude-code turn test and +conformance fixtures (assistant tool_use -> Start(ToolRequestContent)+Done; +user tool_result -> Full(ToolResponseContent); assistant text -> +Start(TextContent)+Delta+Done; result envelope -> usage). + +What is tested +-------------- +- auto_send pushes the correct message contexts: tool_request + tool_response + + text (in that order). +- TurnResult.final_text equals the final assistant text. 
+- TurnResult.usage reflects the claude-code ``result`` envelope (input/output + tokens, cost, num_llm_calls from num_turns). +- With a SpanTracer + fake tracing, a tool span is derived on the async path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real claude-code CLI subprocess / live model behaviour. + +See also: test_harness_claude_code_sync.py and test_harness_claude_code_temporal.py. +""" + +from __future__ import annotations + +from typing import Any, AsyncIterator + +import pytest + +from agentex.types.task_message import TaskMessage +from tests.lib.core.harness._fakes import FakeTracing +from agentex.lib.core.harness.types import TurnResult +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + +# --------------------------------------------------------------------------- +# Native claude-code envelope fixtures +# --------------------------------------------------------------------------- + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_read", + "content": "# My Project — temperature 72F", + } + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + { + "type": "result", + "usage": {"input_tokens": 200, "output_tokens": 80}, + "cost_usd": 0.015, + 
"num_turns": 2, + }, + ] + + +async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + envelopes: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + 
parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert types[-1] == "text", f"Expected last type=text, got {types}" + + +class TestAsyncAutoSendContent: + async def test_tool_request_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)] + assert len(tool_reqs) == 1 + assert tool_reqs[0].name == "Read" + + async def test_tool_response_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + 
tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id == "call_read" + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_last_text(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_envelopes()) + assert result.final_text == "The project file says 72F." + + async def test_usage_from_result_envelope(self) -> None: + """TurnResult.usage reflects the claude-code result envelope.""" + result, _ = await _run_auto_send_turn(_tool_then_text_envelopes()) + assert result.usage is not None + assert result.usage.input_tokens == 200 + assert result.usage.output_tokens == 80 + assert result.usage.total_tokens == 280 + assert result.usage.cost_usd == pytest.approx(0.015) + assert result.usage.num_llm_calls == 2 + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_envelopes()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) + assert len(opens) == len(fake_streaming.messages_opened) + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert fake_tracing.started[0][0] == "Read" + assert len(fake_tracing.ended) == 1 + assert "72F" in str(fake_tracing.ended[0][1]) diff --git a/tests/lib/core/harness/test_harness_claude_code_sync.py b/tests/lib/core/harness/test_harness_claude_code_sync.py new file mode 100644 index 000000000..3876fdb87 --- /dev/null +++ 
b/tests/lib/core/harness/test_harness_claude_code_sync.py @@ -0,0 +1,302 @@ +"""Integration test: sync (HTTP-yield) channel with a claude-code turn. + +Exercises the unified harness surface (UnifiedEmitter.yield_turn + ClaudeCodeTurn) +with hand-built claude-code ``stream-json`` envelopes so the test runs fully +offline (no claude-code CLI subprocess, no API keys, no Agentex server). + +Native stream shapes +--------------------- +``ClaudeCodeTurn`` consumes an async iterator of raw claude-code stream-json +envelopes (str | dict). The envelope shapes used here are copied verbatim from +the claude-code turn test (tests/lib/adk/test_claude_code_turn.py) and the +claude-code conformance fixtures +(tests/lib/core/harness/conformance/test_claude_code_conformance.py): + + assistant text block -> Start(TextContent) + Delta + Done + assistant tool_use -> Start(ToolRequestContent) + Done + user tool_result -> Full(ToolResponseContent) + assistant thinking -> Start(ReasoningContent) + Delta + Done + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events in canonical order: + tool_request (Start+Done) -> tool_response (Full) -> text. +- The tool_response carries the tool_result content, keyed by tool_use_id. +- With a trace_id + fake tracing, the SpanDeriver opens a tool span on + Done(tool_request) and closes it on the matching Full(tool_response), and + opens/closes a reasoning span for a thinking block. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real claude-code CLI subprocess / live model behaviour. +- The full FastACP request/response lifecycle. + +See also: test_harness_claude_code_async.py and test_harness_claude_code_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator, override + +from tests.lib.core.harness._fakes import FakeTracing +from agentex.lib.core.harness.types import OpenSpan, CloseSpan +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + +# --------------------------------------------------------------------------- +# Native claude-code envelope fixtures (copied from the turn + conformance tests) +# --------------------------------------------------------------------------- + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + """tool_use -> tool_result -> final text, then a result envelope with usage.""" + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + { + "type": "tool_result", + "tool_use_id": "call_read", + "content": "# My Project — temperature 72F", + } + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + { + "type": "result", + "usage": {"input_tokens": 100, "output_tokens": 50}, + "cost_usd": 0.01, + "num_turns": 2, + }, + ] + + +def _thinking_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + {"type": "thinking", "thinking": "Let me think.\nStep 1: check the facts."}, + {"type": "text", "text": "Here is my answer."}, + ] + }, + }, + {"type": "result", "usage": {"input_tokens": 10, 
"output_tokens": 5}}, + ] + + +async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + envelopes: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < content_types.index("tool_response") + + async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + 
text_start_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_result_keyed_by_tool_use_id(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + full_responses = [ + ev.content + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0] + assert isinstance(tool_response, ToolResponseContent) + assert tool_response.tool_call_id == "call_read" + assert "72F" in str(tool_response.content) + + async def test_tool_request_is_read(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + tool_reqs = [ + ev.content + for ev in events + if isinstance(getattr(ev, "content", None), ToolRequestContent) + ] + assert any(isinstance(c, ToolRequestContent) and c.name == "Read" for c in tool_reqs) + + async def test_every_start_has_matching_done(self) -> None: + events = await _run_yield_turn(_tool_then_text_envelopes()) + starts = {ev.index for ev in events if isinstance(ev, StreamTaskMessageStart)} + dones = {ev.index for ev in events if isinstance(ev, StreamTaskMessageDone)} + assert starts == dones, f"Unmatched Start/Done indices: starts={starts} dones={dones}" + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Done(tool_request) opens a tool span; Full(tool_response) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + 
) + assert len(fake_tracing.started) == 1 + assert len(fake_tracing.ended) == 1 + name, parent_id, _ = fake_tracing.started[0] + assert name == "Read" + assert parent_id == "parent-span" + + async def test_tool_span_output_is_tool_result(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + name, output = fake_tracing.ended[0] + assert name == "Read" + assert "72F" in str(output) + + async def test_reasoning_span_for_thinking_block(self) -> None: + """A thinking block opens and closes a reasoning span.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _thinking_envelopes(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert fake_tracing.started_names == ["reasoning"] + assert len(fake_tracing.ended) == 1 + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter(task_id="task1", trace_id=None, parent_span_id=None, tracing=fake_tracing) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = FakeTracing() + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_span_signal_types(self) -> None: + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = 
_RecordingTracer( + trace_id="trace1", + parent_span_id="parent", + task_id="task1", + tracing=fake_tracing, + ) + turn = ClaudeCodeTurn(_aiter(_tool_then_text_envelopes())) + emitter = UnifiedEmitter(task_id="task1", trace_id="trace1", parent_span_id="parent", tracer=tracer) + [_ async for _ in emitter.yield_turn(turn)] + + tool_signals = [s for s in received_signals if getattr(s, "name", None) == "Read"] + assert len(tool_signals) >= 1 + assert isinstance(received_signals[0], OpenSpan) + assert any(isinstance(s, CloseSpan) for s in received_signals) diff --git a/tests/lib/core/harness/test_harness_claude_code_temporal.py b/tests/lib/core/harness/test_harness_claude_code_temporal.py new file mode 100644 index 000000000..b643f0d20 --- /dev/null +++ b/tests/lib/core/harness/test_harness_claude_code_temporal.py @@ -0,0 +1,183 @@ +"""Integration test: Temporal channel with a claude-code turn, offline. + +The claude-code tap is a pure library adapter (no Temporal-specific helper such +as langgraph's ``stream_langgraph_events``). In a Temporal deployment the +claude-code CLI runs inside a Temporal activity and the resulting canonical +stream is delivered via the SAME ``UnifiedEmitter.auto_send_turn`` path used by +the non-temporal async channel. The only temporal-specific concern at the +harness boundary is that the activity stamps messages with a deterministic +``created_at`` (e.g. ``workflow.now()``) for replay determinism. + +This suite therefore exercises the auto_send path inside an activity-style call +plus the temporal-only contract: ``created_at`` is threaded through to every +streaming context. The native claude-code envelope shapes are copied verbatim +from the claude-code turn test / conformance fixtures. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. 
+- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text + usage from the result envelope are returned. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling / durability / replay behaviour. +- Redis streaming (requires a running Redis instance). +- A real claude-code CLI subprocess / live model behaviour. + +See also: test_harness_claude_code_sync.py and test_harness_claude_code_async.py. +""" + +from __future__ import annotations + +from typing import Any, AsyncIterator +from datetime import datetime, timezone + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn + + +def _tool_then_text_envelopes() -> list[dict[str, Any]]: + return [ + { + "type": "assistant", + "message": { + "content": [ + { + "type": "tool_use", + "id": "call_read", + "name": "Read", + "input": {"path": "/workspace/README.md"}, + } + ] + }, + }, + { + "type": "user", + "message": { + "content": [ + {"type": "tool_result", "tool_use_id": "call_read", "content": "# My Project — 72F"} + ] + }, + }, + { + "type": "assistant", + "message": {"content": [{"type": "text", "text": "The project file says 72F."}]}, + }, + {"type": "result", "usage": {"input_tokens": 50, "output_tokens": 20}, "num_turns": 2}, + ] + + +async def _aiter(envelopes: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in envelopes: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend that records created_at +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def 
__init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity( + envelopes: list[dict[str, Any]], created_at: datetime | None +) -> tuple[Any, _FakeStreaming]: + fake_streaming = _FakeStreaming() + turn = ClaudeCodeTurn(_aiter(envelopes)) + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityDelivery: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def 
test_tool_round_trip_keyed_correctly(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id == "call_read" + + async def test_final_text_and_usage(self) -> None: + result, _ = await _run_activity(_tool_then_text_envelopes(), created_at=None) + assert result.final_text == "The project file says 72F." + assert result.usage.input_tokens == 50 + assert result.usage.num_llm_calls == 2 + + +class TestTemporalCreatedAtThreading: + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + assert len(fake_streaming.created_ats) == len(fake_streaming.messages_opened) + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_envelopes(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_deterministic_across_runs(self) -> None: + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_envelopes(), created_at=fixed) + assert first.created_ats == second.created_ats diff --git a/tests/lib/core/harness/test_harness_codex_async.py b/tests/lib/core/harness/test_harness_codex_async.py new file mode 100644 index 000000000..5f0e60bb3 --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_async.py @@ -0,0 +1,227 @@ 
+"""Integration test: async (Redis-streaming) channel with a codex turn. + +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + CodexTurn) +with hand-built codex ``exec --json`` event dicts and a fake streaming backend +so the test runs fully offline (no codex CLI subprocess, no Redis, no Agentex +server). + +Native event shapes are copied verbatim from the codex turn test / conformance +fixtures (command_execution -> tool round-trip; agent_message -> text; +turn.completed -> usage). + +What is tested +-------------- +- auto_send pushes the correct message contexts: tool_request + tool_response + + text (in that order). +- TurnResult.final_text equals the final agent_message text. +- TurnResult.usage reflects the codex ``turn.completed`` usage (input/output/ + total tokens) plus the locally-counted num_tool_calls. +- With a SpanTracer + fake tracing, a tool span is derived on the async path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real codex CLI subprocess / live model behaviour. + +See also: test_harness_codex_sync.py and test_harness_codex_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator + +from agentex.types.task_message import TaskMessage +from tests.lib.core.harness._fakes import FakeTracing +from agentex.lib.core.harness.types import TurnResult +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + +# --------------------------------------------------------------------------- +# Native codex event fixtures +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = 
TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + events: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# 
--------------------------------------------------------------------------- + + +class TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert types[-1] == "text", f"Expected last type=text, got {types}" + + +class TestAsyncAutoSendContent: + async def test_tool_response_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_last_text(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.final_text == "The weather is sunny and 72F." 
+ + async def test_usage_from_turn_completed(self) -> None: + """TurnResult.usage reflects the codex turn.completed usage + tool count.""" + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.usage is not None + assert result.usage.input_tokens == 20 + assert result.usage.output_tokens == 8 + assert result.usage.total_tokens == 28 + assert result.usage.model == "o4-mini" + assert result.usage.num_tool_calls == 1 + assert result.usage.num_llm_calls == 1 + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) + assert len(opens) == len(fake_streaming.messages_opened) + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert len(fake_tracing.ended) == 1 + assert "72F" in str(fake_tracing.ended[0][1]) diff --git a/tests/lib/core/harness/test_harness_codex_sync.py b/tests/lib/core/harness/test_harness_codex_sync.py new file mode 100644 index 000000000..192a36a6b --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_sync.py @@ -0,0 +1,277 @@ +"""Integration test: sync (HTTP-yield) channel with a codex turn. + +Exercises the unified harness surface (UnifiedEmitter.yield_turn + CodexTurn) +with hand-built codex ``exec --json`` event dicts so the test runs fully offline +(no codex CLI subprocess, no API keys, no Agentex server). + +Native stream shapes +--------------------- +``CodexTurn`` consumes an async iterator of raw codex events (str | dict). 
The +event shapes used here are copied verbatim from the codex turn test +(tests/lib/adk/test_codex_turn.py) and the codex conformance fixtures +(tests/lib/core/harness/conformance/test_codex_conformance.py): + + command_execution item -> Start(ToolRequestContent) + Done + Full(ToolResponseContent) + agent_message item -> Start(TextContent) + ... + Full/Done + reasoning item -> Start(ReasoningContent) + Full(ReasoningContent) + turn.completed -> usage + +Reasoning note +-------------- +The codex converter emits reasoning as Start(ReasoningContent) + Full(ReasoningContent) +with NO Done event. The SpanDeriver opens a reasoning span on Start but only +closes it on a Done; with no Done, the reasoning span is closed by flush() at +end of stream (is_complete=False). This is asserted explicitly below rather than +glossed over — it is a real codex-specific quirk, not a missing channel. + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events in canonical order: + tool_request (Start+Done) -> tool_response (Full) -> text. +- The tool_response carries the command output, keyed by item id. +- With a trace_id + fake tracing, a tool span is opened on Done(tool_request) + and closed on the matching Full(tool_response), and a reasoning span is + opened (closed-by-flush) for a reasoning item. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real codex CLI subprocess / live model behaviour. +- The full FastACP request/response lifecycle. + +See also: test_harness_codex_async.py and test_harness_codex_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, AsyncIterator, override + +from tests.lib.core.harness._fakes import FakeTracing +from agentex.lib.core.harness.types import OpenSpan, CloseSpan +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageFull, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + +# --------------------------------------------------------------------------- +# Native codex event fixtures (copied from the turn + conformance tests) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[dict[str, Any]]: + """A command_execution tool round-trip followed by a final text reply.""" + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + {"type": "turn.started"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +def _reasoning_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-reason"}, + {"type": "item.started", "item": {"id": "r1", "type": "reasoning", "text": ""}}, + { + "type": "item.completed", + "item": {"id": 
"r1", "type": "reasoning", "text": "Step 1: analyze\nStep 2: solve"}, + }, + {"type": "item.started", "item": {"id": "msg2", "type": "agent_message", "text": ""}}, + {"type": "item.completed", "item": {"id": "msg2", "type": "agent_message", "text": "42"}}, + {"type": "turn.completed", "usage": {"input_tokens": 30, "output_tokens": 20, "total_tokens": 50}}, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + events: list[dict[str, Any]], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < 
content_types.index("tool_response") + + async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + text_start_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_command_output(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + full_responses = [ + ev.content + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0] + assert isinstance(tool_response, ToolResponseContent) + assert "72F" in str(tool_response.content) + + async def test_tool_request_present(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_reqs = [ + ev.content for ev in events if isinstance(getattr(ev, "content", None), ToolRequestContent) + ] + assert len(tool_reqs) == 1 + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Done(tool_request) opens a tool span; Full(tool_response) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert len(fake_tracing.ended) == 1 + _name, parent_id, _input = fake_tracing.started[0] + assert parent_id == 
"parent-span" + + async def test_tool_span_output_is_command_output(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + _name, output = fake_tracing.ended[0] + assert "72F" in str(output) + + async def test_reasoning_span_opened_then_flush_closed(self) -> None: + """A codex reasoning item emits Start+Full (no Done): the reasoning span + opens and is closed by flush() at end of stream (is_complete=False).""" + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = _RecordingTracer( + trace_id="trace1", + parent_span_id="parent-span", + task_id="task1", + tracing=fake_tracing, + ) + turn = CodexTurn(_aiter(_reasoning_events()), model="o4-mini") + emitter = UnifiedEmitter(task_id="task1", trace_id="trace1", parent_span_id="parent-span", tracer=tracer) + [_ async for _ in emitter.yield_turn(turn)] + + opens = [s for s in received_signals if isinstance(s, OpenSpan) and s.kind == "reasoning"] + closes = [s for s in received_signals if isinstance(s, CloseSpan) and str(s.key).startswith("reasoning:")] + assert len(opens) == 1, "Reasoning Start must open exactly one reasoning span" + assert len(closes) == 1, "Reasoning span must be closed (by flush) at end of stream" + assert closes[0].is_complete is False, "No Done event, so the reasoning span is flush-closed as incomplete" + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = CodexTurn(_aiter(_tool_then_text_events()), model="o4-mini") + emitter = UnifiedEmitter(task_id="task1", trace_id=None, parent_span_id=None, tracing=fake_tracing) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async 
def test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = FakeTracing() + turn = CodexTurn(_aiter(_tool_then_text_events()), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] diff --git a/tests/lib/core/harness/test_harness_codex_temporal.py b/tests/lib/core/harness/test_harness_codex_temporal.py new file mode 100644 index 000000000..0af0b862b --- /dev/null +++ b/tests/lib/core/harness/test_harness_codex_temporal.py @@ -0,0 +1,180 @@ +"""Integration test: Temporal channel with a codex turn, offline. + +The codex tap is a pure library adapter (subprocess/sandbox provisioning lives +in the golden agent; there is no codex-specific temporal helper like langgraph's +``stream_langgraph_events``). In a Temporal deployment the codex CLI runs inside +a Temporal activity and the resulting canonical stream is delivered via the SAME +``UnifiedEmitter.auto_send_turn`` path used by the non-temporal async channel. +The only temporal-specific concern at the harness boundary is that the activity +stamps messages with a deterministic ``created_at`` (e.g. ``workflow.now()``) +for replay determinism. + +This suite exercises the auto_send path inside an activity-style call plus the +temporal-only contract: ``created_at`` is threaded through to every streaming +context. The native codex event shapes are copied verbatim from the codex turn +test / conformance fixtures. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. +- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text + usage from turn.completed are returned. 
+ +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling / durability / replay behaviour. +- Redis streaming (requires a running Redis instance). +- A real codex CLI subprocess / live model behaviour. + +See also: test_harness_codex_sync.py and test_harness_codex_async.py. +""" + +from __future__ import annotations + +from typing import Any, AsyncIterator +from datetime import datetime, timezone + +from agentex.types.task_message import TaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._codex_turn import CodexTurn + + +def _tool_then_text_events() -> list[dict[str, Any]]: + return [ + {"type": "thread.started", "thread_id": "thread-abc"}, + { + "type": "item.started", + "item": {"id": "tool1", "type": "command_execution", "command": "cat weather.txt"}, + }, + { + "type": "item.completed", + "item": { + "id": "tool1", + "type": "command_execution", + "command": "cat weather.txt", + "aggregated_output": "sunny and 72F", + "exit_code": 0, + }, + }, + {"type": "item.started", "item": {"id": "msg1", "type": "agent_message", "text": ""}}, + { + "type": "item.completed", + "item": {"id": "msg1", "type": "agent_message", "text": "The weather is sunny and 72F."}, + }, + { + "type": "turn.completed", + "usage": {"input_tokens": 20, "output_tokens": 8, "total_tokens": 28}, + }, + ] + + +async def _aiter(events: list[dict[str, Any]]) -> AsyncIterator[dict[str, Any]]: + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend that records created_at +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + 
self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity(events: list[dict[str, Any]], created_at: datetime | None) -> tuple[Any, _FakeStreaming]: + fake_streaming = _FakeStreaming() + turn = CodexTurn(_aiter(events), model="o4-mini") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityDelivery: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def test_tool_round_trip_keyed_correctly(self) -> None: + _, fake_streaming = await 
_run_activity(_tool_then_text_events(), created_at=None) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + async def test_final_text_and_usage(self) -> None: + result, _ = await _run_activity(_tool_then_text_events(), created_at=None) + assert result.final_text == "The weather is sunny and 72F." + assert result.usage.total_tokens == 28 + assert result.usage.num_tool_calls == 1 + + +class TestTemporalCreatedAtThreading: + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert len(fake_streaming.created_ats) == len(fake_streaming.messages_opened) + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_deterministic_across_runs(self) -> None: + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_events(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert first.created_ats == second.created_ats diff --git a/tests/lib/core/harness/test_harness_openai_async.py b/tests/lib/core/harness/test_harness_openai_async.py new file mode 100644 index 000000000..593142fba --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_async.py @@ -0,0 +1,304 @@ +"""Integration test: async (Redis-streaming) channel with an OpenAI-agents turn. 
+ +Exercises the unified harness surface (UnifiedEmitter.auto_send_turn + OpenAITurn) +with hand-built canonical StreamTaskMessage* streams and a fake streaming +backend so the test runs fully offline (no API keys, no Redis, no Agentex +server). + +The canonical event shapes are copied from the OpenAI converter contract +(see tests/lib/core/harness/conformance/test_openai_conformance.py): tool calls +are Full(ToolRequestContent) + Full(ToolResponseContent); text is +Start+Delta+Done. + +What is tested +-------------- +- auto_send pushes the correct message contexts to the fake streaming backend: + tool_request + tool_response + text (in that order). +- TurnResult.final_text equals the accumulated text deltas. +- TurnResult carries a TurnUsage; via the OpenAITurn result/converter path the + aggregated token usage (input/output/total + num_llm_calls) is surfaced in + TurnResult.usage. +- With a SpanTracer + fake tracing, a tool span is derived on the async path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual Redis streaming. +- The ACP on_task_event_send / on_task_create / on_task_cancel lifecycle. +- A real Runner.run_streamed execution / live OpenAI model behaviour. + +See also: test_harness_openai_sync.py and test_harness_openai_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any + +import pytest +from agents.usage import Usage + +from agentex.types.text_delta import TextDelta +from agentex.types.task_message import TaskMessage +from agentex.types.text_content import TextContent +from tests.lib.core.harness._fakes import FakeTracing +from agentex.lib.core.harness.types import TurnResult, StreamTaskMessage +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn + +# --------------------------------------------------------------------------- +# Canonical event fixtures (copied from the OpenAI converter contract) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + 
+async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Fake streaming backend (replaces adk.streaming; no Redis required) +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, sink: list[Any], ctype: str, initial_content: Any) -> None: + self.sink = sink + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + self.sink.append(("open", self.ctype, self.task_message.content)) + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + self.sink.append(("close", self.ctype)) + + async def stream_update(self, update: Any) -> Any: + self.sink.append(("delta", self.ctype, update)) + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.sink: list[Any] = [] + self.messages_opened: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + return _FakeCtx(self.sink, ctype, initial_content) + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_auto_send_turn( + events: list[StreamTaskMessage], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> tuple[TurnResult, _FakeStreaming]: + fake_streaming = _FakeStreaming() + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + 
parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests: message order and content +# --------------------------------------------------------------------------- + + +class TestAsyncAutoSendMessageOrder: + async def test_tool_request_pushed_before_tool_response(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in message_types + assert message_types.index("tool_request") < message_types.index("tool_response") + + async def test_text_pushed_last(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + message_types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert message_types[-1] == "text", f"Expected last message type=text, got {message_types}" + + async def test_exactly_three_messages(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + assert len(fake_streaming.messages_opened) == 3, ( + f"Expected 3 messages, got {[getattr(m, 'type', None) for m in fake_streaming.messages_opened]}" + ) + + +class TestAsyncAutoSendContentVerification: + async def test_tool_request_content(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_reqs = [m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)] + assert len(tool_reqs) == 1 + assert tool_reqs[0].name == "get_weather" + + async def test_tool_response_content(self) -> None: 
+ _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_resps = [m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)] + assert len(tool_resps) == 1 + assert "72F" in str(tool_resps[0].content) + assert tool_resps[0].name == "get_weather" + + async def test_tool_call_ids_match(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + tool_req = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolRequestContent)) + tool_resp = next(m for m in fake_streaming.messages_opened if isinstance(m, ToolResponseContent)) + assert tool_req.tool_call_id == tool_resp.tool_call_id + + +class TestAsyncAutoSendFinalTextAndUsage: + async def test_final_text_matches_deltas(self) -> None: + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.final_text == "Sunny and 72F." + + async def test_turn_result_has_usage(self) -> None: + """An injected canonical stream has no run to read usage from, so usage + carries only the model name (input_tokens stays None).""" + result, _ = await _run_auto_send_turn(_tool_then_text_events()) + assert result.usage is not None + assert result.usage.model == "gpt-4o" + + async def test_context_lifecycle_open_then_close(self) -> None: + _, fake_streaming = await _run_auto_send_turn(_tool_then_text_events()) + opens = [e for e in fake_streaming.sink if e[0] == "open"] + closes = [e for e in fake_streaming.sink if e[0] == "close"] + assert len(opens) == len(closes) == 3 + + async def test_usage_populated_from_result_path(self, monkeypatch: pytest.MonkeyPatch) -> None: + """Via the OpenAITurn result/converter path, aggregated token usage is + surfaced on TurnResult.usage after the stream is consumed. + + Mirrors the OpenAI turn test: a fake RunResultStreaming exposes + raw_responses with a Usage, and the converter is monkeypatched to a + passthrough so the canonical text stream is delivered while usage is read + from raw_responses. 
+ """ + import agentex.lib.adk._modules._openai_turn as turn_mod + + canonical: list[StreamTaskMessage] = [ + StreamTaskMessageStart( + type="start", index=0, content=TextContent(type="text", author="agent", content="") + ), + StreamTaskMessageDelta(type="delta", index=0, delta=TextDelta(type="text", text_delta="hi")), + StreamTaskMessageDone(type="done", index=0), + ] + + class _FakeResult: + def __init__(self) -> None: + self.raw_responses = [ + type("R", (), {"usage": Usage(requests=2, input_tokens=8, output_tokens=4, total_tokens=12)})() + ] + + def stream_events(self): # type: ignore[no-untyped-def] + return _canonical_stream(canonical) + + async def _passthrough(stream): # type: ignore[no-untyped-def] + async for e in stream: + yield e + + monkeypatch.setattr(turn_mod, "convert_openai_to_agentex_events", _passthrough) + + turn = OpenAITurn(result=_FakeResult(), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=_FakeStreaming(), + ) + result = await emitter.auto_send_turn(turn) + + assert result.final_text == "hi" + assert result.usage.model == "gpt-4o" + assert result.usage.num_llm_calls == 2 + assert result.usage.input_tokens == 8 + assert result.usage.output_tokens == 4 + assert result.usage.total_tokens == 12 + + +class TestAsyncAutoSendSpanDerivation: + async def test_tool_span_derived_on_async_path(self) -> None: + fake_tracing = FakeTracing() + await _run_auto_send_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent", + fake_tracing=fake_tracing, + ) + assert len(fake_tracing.started) == 1 + assert fake_tracing.started[0][0] == "get_weather" + assert len(fake_tracing.ended) == 1 diff --git a/tests/lib/core/harness/test_harness_openai_sync.py b/tests/lib/core/harness/test_harness_openai_sync.py new file mode 100644 index 000000000..d0d25c643 --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_sync.py @@ -0,0 +1,322 @@ +"""Integration 
test: sync (HTTP-yield) channel with an OpenAI-agents turn. + +Exercises the unified harness surface (UnifiedEmitter.yield_turn + OpenAITurn) +with hand-built canonical StreamTaskMessage* streams so the test runs fully +offline (no API keys, no live OpenAI Agents run, no Agentex server). + +Why an injected canonical stream +-------------------------------- +OpenAI's native ``RunResultStreaming`` events are heavy SDK objects; the +``OpenAITurn`` accepts a pre-built canonical ``stream=`` of StreamTaskMessage* +events that bypasses ``convert_openai_to_agentex_events``. The shapes used here +are copied verbatim from the OpenAI converter contract exercised by +``tests/lib/core/harness/conformance/test_openai_conformance.py`` (tool calls +are Full(ToolRequestContent) + Full(ToolResponseContent); reasoning is +Start(ReasoningContent) + Delta + Done). This keeps the canonical stream +faithful to what the live converter produces while staying offline. + +What is tested +-------------- +- The sync handler forwards StreamTaskMessage* events verbatim in canonical + order: tool_request (Full) -> tool_response (Full) -> text (Start+Delta+Done). +- Final accumulated text equals the seeded text deltas. +- With a trace_id + fake tracing, a tool span is opened (OpenSpan) on + Full(ToolRequestContent) and closed (CloseSpan) on the matching + Full(ToolResponseContent), and a reasoning span is opened/closed for a + reasoning segment — proving the SpanDeriver is wired on the yield path. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Actual HTTP streaming over the ACP sync endpoint. +- A real ``Runner.run_streamed`` execution / live OpenAI model behaviour. +- ``convert_openai_to_agentex_events`` over real SDK events (covered by the + OpenAI turn + conformance suites). + +See also: test_harness_openai_async.py and test_harness_openai_temporal.py. 
+""" + +from __future__ import annotations + +from typing import Any, override + +from agentex.types.text_delta import TextDelta +from agentex.types.text_content import TextContent +from tests.lib.core.harness._fakes import FakeTracing +from agentex.lib.core.harness.types import OpenSpan, CloseSpan, StreamTaskMessage +from agentex.lib.core.harness.tracer import SpanTracer +from agentex.types.reasoning_content import ReasoningContent +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn +from agentex.types.reasoning_content_delta import ReasoningContentDelta + +# --------------------------------------------------------------------------- +# Canonical event fixtures (copied from the OpenAI converter contract) +# --------------------------------------------------------------------------- + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + """A tool round-trip followed by a final text reply. + + Mirrors the OpenAI converter's tool path: a Full(ToolRequestContent) for the + call and a Full(ToolResponseContent) for the result (matched by tool_call_id), + then a streamed text answer. 
+ """ + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + +def _reasoning_events() -> list[StreamTaskMessage]: + """A reasoning segment: Start(ReasoningContent) + Delta + Done.""" + return [ + StreamTaskMessageStart( + type="start", + index=0, + content=ReasoningContent(type="reasoning", author="agent", summary=["Thinking..."]), + ), + StreamTaskMessageDelta( + type="delta", + index=0, + delta=ReasoningContentDelta(type="reasoning_content", content_index=0, content_delta="step 1"), + ), + StreamTaskMessageDone(type="done", index=0), + ] + + +async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +async def _run_yield_turn( + events: list[StreamTaskMessage], + trace_id: str | None = None, + parent_span_id: str | None = None, + fake_tracing: FakeTracing | None = None, +) -> list[Any]: + """Drive the sync (yield) path and collect all yielded events.""" + tracer: SpanTracer | bool | None = None + if trace_id and fake_tracing is not None: + tracer = SpanTracer( + trace_id=trace_id, + 
parent_span_id=parent_span_id, + task_id="task1", + tracing=fake_tracing, + ) + + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=trace_id, + parent_span_id=parent_span_id, + tracer=tracer if tracer is not None else False, + ) + return [ev async for ev in emitter.yield_turn(turn)] + + +# --------------------------------------------------------------------------- +# Tests: event order and content +# --------------------------------------------------------------------------- + + +class TestSyncYieldEventOrder: + async def test_tool_request_precedes_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + content_types = [ + getattr(getattr(ev, "content", None), "type", None) + for ev in events + if isinstance(ev, (StreamTaskMessageStart, StreamTaskMessageFull)) + ] + assert "tool_request" in content_types + assert "tool_response" in content_types + assert content_types.index("tool_request") < content_types.index("tool_response") + + async def test_text_appears_after_tool_response(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + tool_resp_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageFull) + and getattr(getattr(ev, "content", None), "type", None) == "tool_response" + ) + text_start_pos = next( + i + for i, ev in enumerate(events) + if isinstance(ev, StreamTaskMessageStart) and getattr(getattr(ev, "content", None), "type", None) == "text" + ) + assert tool_resp_pos < text_start_pos + + async def test_tool_response_carries_weather_result(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + full_responses = [ + ev + for ev in events + if isinstance(ev, StreamTaskMessageFull) and isinstance(getattr(ev, "content", None), ToolResponseContent) + ] + assert len(full_responses) == 1 + tool_response = full_responses[0].content + assert isinstance(tool_response, 
ToolResponseContent) + assert "72F" in str(tool_response.content) + assert tool_response.name == "get_weather" + + async def test_accumulated_text_matches_deltas(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + accumulated = "".join( + ev.delta.text_delta + for ev in events + if isinstance(ev, StreamTaskMessageDelta) and isinstance(ev.delta, TextDelta) and ev.delta.text_delta + ) + assert accumulated == "Sunny and 72F." + + async def test_every_start_has_matching_done(self) -> None: + events = await _run_yield_turn(_tool_then_text_events()) + starts = {ev.index for ev in events if isinstance(ev, StreamTaskMessageStart)} + dones = {ev.index for ev in events if isinstance(ev, StreamTaskMessageDone)} + assert starts == dones, f"Unmatched Start/Done indices: starts={starts} dones={dones}" + + +# --------------------------------------------------------------------------- +# Tests: span derivation on the yield path +# --------------------------------------------------------------------------- + + +class TestSyncYieldSpanDerivation: + async def test_tool_span_opened_and_closed(self) -> None: + """Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + + assert len(fake_tracing.started) == 1, "Expected exactly one tool span opened" + assert len(fake_tracing.ended) == 1, "Expected exactly one tool span closed" + name, parent_id, _ = fake_tracing.started[0] + assert name == "get_weather" + assert parent_id == "parent-span" + + async def test_tool_span_output_is_tool_result(self) -> None: + fake_tracing = FakeTracing() + await _run_yield_turn( + _tool_then_text_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + name, output = fake_tracing.ended[0] + assert name == "get_weather" + assert "72F" in str(output) 
+ + async def test_reasoning_span_opened_and_closed(self) -> None: + """A reasoning segment opens and closes a reasoning span.""" + fake_tracing = FakeTracing() + await _run_yield_turn( + _reasoning_events(), + trace_id="trace1", + parent_span_id="parent-span", + fake_tracing=fake_tracing, + ) + assert fake_tracing.started_names == ["reasoning"] + assert len(fake_tracing.ended) == 1 + + async def test_no_trace_id_means_no_spans(self) -> None: + fake_tracing = FakeTracing() + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_tracer_false_suppresses_spans(self) -> None: + fake_tracing = FakeTracing() + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent-span", + tracer=False, + tracing=fake_tracing, + ) + [_ async for _ in emitter.yield_turn(turn)] + assert fake_tracing.started == [] + assert fake_tracing.ended == [] + + async def test_span_signal_types(self) -> None: + """The signals received by the tracer are OpenSpan then CloseSpan.""" + received_signals: list[Any] = [] + + class _RecordingTracer(SpanTracer): + @override + async def handle(self, signal: Any) -> None: + received_signals.append(signal) + await super().handle(signal) + + fake_tracing = FakeTracing() + tracer = _RecordingTracer( + trace_id="trace1", + parent_span_id="parent", + task_id="task1", + tracing=fake_tracing, + ) + turn = OpenAITurn(stream=_canonical_stream(_tool_then_text_events()), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id="trace1", + parent_span_id="parent", + tracer=tracer, + ) + [_ async for _ in emitter.yield_turn(turn)] + + assert 
len(received_signals) == 2 + assert isinstance(received_signals[0], OpenSpan) + assert isinstance(received_signals[1], CloseSpan) + assert received_signals[0].name == "get_weather" diff --git a/tests/lib/core/harness/test_harness_openai_temporal.py b/tests/lib/core/harness/test_harness_openai_temporal.py new file mode 100644 index 000000000..61cda37ef --- /dev/null +++ b/tests/lib/core/harness/test_harness_openai_temporal.py @@ -0,0 +1,195 @@ +"""Integration test: Temporal channel with an OpenAI-agents turn, offline. + +In a Temporal OpenAI deployment (see +examples/tutorials/10_async/10_temporal/120_openai_agents), the OpenAI Agents +SDK run executes inside a Temporal activity. Each turn's canonical stream is +delivered to Redis via the SAME ``UnifiedEmitter.auto_send_turn`` path used by +the non-temporal async channel — the only temporal-specific concern at the +harness boundary is that the activity stamps messages with a deterministic +``created_at`` (e.g. ``workflow.now()``) so replay is deterministic. + +There is no dedicated ``stream_openai_events`` temporal helper (unlike +langgraph's ``stream_langgraph_events``); the temporal OpenAI agent builds an +``OpenAITurn`` and calls ``auto_send_turn`` directly inside the activity. This +suite therefore exercises the auto_send path plus the temporal-only contract: +``created_at`` is threaded through to every streaming context. + +What is tested +-------------- +- The canonical message sequence (tool_request -> tool_response -> text) is + delivered via auto_send_turn, exactly as inside a Temporal activity. +- ``created_at`` passed to ``auto_send_turn`` is forwarded to every + ``streaming_task_message_context`` call (deterministic timestamping). +- Final text is returned from the turn. + +What is NOT covered without live infrastructure +----------------------------------------------- +- Temporal scheduling (workflow.signal -> activity dispatch). +- Temporal durability / replay behaviour. 
+- Redis streaming (requires a running Redis instance). +- A real Runner.run_streamed execution / live OpenAI model behaviour. + +See also: test_harness_openai_sync.py and test_harness_openai_async.py. +""" + +from __future__ import annotations + +from typing import Any +from datetime import datetime, timezone + +from agentex.types.text_delta import TextDelta +from agentex.types.task_message import TaskMessage +from agentex.types.text_content import TextContent +from agentex.lib.core.harness.types import StreamTaskMessage +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.types.task_message_update import ( + StreamTaskMessageDone, + StreamTaskMessageFull, + StreamTaskMessageDelta, + StreamTaskMessageStart, +) +from agentex.types.tool_request_content import ToolRequestContent +from agentex.types.tool_response_content import ToolResponseContent +from agentex.lib.adk._modules._openai_turn import OpenAITurn + + +def _tool_then_text_events() -> list[StreamTaskMessage]: + return [ + StreamTaskMessageFull( + type="full", + index=0, + content=ToolRequestContent( + type="tool_request", + author="agent", + tool_call_id="call_1", + name="get_weather", + arguments={"city": "Paris"}, + ), + ), + StreamTaskMessageFull( + type="full", + index=1, + content=ToolResponseContent( + type="tool_response", + author="agent", + tool_call_id="call_1", + name="get_weather", + content="The weather in Paris is sunny and 72F", + ), + ), + StreamTaskMessageStart( + type="start", + index=2, + content=TextContent(type="text", author="agent", content=""), + ), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="Sunny ")), + StreamTaskMessageDelta(type="delta", index=2, delta=TextDelta(type="text", text_delta="and 72F.")), + StreamTaskMessageDone(type="done", index=2), + ] + + +async def _canonical_stream(events: list[StreamTaskMessage]): + for e in events: + yield e + + +# 
--------------------------------------------------------------------------- +# Fake streaming backend that records the created_at it receives +# --------------------------------------------------------------------------- + + +class _FakeCtx: + def __init__(self, ctype: str, initial_content: Any) -> None: + self.ctype = ctype + self.task_message = TaskMessage(id="msg-1", task_id="task1", content=initial_content) + + async def __aenter__(self) -> "_FakeCtx": + return self + + async def __aexit__(self, *args: Any) -> bool: + await self.close() + return False + + async def close(self) -> None: + pass + + async def stream_update(self, update: Any) -> Any: + return update + + +class _FakeStreaming: + def __init__(self) -> None: + self.messages_opened: list[Any] = [] + self.created_ats: list[Any] = [] + + def streaming_task_message_context( + self, + task_id: str, + initial_content: Any, + streaming_mode: str = "coalesced", + created_at: Any = None, + ) -> _FakeCtx: + ctype = getattr(initial_content, "type", None) or "" + self.messages_opened.append(initial_content) + self.created_ats.append(created_at) + return _FakeCtx(ctype, initial_content) + + +async def _run_activity(events: list[StreamTaskMessage], created_at: datetime | None) -> tuple[Any, _FakeStreaming]: + """Mirror the temporal activity body: build an OpenAITurn and auto_send it.""" + fake_streaming = _FakeStreaming() + turn = OpenAITurn(stream=_canonical_stream(events), model="gpt-4o") + emitter = UnifiedEmitter( + task_id="task1", + trace_id=None, + parent_span_id=None, + tracer=False, + streaming=fake_streaming, + ) + result = await emitter.auto_send_turn(turn, created_at=created_at) + return result, fake_streaming + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +class TestTemporalActivityMessageOrder: + async def test_canonical_sequence_delivered(self) -> None: + _, fake_streaming = 
await _run_activity(_tool_then_text_events(), created_at=None) + types = [getattr(m, "type", None) for m in fake_streaming.messages_opened] + assert "tool_request" in types + assert "tool_response" in types + assert types.index("tool_request") < types.index("tool_response") + assert types[-1] == "text" + + async def test_final_text_returned(self) -> None: + result, _ = await _run_activity(_tool_then_text_events(), created_at=None) + assert result.final_text == "Sunny and 72F." + + +class TestTemporalCreatedAtThreading: + """created_at is forwarded to every streaming context (deterministic replay).""" + + async def test_created_at_threaded_to_all_contexts(self) -> None: + fixed = datetime(2026, 6, 22, 12, 0, 0, tzinfo=timezone.utc) + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert len(fake_streaming.created_ats) == 3 + assert all(ts == fixed for ts in fake_streaming.created_ats), ( + f"Expected every context stamped with {fixed}, got {fake_streaming.created_ats}" + ) + + async def test_default_created_at_is_none(self) -> None: + """When the activity does not stamp a timestamp, contexts see None.""" + _, fake_streaming = await _run_activity(_tool_then_text_events(), created_at=None) + assert all(ts is None for ts in fake_streaming.created_ats) + + async def test_created_at_is_deterministic_across_runs(self) -> None: + """Two runs with the same created_at stamp identical timestamps — the + determinism the Temporal channel relies on for replay.""" + fixed = datetime(2026, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + _, first = await _run_activity(_tool_then_text_events(), created_at=fixed) + _, second = await _run_activity(_tool_then_text_events(), created_at=fixed) + assert first.created_ats == second.created_ats + assert all(ts == fixed for ts in first.created_ats) From e069a11948554c86a3562b82121b69e3d6a0e95d Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 20:08:16 -0400 Subject: [PATCH 08/12] docs(harness): 
changelog for the harness-cleanup breaking changes; fix stale tutorial refs Documents the deprecated-tracing-handler removal and the _modules consolidation / openai relocation (with back-compat shim window) under CHANGELOG Unreleased, and updates the sync_provider deprecation note to the renamed openai tutorial slots. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 5 +++++ src/agentex/lib/adk/providers/_modules/sync_provider.py | 4 ++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8f81295a9..277809f54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ ## Unreleased +### ⚠ BREAKING CHANGES + +* **harness:** removed the deprecated bespoke tracing handlers `create_langgraph_tracing_handler` / `create_pydantic_ai_tracing_handler` (and their `AgentexLangGraphTracingHandler` / `AgentexPydanticAITracingHandler` classes) from the public `agentex.lib.adk` surface. Span tracing is now derived from the canonical `StreamTaskMessage*` stream by `UnifiedEmitter` — wrap your run in the harness `*Turn` and drive `UnifiedEmitter.yield_turn` / `auto_send_turn`. The `agentex init` templates were migrated accordingly. +* **harness:** each harness now exposes exactly `_<harness>_sync.py` + `_<harness>_turn.py` under `agentex.lib.adk._modules`. The OpenAI harness `OpenAITurn` and `convert_openai_to_agentex_events` moved to `agentex.lib.adk._modules._openai_turn` / `_openai_sync`; back-compat shims remain at `agentex.lib.adk.providers._modules.{openai_turn,sync_provider}` for one release. Public facade names (`stream_pydantic_ai_events`, `stream_langgraph_events`, `emit_langgraph_messages`, etc.) are unchanged. + ### Features * **tracing:** emit OTel metrics for async span queue depth, batch drain, and SGP export success/failure (HTTP status labels). Disable SDK-side recording with ``AGENTEX_TRACING_METRICS=0``. 
diff --git a/src/agentex/lib/adk/providers/_modules/sync_provider.py b/src/agentex/lib/adk/providers/_modules/sync_provider.py index 3836b9e02..86696a2b5 100644 --- a/src/agentex/lib/adk/providers/_modules/sync_provider.py +++ b/src/agentex/lib/adk/providers/_modules/sync_provider.py @@ -71,8 +71,8 @@ class SyncStreamingModel(Model): wrap a ``Runner.run_streamed`` result in ``agentex.lib.adk._modules._openai_turn.OpenAITurn`` and drive delivery + tracing through ``UnifiedEmitter`` (see the - ``060_harness_openai`` / ``130_harness_openai`` / ``140_harness_openai`` - tutorials). This per-model tracing wrapper predates the harness and is + ``050_openai_agents`` / ``120_openai_agents`` tutorials). This + per-model tracing wrapper predates the harness and is retained only for backwards compatibility; it will be removed in a future release. No runtime warning is emitted. """ From 55f2cace5f0af8d9d1f6dea9de0cb96a26dc534f Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 20:17:31 -0400 Subject: [PATCH 09/12] fix(harness): determinism test now covers all harness fixtures (greptile P1) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The shared test_span_derivation_is_deterministic parametrized over all_fixtures() at COLLECTION time, which froze the set to the 5 generic fixtures registered before test_conformance.py was imported — silently dropping per-harness determinism coverage when the per-harness copies were removed in Batch A2. Make it iterate all_fixtures() at RUN time (after all modules are collected) with a guard asserting per-harness fixtures are present, and add a conformance/conftest.py that eagerly imports every per-harness module so coverage is independent of collection order / run scope. 
Also fixes the plan doc per greptile: make the openai tutorial mapping explicitly delete-and-replace the old slots, and correct the Batch I/H verification to expect the sync_provider/openai_turn shims (used by the sync-openai template + base sync tutorials) rather than zero references. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../plans/2026-06-22-pr10-harness-cleanup.md | 20 +++++++-------- .../lib/core/harness/conformance/conftest.py | 21 ++++++++++++++++ .../harness/conformance/test_conformance.py | 25 ++++++++++++++++--- 3 files changed, 52 insertions(+), 14 deletions(-) create mode 100644 tests/lib/core/harness/conformance/conftest.py diff --git a/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md b/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md index 2217dcecf..adb995428 100644 --- a/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md +++ b/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md @@ -342,11 +342,11 @@ Verified dual tutorial sets on `next`: Run: `grep -rln "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler\|stream_langgraph_events\|stream_pydantic_ai_events" examples/tutorials/` Expected: the pre-unified dirs (the ones to retire) show up. -- [ ] **Step 3: Replace in place, one framework at a time.** For each: `git rm -r` the pre-unified dir, then `git mv` the unified `harness_*` dir into that numbered slot (or copy-then-delete where content must merge). 
Mapping: - - `harness_pydantic_ai` → `00_sync/040_pydantic_ai`, `10_async/00_base/110_pydantic_ai`, `10_async/10_temporal/110_pydantic_ai` - - `harness_langgraph` → `00_sync/030_langgraph`, `10_async/00_base/100_langgraph`, `10_async/10_temporal/130_langgraph` - - `060_harness_openai`/`130_harness_openai`/`140_harness_openai` → drop the `harness_` infix into the retired openai slots (`NNN_openai_*`) - - `harness_codex` ×3 → fresh `NNN_codex` numbers consistent with the sequence +- [ ] **Step 3: Replace in place, one framework at a time.** For each framework, `git rm -r` the pre-unified dir AND `git mv` the unified `harness_*` dir into that exact slot — both halves are required, or the old dir lingers and the tier ends up with two tutorials for the same framework. Mapping (left = source dir to move, right = old dir to delete + destination path): + - `harness_pydantic_ai` → replaces `00_sync/040_pydantic_ai`, `10_async/00_base/110_pydantic_ai`, `10_async/10_temporal/110_pydantic_ai` + - `harness_langgraph` → replaces `00_sync/030_langgraph`, `10_async/00_base/100_langgraph`, `10_async/10_temporal/130_langgraph` + - `060_harness_openai`/`130_harness_openai`/`140_harness_openai` → **delete the old `*_openai_agents_local_sandbox` dirs** (`00_sync/050_…`, `10_async/00_base/120_…`, `10_async/10_temporal/120_…`) and move the harness_openai dirs into those slots, renamed `050_openai_agents` / `120_openai_agents` (drops the `harness_` infix AND the 060/130/140 collision with `claude_code`). Do NOT merely rename `060_harness_openai`→`060_openai` — that would leave the old `050_openai_agents_local_sandbox` in place. 
+ - `harness_codex` ×3 → fresh `NNN_codex` numbers consistent with the sequence (`070`/`140`/`150`); no old dir to delete - [ ] **Step 4: Confirm survivors are clean** @@ -471,11 +471,11 @@ Run: `uv run --all-packages python -c "import agentex.lib.adk; print('ok')"` → Run: `grep -rn "providers._modules.openai\|providers/_modules/openai\|sync_provider\|openai_turn\|SyncStreamingProvider\|convert_openai_to_agentex_events" src/ tests/ examples/` - [ ] **Step 2: Move + rename** the harness-surface modules into `_modules/_openai_sync.py` / `_openai_turn.py`. Keep `SyncStreamingProvider`/`SyncStreamingModel` (they are the supported sync path) — relocate them into `_openai_sync.py` (or keep a re-export shim at the old path for one release so the CLI template keeps working until updated). -- [ ] **Step 3: Update the CLI template** `acp.py.j2` import to the new path. Update `adk/__init__.py` and tests. -- [ ] **Step 4: Move the openai tests** `tests/lib/adk/providers/test_openai_turn.py` → `tests/lib/adk/test_openai_turn.py` (and `test_openai_activities.py` per the openai.py decision) so openai tests sit alongside the other four harnesses. +- [ ] **Step 3: Keep a back-compat shim at the old paths — do NOT expect zero references.** Several consumers legitimately import from `providers/_modules/sync_provider.py` and are NOT migrated by this plan: the `sync-openai-agents` CLI template, and the base sync tutorials `examples/tutorials/00_sync/010_multiturn/project/acp.py` (`SyncStreamingProvider`) and `00_sync/020_streaming/project/acp.py` (`SyncStreamingProvider` + `convert_openai_to_agentex_events`). So `sync_provider.py` MUST remain as a shim that keeps `SyncStreamingModel`/`SyncStreamingProvider` and re-exports the relocated `convert_openai_to_agentex_events` from `_modules/_openai_sync.py`; likewise leave a shim at `providers/_modules/openai_turn.py` re-exporting `OpenAITurn`. Update only the internal importers you actually moved (`adk/__init__.py`, the relocated test). 
+- [ ] **Step 4: (optional) Move the openai turn test** `tests/lib/adk/providers/test_openai_turn.py` → `tests/lib/adk/test_openai_turn.py` for symmetry, or leave it and just repoint its import to the new `_modules/_openai_turn` path (required because it monkeypatches `convert_openai_to_agentex_events` on the turn module's namespace). - [ ] **Step 5: Verify** -Run: `grep -rn "providers._modules.openai_turn\|providers._modules.sync_provider" src/ tests/ examples/` → zero (or only the deliberate one-release shim). +Run: `grep -rn "providers._modules.openai_turn\|providers._modules.sync_provider" src/ tests/ examples/` → the only remaining hits are the kept shims (`providers/_modules/{sync_provider,openai_turn}.py` themselves) and their intended one-release consumers (the `sync-openai-agents` template + the `010_multiturn`/`020_streaming` base sync tutorials). Confirm each resolves via the shim — `python -c "from agentex.lib.adk.providers._modules.sync_provider import SyncStreamingProvider, convert_openai_to_agentex_events"` — NOT that the count is zero. Run: `uv run --all-packages --all-extras pytest tests/lib/adk/ tests/lib/core/harness/ -q` → green. Run: `uv run --all-packages python -c "import agentex.lib.adk; print('ok')"` → `ok`. @@ -512,8 +512,8 @@ The pre-unified planning docs for the now-merged stack are obsolete. - [ ] **Step 1:** Re-read `adk/docs/harness.md` end-to-end against the post-E/F/I tree; confirm every symbol, tap, example, and module path matches reality. - [ ] **Step 2: Re-grep for any stale reference** -Run: `grep -rln "harness_pydantic_ai\|harness_langgraph\|harness_openai\|harness_codex\|create_.*_tracing_handler\|providers._modules.openai_turn\|sync_provider" examples/ docs/ adk/docs/ src/agentex/lib/cli/templates/ README.md` -Expected: zero (or only deliberate one-release shims, noted in the changelog). 
+Run: `grep -rln "harness_pydantic_ai\|harness_langgraph\|harness_openai\|harness_codex\|create_.*_tracing_handler" examples/ docs/ adk/docs/ src/agentex/lib/cli/templates/ README.md` +Expected: zero for the `harness_*` paths and the deprecated handlers. (This plan doc itself still names the old `harness_*`/openai dirs as the historical retirement record — that is expected.) Then separately confirm the `sync_provider`/`openai_turn` shims still resolve for their intended one-release consumers (the `sync-openai-agents` template + the `010_multiturn`/`020_streaming` base sync tutorials), rather than expecting zero references — `python -c "from agentex.lib.adk.providers._modules.sync_provider import SyncStreamingProvider, convert_openai_to_agentex_events; from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn"`. - [ ] **Step 3: Add the changelog / release note** documenting the breaking removals: `create_langgraph_tracing_handler` / `create_pydantic_ai_tracing_handler` (+ classes), any removed `stream_*_events`/`emit_langgraph_messages` public helper, the openai module relocation (new import path), and the `adk.harness` namespace if adopted. - [ ] **Step 4: Commit** (`docs(harness): final docs consistency pass + changelog for the harness-cleanup removals`). diff --git a/tests/lib/core/harness/conformance/conftest.py b/tests/lib/core/harness/conformance/conftest.py new file mode 100644 index 000000000..e4da7f1e2 --- /dev/null +++ b/tests/lib/core/harness/conformance/conftest.py @@ -0,0 +1,21 @@ +"""Conformance-suite test setup. + +Eagerly import every per-harness conformance module so each one's module-level +``register(...)`` calls run before any test executes. 
This makes +``all_fixtures()`` complete and independent of pytest's collection/import order +(the runner documents that cross-module registration order is not guaranteed), +so the cross-harness ``test_span_derivation_is_deterministic`` guard in +``test_conformance.py`` covers the full fixture set even when this directory is +run in isolation. +""" + +from __future__ import annotations + +# Importing these for their registration side effects only. +from . import ( + test_codex_conformance, # noqa: F401 + test_openai_conformance, # noqa: F401 + test_langgraph_conformance, # noqa: F401 + test_claude_code_conformance, # noqa: F401 + test_pydantic_ai_conformance, # noqa: F401 +) diff --git a/tests/lib/core/harness/conformance/test_conformance.py b/tests/lib/core/harness/conformance/test_conformance.py index a296a6ae0..7c79f9397 100644 --- a/tests/lib/core/harness/conformance/test_conformance.py +++ b/tests/lib/core/harness/conformance/test_conformance.py @@ -272,11 +272,28 @@ async def test_cross_channel_equivalence(fixture: Fixture) -> None: # --------------------------------------------------------------------------- -@pytest.mark.parametrize("fixture", all_fixtures(), ids=lambda f: f.name) -def test_span_derivation_is_deterministic(fixture: Fixture) -> None: - """Span derivation over the same event list is idempotent. +def test_span_derivation_is_deterministic() -> None: + """Span derivation over the same event list is idempotent, for EVERY + registered fixture across all harnesses. + + ``all_fixtures()`` is read at run time (not at collection/parametrize time) + so it sees fixtures registered by every conformance module, regardless of + import/collection order. The per-harness conformance modules are imported + eagerly via ``conftest.py`` in this directory, so this test covers the full + cross-harness fixture set even when run in isolation. 
(Parametrizing on + ``all_fixtures()`` at import time would freeze the set to whatever happened + to be registered before this module was collected.) Retained as a lightweight regression guard. The primary cross-channel guarantee is asserted in test_cross_channel_equivalence above. """ - assert derive_all(fixture.events) == derive_all(fixture.events) + fixtures = all_fixtures() + assert len(fixtures) > len(_FIXTURES), ( + "expected per-harness fixtures to be registered in addition to the " + f"{len(_FIXTURES)} generic ones; got {len(fixtures)} total — a conformance " + "module's fixtures are not being registered (check conftest imports)" + ) + for fixture in fixtures: + assert derive_all(fixture.events) == derive_all(fixture.events), ( + f"[{fixture.name}] span derivation is not deterministic" + ) From 7ebbaeb4599a1bb43b47b128f4c461e994f02e3d Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 20:19:29 -0400 Subject: [PATCH 10/12] docs(harness-cleanup): remove the PR 10 implementation plan now that the cleanup has landed Co-Authored-By: Claude Opus 4.8 (1M context) --- .../plans/2026-06-22-pr10-harness-cleanup.md | 566 ------------------ 1 file changed, 566 deletions(-) delete mode 100644 docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md diff --git a/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md b/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md deleted file mode 100644 index adb995428..000000000 --- a/docs/superpowers/plans/2026-06-22-pr10-harness-cleanup.md +++ /dev/null @@ -1,566 +0,0 @@ -# PR 10 — Post-Merge Harness Cleanup Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. 
- -**Goal:** Remove the transitional artifacts left behind by the additive harness-surface stack (deprecated tracing handlers, resolved-workaround comments, duplicated test scaffolding, divergent per-harness structures), consolidate the harness source/test/tutorial filesystem onto one convention, retire the duplicate pre-unified tutorials, and bring `adk/docs/harness.md` in line with the final merged surface. - -**Architecture:** The harness-surface stack (#412 foundation, #414 conformance, #415/#416/#417/#420/#421 migrations, #423 facade+docs) was built additively so nothing regressed and each PR stayed reviewable. PR 10 is the single, deliberate cleanup that runs once the whole stack is merged and the deprecation/migration preconditions hold. Work is ordered so non-breaking refactors land first and the breaking removals (deprecated public symbols + the tutorials that import them) land last, behind the version-bump gate. - -**Tech Stack:** Python 3.12/3.13, `rye`/`uv`, `pytest`, `ruff`, `pyright`/`mypy`, Temporal, pydantic. Tutorials use per-project `uv` envs. - -> **Branch / base:** This plan lives on `declan-scale/pr10-harness-cleanup`, **stacked on top of `declan-scale/pr9-harness-cleanup` (PR 9, #423)**, which is itself rebased onto the latest `next`. The migration stack (#412/#414–#421) is **already merged** to `next`; PR 9 (the public adk facade + `adk/docs/harness.md`) is the base of this branch. Because PR 9 is the base, the facade and `harness.md` are already present here, so the facade-reconciliation (C1/C2) and `harness.md` (C3) tasks are directly actionable. When PR 9 merges into `next`, rebase this branch onto `next` (the PR 9 commits drop out as already-merged). - -> **Altitude note (read before executing):** This plan pins **exact file paths, the concrete transformation, and exact verification commands** — all verified against the merged `next` tree as of 2026-06-22. It deliberately does **not** hardcode line numbers (they drift as batches land). 
Where a step says "resolve at execution," run the named grep against the current tree first, then apply the described change. - ---- - -## Preconditions (do not start the BREAKING batches until ALL hold) - -1. **#423 (PR 9) is the base of this branch** — the facade + `harness.md` are present here, and the whole migration stack (#412/#414–#421) is already merged to `next`. When PR 9 merges to `next`, rebase this branch onto `next` before the breaking batches land. -2. **Deprecation window observed** (or a minor/major version boundary) for the publicly-deprecated symbols below — they were only docstring-deprecated, never runtime-warned, so external code may still import them. -3. **Golden agent migrated** off the bespoke paths (per the adoption plan, #422 → implementation in `agentex-agents`): it no longer constructs the deprecated tracing handlers or any pre-unified converter path. Grep the golden agent + any other internal consumers first. -4. **No external consumers** depend on the removed symbols (check downstream usage; add a changelog/release note for the removal). - -**Optional split:** Batches A–D, G, and the stale-doc removals are **non-breaking** (tests, internal helpers, docs, integration coverage) — they only need precondition 1. If the breaking removals (Batches E, F, I) are blocked on the version-bump policy, land the non-breaking batches as an earlier cleanup PR and keep E/F/I for PR 10. - ---- - -## Execution order - -| Batch | Items | Breaking? | Gated on | -|---|---|---|---| -| A — Test scaffolding consolidation | 6, 7 | No | Precond. 1 | -| B — Internal helper / sync-path consolidation | 5, 8, 9 | No | Precond. 1 | -| C — Facade reconciliation + harness.md doc update | 10, 3 | No (additive namespace) | Precond. 1 (PR 9 merged) | -| D — Conformance vestigial cleanup | 4 | No | Precond. 1 | -| G — Integration-test parity | 12 | No | Precond. 1 | -| E — Tutorial standardization + retirement | 11, 13 | Yes (deletes dirs) | Precond. 
1–4 (rides with F) | -| F — Deprecated tracing-handler + workaround removal | 1, 2 | Yes (public symbols) | Precond. 1–4 | -| I — Filesystem layout + naming consolidation | NEW | Yes (file moves + import changes) | After E/F (and Precond. 1–4) | -| H — Final docs + changelog + stale-doc removal | — | — | After E/F/I land | - -Run A → B → D → G first (green, non-breaking). C lands once PR 9 is merged. Then E + F together (the old tutorials import the symbols F removes — they MUST land in the same commit range), then I (the final structural sweep, after `_tracing.py` is already gone), then H. - ---- - -## Batch A — Consolidate duplicated test scaffolding (items 6, 7) - -### Task A1: Extract the shared harness test fakes - -Verified copies on `next`: `_FakeTracing` is defined in 7 places and `_FakeSpan` in 6; `_run_yield_turn` in 2. There are also near-variants under `tests/lib/adk/` (`_FakeTracingBackend`). - -**Files:** -- Create: `tests/lib/core/harness/_fakes.py` -- Modify (delete local copy, import from `_fakes`): `tests/lib/core/harness/test_tracer.py`, `tests/lib/core/harness/test_emitter.py`, `tests/lib/core/harness/conformance/runner.py`, `tests/lib/core/harness/test_harness_pydantic_ai_sync.py`, `..._async.py`, `tests/lib/core/harness/test_harness_langgraph_sync.py`, `..._async.py`, `tests/lib/adk/test_pydantic_ai_sync_unified.py`, `tests/lib/adk/test_langgraph_sync_unified.py` - -- [ ] **Step 1: Grep the tree for every definition site** - -Run: `grep -rn "class _FakeTracing\|class _FakeSpan\|class _FakeTracingBackend\|def _run_yield_turn" tests/` -Confirm the full set before changing anything. Note `_FakeTracingBackend` (in `test_langgraph_sync_unified.py`) — decide if it is the same shape (fold it) or genuinely different (leave it, document why). 
- -- [ ] **Step 2: Create `_fakes.py` from the canonical copy** - -Lift the definitions from `tests/lib/core/harness/test_tracer.py` (the foundation copy) verbatim into `tests/lib/core/harness/_fakes.py`, exported as public names `FakeSpan`, `FakeTracing`, `run_yield_turn` (drop the leading underscore now that they are shared). This is a move, not a rewrite. - -- [ ] **Step 3: Replace each local copy with an import** - -In each file from the Files list, delete the local class/func block and add `from tests.lib.core.harness._fakes import FakeSpan, FakeTracing, run_yield_turn` (import only what that file uses). Update references (`_FakeTracing` → `FakeTracing`, etc.). The `tests/lib/adk/*_sync_unified.py` files import across packages — confirm the import path resolves under the test rootdir. - -- [ ] **Step 4: Verify no copies remain** - -Run: `grep -rn "class _FakeTracing\|class _FakeSpan\|def _run_yield_turn" tests/` -Expected: zero matches (only `_fakes.py`'s `class FakeTracing`/`class FakeSpan`/`def run_yield_turn`, which this grep does not match). - -- [ ] **Step 5: Run the harness + adk test suites** - -Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` -Expected: same pass count as pre-change, zero failures. - -- [ ] **Step 6: Lint + commit** - -Run: `uv run ruff check tests/` -```bash -git add tests/ -git commit -m "test(harness): extract shared FakeSpan/FakeTracing/run_yield_turn fakes" -``` - -### Task A2: Parametrize the conformance determinism test once - -Verified on `next`: `def test_span_derivation_is_deterministic` exists in `conformance/test_conformance.py` (shared), `test_langgraph_conformance.py`, and `test_pydantic_ai_conformance.py`. **Additionally**, `test_codex_conformance.py` carries the same determinism assertion (`assert derive_all(x) == derive_all(x)`) under its own test — so grep for the assertion, not just the function name. 
- -**Files:** -- Modify: `tests/lib/core/harness/conformance/test_conformance.py` (keep the single parametrized test) -- Modify: each `tests/lib/core/harness/conformance/test_<harness>_conformance.py` (delete its determinism copy, keep fixture registration + cross-channel assertions) - -- [ ] **Step 1: Grep for every determinism copy** - -Run: `grep -rn "test_span_derivation_is_deterministic\|derive_all(.*) == derive_all" tests/lib/core/harness/conformance/` -Expected: the shared copy in `test_conformance.py` plus per-harness copies (currently langgraph, pydantic-ai, codex; check openai/claude too). - -- [ ] **Step 2: Make the shared copy parametrized over all fixtures** - -In `test_conformance.py`, ensure `test_span_derivation_is_deterministic` is parametrized by `all_fixtures()` (the registry the conformance runner exposes via `register`) so one test re-derives `derive_all(...)` over every registered fixture and asserts identical output across repeated derivation. It must reference no harness-specific symbol. - -- [ ] **Step 3: Delete the per-harness copies** - -Remove the determinism test/assertion from every `test_<harness>_conformance.py`, leaving those modules with only fixture registration + cross-channel assertions. Keep `derive_all` itself in `runner.py` — it is the shared primitive the parametrized test uses (NOT vestigial; see Batch D). - -- [ ] **Step 4: Verify exactly one definition remains** - -Run: `grep -rn "def test_span_derivation_is_deterministic" tests/lib/core/harness/conformance/` -Expected: exactly one match, in `test_conformance.py`.
- -- [ ] **Step 5: Run conformance tests + commit** - -Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/conformance/ -q` -```bash -git add tests/lib/core/harness/conformance/ -git commit -m "test(harness): parametrize the conformance determinism test once over all_fixtures()" -``` - ---- - -## Batch B — Consolidate internal helpers + sync paths (items 5, 8, 9) - -### Task B1: Remove leftover hand-rolled streaming branches (item 5) - -**Files (resolve exact branches at execution):** `src/agentex/lib/adk/_modules/_pydantic_ai_async.py`, `_langgraph_async.py`, and any openai/claude/codex async helper. - -- [ ] **Step 1: Confirm async helpers delegate to the emitter** - -Run: `grep -rn "auto_send_turn\|streaming_task_message_context\|adk.streaming" src/agentex/lib/adk/_modules/_*_async.py src/agentex/lib/adk/providers/_modules/` -Expected: `stream_*_events` / `run_agent_streamed_auto_send` call `UnifiedEmitter.auto_send_turn`. Flag any remaining hand-rolled `adk.streaming` loop as dead. - -- [ ] **Step 2: Delete the dead branches** the emitter delegation made unreachable. Do not touch a live delivery route. - -- [ ] **Step 3: Verify + commit** - -Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` -```bash -git add src/agentex/lib/adk/ -git commit -m "refactor(harness): drop dead hand-rolled streaming branches now covered by auto_send_turn" -``` - -### Task B2: Extract a shared usage-normalization primitive (item 8) - -The five `HarnessTurn` impls (`_pydantic_ai_turn.py`, `_langgraph_turn.py`, `providers/_modules/openai_turn.py`, `_claude_code_turn.py`, `_codex_turn.py`) repeat the same shape: wrap a tap's event stream + normalize provider usage into `TurnUsage`. 
- -**Files:** -- Create: `src/agentex/lib/core/harness/usage.py` (`normalize_usage(...)`) — or a `HarnessTurnBase` mixin in `core/harness/types.py` -- Create: `tests/lib/core/harness/test_usage.py` -- Modify: the five turn modules - -- [ ] **Step 1: Diff the five turn impls** for the common shape. - -Run: `wc -l src/agentex/lib/adk/_modules/_pydantic_ai_turn.py src/agentex/lib/adk/_modules/_langgraph_turn.py src/agentex/lib/adk/providers/_modules/openai_turn.py src/agentex/lib/adk/_modules/_claude_code_turn.py src/agentex/lib/adk/_modules/_codex_turn.py` -Note the existing `claude_code_usage_to_turn_usage` / `codex_usage_to_turn_usage` helpers — these are exactly the per-harness normalizers to converge. - -- [ ] **Step 2: Write the shared primitive (TDD).** Add `test_usage.py` asserting `normalize_usage` maps representative provider usage into the correct `TurnUsage` fields (aligning with `agentex.lib.core.observability.llm_metrics`). Implement `usage.py` to pass. - -Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/test_usage.py -q` → PASS. - -- [ ] **Step 3: Route each turn module through the primitive,** leaving only provider-specific mapping. Do NOT force-fit a harness whose usage genuinely diverges (check codex — it is the largest for a reason; document if you skip it). - -- [ ] **Step 4: Verify + commit** - -Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` -```bash -git add src/agentex/lib/core/harness/usage.py tests/lib/core/harness/test_usage.py src/agentex/lib/adk/ -git commit -m "refactor(harness): extract shared TurnUsage normalization primitive" -``` - -### Task B3: Converge the sync-path structures (item 9 — overlaps Batch I) - -"Sync delivery" was built three ways: openai patches `providers/_modules/sync_provider.py` (+ `openai_turn.py`); pydantic-ai/langgraph use `_*_sync.py`; claude/codex use `_claude_code_sync.py`/`_codex_sync.py`. 
- -- [ ] **Step 1: Adopt the per-harness `_<harness>_sync.py` convention** (the majority pattern, and the target end-state in Batch I). Document the choice in the commit body. -- [ ] **Step 2: Align openai to it** — this is the structural half of Batch I's openai relocation; do them together (Task I2). -- [ ] **Step 3: Verify + commit** - -Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` -```bash -git add src/agentex/lib/adk/ -git commit -m "refactor(harness): converge the five sync paths on the _<harness>_sync.py convention" -``` - ---- - -## Batch C — Reconcile the facade + update harness.md (items 10, 3) — needs PR 9 merged - -### Task C1: Fold the claude/codex ad-hoc exports into the #423 facade (item 10) - -Verified on `next` (pre-PR-9): `adk/__init__.py` already exports ad-hoc per-harness symbols — `convert_claude_code_to_agentex_events`, `ClaudeCodeTurn`, `claude_code_usage_to_turn_usage`, `convert_codex_to_agentex_events`, `CodexTurn`, `codex_usage_to_turn_usage` (plus the existing pydantic/langgraph taps + the deprecated `create_*_tracing_handler`). PR 9 adds the unified facade block (`UnifiedEmitter`, `SpanTracer`, `HarnessTurn`, `OpenSpan`, `CloseSpan`, `SpanSignal`, `StreamTaskMessage`, `TurnUsage`, `TurnResult`). - -**Files:** `src/agentex/lib/adk/__init__.py` - -- [ ] **Step 1: After rebasing onto PR-9'd `next`,** grep the facade region: - -Run: `grep -n "harness\|UnifiedEmitter\|convert_.*_to_agentex_events\|Turn\b\|usage_to_turn_usage" src/agentex/lib/adk/__init__.py` - -- [ ] **Step 2: Deduplicate.** Ensure every public harness symbol is imported once and listed once in `__all__`, organized under the unified facade block from #423. Remove duplicate import lines / `__all__` entries. Preserve the `# ruff: noqa: I001` ordering comment and the circular-import-safe ordering.
- -- [ ] **Step 3: Verify the surface imports cleanly** - -Run: `uv run --all-packages python -c "import agentex.lib.adk as adk; assert len(adk.__all__) == len(set(adk.__all__)), 'dupes'; print('ok')"` -Expected: `ok`. - -- [ ] **Step 4: Lint + commit** - -Run: `uv run ruff check src/agentex/lib/adk/__init__.py && uv run pyright src/agentex/lib/adk/__init__.py` -```bash -git add src/agentex/lib/adk/__init__.py -git commit -m "refactor(adk): fold claude/codex exports into the single #423 harness facade" -``` - -### Task C2: (Decision-gated) Introduce the `adk.harness` namespace (item 3) - -> Team decision required; polish, not required. If declined, skip and record it in the PR body; `harness.md` keeps the flat `agentex.lib.adk` paths. - -**Files (if adopted):** Create `src/agentex/lib/adk/harness.py` (re-export the surface + taps); modify `adk/__init__.py` to keep flat re-exports for one release (back-compat). - -- [ ] **Step 1:** Create the namespace re-exporting `UnifiedEmitter`, `SpanTracer`, `HarnessTurn`, `OpenSpan`, `CloseSpan`, `SpanSignal`, `StreamTaskMessage`, `TurnUsage`, `TurnResult`, and each `convert_<harness>_to_agentex_events` tap. -- [ ] **Step 2:** Keep flat `adk.*` re-exports with a comment they're retained for one release, slated to drop in a later major. -- [ ] **Step 3: Verify both paths** - -Run: `uv run --all-packages python -c "from agentex.lib.adk.harness import UnifiedEmitter; from agentex.lib.adk import UnifiedEmitter; print('ok')"` -- [ ] **Step 4: Commit** (`refactor(adk): add adk.harness namespace, keep flat re-exports for back-compat`). - -### Task C3: Update `adk/docs/harness.md` to the final merged surface (MANDATORY) - -> Explicitly requested: keep `harness.md` up to date and update the docs in PR 10.
- -**Files:** `adk/docs/harness.md` (arrives via PR 9) - -- [ ] **Step 1: Complete the taps table.** Replace "Taps for claude-code and codex will be added in subsequent PRs (AGX1-420, AGX1-421)" with the merged reality — list all five shipped harnesses (pydantic-ai, LangGraph, OpenAI Agents, claude-code, codex), each with its `convert_<harness>_to_agentex_events` tap, all exported from `agentex.lib.adk`. Remove the "will be added" sentence. - -- [ ] **Step 2: Fix the sync ACP example.** The current "Sync ACP (pydantic-ai tap)" example builds a `UnifiedEmitter` then yields the tap directly, leaving the emitter unused (Greptile flagged this on #423) under a "pre-unified sync path" caveat. Replace with the canonical post-migration flow: - -```python -import agentex.lib.adk as adk -from agentex.lib.adk import UnifiedEmitter, PydanticAITurn # Turn wrapper implements HarnessTurn - -@acp.on_message_send -async def handle(params): - task_id = params.task.id - async with adk.tracing.span(trace_id=task_id, name="message", ...) as turn_span: - emitter = UnifiedEmitter( - task_id=task_id, - trace_id=task_id, - parent_span_id=turn_span.id if turn_span else None, - ) - turn = PydanticAITurn(pydantic_stream) - async for event in emitter.yield_turn(turn): - yield event -``` - -Delete the "For the pre-unified sync path the tap is still yielded directly..." paragraph. - -- [ ] **Step 3: Reconcile import paths with the C2 decision.** If `adk.harness` adopted, show it as primary in the import block + examples (note flat path retained one release). Else leave flat paths. - -- [ ] **Step 4: Reflect the Batch I module layout** if I lands before H — any path references in the doc (e.g. "implementation lives at `src/agentex/lib/core/harness/`") stay correct, but if examples name `_modules` paths, update to the consolidated `_<harness>_sync.py`/`_<harness>_turn.py` names.
- -- [ ] **Step 5: Guard against dangling references** - -Run: `grep -n "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler\|AgentexLangGraphTracingHandler\|AgentexPydanticAITracingHandler" adk/docs/harness.md` -Expected: zero (so Batch F's removals leave no dangling doc reference). - -- [ ] **Step 6: Commit** - -```bash -git add adk/docs/harness.md -git commit -m "docs(harness): update harness.md to the final merged surface (all taps, canonical yield_turn example)" -``` - ---- - -## Batch D — Remove vestigial conformance paths (item 4) - -Note: `derive_all` (in `conformance/runner.py`) is **actively used** by the determinism tests — keep it. Look only for genuinely unreferenced simple/determinism-only runner code. - -- [ ] **Step 1: Find unused runner paths** - -Run: `grep -rn "derive_all\|simple_runner\|determinism_only\|run_cross_channel" tests/lib/core/harness/ src/agentex/lib/core/harness/` -For each hit, confirm whether anything still imports it after the cross-channel runner (#414) became the single entry point. - -- [ ] **Step 2: Remove dead paths** nothing imports. Keep `derive_all` and `run_cross_channel_conformance` (live). - -- [ ] **Step 3: Verify + commit** - -Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/conformance/ -q` -```bash -git add tests/lib/core/harness/ -git commit -m "test(harness): remove vestigial simple-conformance-runner paths" -``` - ---- - -## Batch G — Integration-test coverage parity (item 12) - -Verified: only pydantic-ai + langgraph ship `test_harness_*_{sync,async,temporal}` suites. openai/claude/codex ship only conformance + turn/sync tests. - -**Files:** -- Create (if adding parity): `tests/lib/core/harness/test_harness_openai_{sync,async,temporal}.py`, `..._claude_code_{...}.py`, `..._codex_{...}.py` -- Modify: the harness live-matrix workflow (collapse the two near-identical matrix jobs into one) - -- [ ] **Step 1: Decide parity vs documented difference** (with the team). 
Either add the missing suites mirroring the pydantic-ai shape (importing the Batch A `_fakes`), or document the intentional gap in `harness.md` / a test README. -- [ ] **Step 2 (if adding): write them against the shared fakes.** - -Run: `uv run --all-packages --all-extras pytest tests/lib/core/harness/ -q` → green. -- [ ] **Step 3: Collapse the two matrix jobs into one** parametrized matrix (enabled now that fakes are shared). - -Run: `grep -rn "matrix\|harness" .github/workflows/*.yml` -- [ ] **Step 4: Commit** - -```bash -git add tests/lib/core/harness/ .github/workflows/ -git commit -m "test(harness): add integration-suite parity and collapse the live matrix to one job" -``` - ---- - -## Batch E — Tutorial standardization + retirement (items 11, 13) — BREAKING - -> Gated with Batch F: the surviving tutorials must not import the symbols F removes; the old tutorials deleted here are the ones that DO import them. Execute E and F in the same commit range. - -Verified dual tutorial sets on `next`: - -| Framework | Pre-unified (RETIRE) | Unified-surface (KEEP, rename into slot) | -|---|---|---| -| langgraph | `00_sync/030_langgraph`, `10_async/00_base/100_langgraph`, `10_async/10_temporal/130_langgraph` | `harness_langgraph` ×3 | -| pydantic-ai | `00_sync/040_pydantic_ai`, `10_async/00_base/110_pydantic_ai`, `10_async/10_temporal/110_pydantic_ai` | `harness_pydantic_ai` ×3 | -| openai | `00_sync/050_openai_agents_local_sandbox`, `10_async/00_base/120_openai_agents_local_sandbox`, `10_async/10_temporal/120_openai_agents_local_sandbox` | `060_harness_openai`, `130_harness_openai`, `140_harness_openai` | -| claude-code | — (none; already numbered) | `060/130/140_claude_code` — KEEP, no rename | -| codex | — (net-new) | `harness_codex` ×3 → fresh `NNN_codex` numbers | - -`090_claude_agents_sdk_mvp` is the Agents SDK (not the claude-code harness) — KEEP untouched. 
- -### Task E1: Replace-in-place onto the numbered `NNN_` paradigm - -- [ ] **Step 1: Inventory** — `find examples/tutorials -name manifest.yaml | sort`. Confirm both sets above exist. -- [ ] **Step 2: Confirm the old dirs use the deprecated path** - -Run: `grep -rln "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler\|stream_langgraph_events\|stream_pydantic_ai_events" examples/tutorials/` -Expected: the pre-unified dirs (the ones to retire) show up. - -- [ ] **Step 3: Replace in place, one framework at a time.** For each framework, `git rm -r` the pre-unified dir AND `git mv` the unified `harness_*` dir into that exact slot — both halves are required, or the old dir lingers and the tier ends up with two tutorials for the same framework. Mapping (left = source dir to move, right = old dir to delete + destination path): - - `harness_pydantic_ai` → replaces `00_sync/040_pydantic_ai`, `10_async/00_base/110_pydantic_ai`, `10_async/10_temporal/110_pydantic_ai` - - `harness_langgraph` → replaces `00_sync/030_langgraph`, `10_async/00_base/100_langgraph`, `10_async/10_temporal/130_langgraph` - - `060_harness_openai`/`130_harness_openai`/`140_harness_openai` → **delete the old `*_openai_agents_local_sandbox` dirs** (`00_sync/050_…`, `10_async/00_base/120_…`, `10_async/10_temporal/120_…`) and move the harness_openai dirs into those slots, renamed `050_openai_agents` / `120_openai_agents` (drops the `harness_` infix AND the 060/130/140 collision with `claude_code`). Do NOT merely rename `060_harness_openai`→`060_openai` — that would leave the old `050_openai_agents_local_sandbox` in place. - - `harness_codex` ×3 → fresh `NNN_codex` numbers consistent with the sequence (`070`/`140`/`150`); no old dir to delete - -- [ ] **Step 4: Confirm survivors are clean** - -Run: `grep -rln "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler" examples/tutorials/` -Expected: zero matches. 
- -- [ ] **Step 5: Standardize per-tutorial scaffolding (item 11).** Add the shared `.dockerignore` to the langgraph + codex tutorials (byte-identical to the pydantic-ai/openai/claude copy). Decide `conftest.py` (present only in codex): promote to shared test setup or remove — apply uniformly. - -- [ ] **Step 6: Fix index/README cross-links** - -Run: `grep -rln "harness_pydantic_ai\|harness_langgraph\|harness_openai\|harness_codex" examples/ docs/ README.md` -Update every reference to the new numbered path. Expected after: zero stale references. - -- [ ] **Step 7: Confirm glob discovery unaffected** - -Run: `grep -n "harness_\|030_langgraph\|040_pydantic_ai\|050_openai" .github/workflows/agentex-tutorials-test.yml` -Expected: no hardcoded references to renamed/removed dirs (discovery is by `manifest.yaml` glob). - -- [ ] **Step 8: Commit** combined with Batch F (Task F3). - ---- - -## Batch F — Remove deprecated tracing handlers + workaround markers (items 1, 2) — BREAKING - -### Task F1: Delete the deprecated bespoke tracing handlers (item 1) - -**Files:** -- Delete: `src/agentex/lib/adk/_modules/_pydantic_ai_tracing.py` (`create_pydantic_ai_tracing_handler`, `AgentexPydanticAITracingHandler`) -- Delete: `src/agentex/lib/adk/_modules/_langgraph_tracing.py` (`create_langgraph_tracing_handler`, `AgentexLangGraphTracingHandler`) -- Modify: `src/agentex/lib/adk/__init__.py` (remove the two imports + two `__all__` entries) -- Delete: tests that exist only to exercise the deprecated path - -> **⚠ openai shim is NOT in this task.** `SyncStreamingModel`/`SyncStreamingProvider` in `providers/_modules/sync_provider.py` are **load-bearing** — referenced by the live CLI template `src/agentex/lib/cli/templates/sync-openai-agents/project/acp.py.j2`. They are the supported sync-openai delivery path, not a deprecated tracing shim. Do NOT delete them here. Their relocation/renaming is handled in Batch I (Task I2) and only after the template is updated. 
- -- [ ] **Step 1: Prove zero live references** - -Run: `grep -rn "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler\|AgentexLangGraphTracingHandler\|AgentexPydanticAITracingHandler" src/ tests/ examples/` -Expected after Batch E: matches only in the modules being deleted, their dedicated tests, and `adk/__init__.py`. If anything else matches (esp. the golden agent), STOP — precondition 3 unmet. - -- [ ] **Step 2: Delete the modules + exports.** `git rm` both `_*_tracing.py`. In `adk/__init__.py` remove the two `from ..._*_tracing import create_*_tracing_handler` lines and the two `"create_*_tracing_handler"` `__all__` entries. Delete the dedicated deprecated-path tests. Keep any genuinely-shared helper they used if still referenced (grep first). - -- [ ] **Step 3: Verify** - -Run: `grep -rn "create_langgraph_tracing_handler\|create_pydantic_ai_tracing_handler" src/ tests/ examples/` → zero. -Run: `uv run --all-packages python -c "import agentex.lib.adk as adk; print('ok')"` → `ok`. - -### Task F2: Remove resolved-workaround markers + stale docstrings (item 2) - -Verified on `next`: many `AGX1-377`/`AGX1-378` references exist across `_langgraph_async.py`, `_langgraph_sync.py`, `_langgraph_turn.py`, `_pydantic_ai_turn.py`, `core/harness/auto_send.py`, `core/services/adk/providers/openai.py`, the conformance runner, and many test docstrings. **Most describe the LANDED fix / current contract** (e.g. "AGX1-377 fix: auto_send now delivers streamed tool-request messages", "AGX1-378 restored: created_at is now threaded through", "LangGraph emits tool requests as Full events"). - -**Files (resolve at execution):** `src/agentex/lib/core/harness/auto_send.py`, the per-harness turn/async/sync modules, `src/agentex/lib/core/services/adk/providers/openai.py`, the conformance runner, and test docstrings. 
- -- [ ] **Step 1: Find the breadcrumbs** - -Run: `grep -rn "AGX1-377\|AGX1-378\|workaround\|coalescing\|created_at limitation" src/ tests/` - -- [ ] **Step 2: Trim the historical framing, keep the current contract.** For each hit: if it documents *why the code currently behaves this way* (e.g. LangGraph Full-event tool requests, `created_at` threading) keep the explanation but strip the now-meaningless ticket-number / "workaround"/"note:" framing. Delete only comments describing removed transitional state. **No code-behavior change in this task** — comments/docstrings only. - -- [ ] **Step 3: Verify** - -Run: `grep -rn "AGX1-377\|AGX1-378" src/ tests/` -Expected: zero (or only deliberately-kept current-contract notes, justified in the commit body). - -### Task F3: Verify the breaking batch + commit E+F together - -- [ ] **Step 1:** `uv run --all-packages --all-extras pytest tests/lib/core/harness/ tests/lib/adk/ -q` → green. -- [ ] **Step 2:** `uv run ruff check src/agentex/lib/adk/ src/agentex/lib/core/harness/ && uv run pyright src/agentex/lib/adk/__init__.py` → clean. -- [ ] **Step 3: Commit the breaking set** - -```bash -git add -A -git commit -m "refactor(harness)!: remove deprecated tracing handlers, retire pre-unified tutorials, drop resolved-workaround markers - -BREAKING CHANGE: removes the docstring-deprecated create_langgraph_tracing_handler / -create_pydantic_ai_tracing_handler and their handler classes from the public adk surface. -Use UnifiedEmitter + the convert_<harness>_to_agentex_events taps instead." -``` - ---- - -## Batch I — Filesystem layout + naming consolidation (NEW) — BREAKING - -The harness modules landed in different spots with different names.
Target end-state (per the directive: **every provider has just a `sync.py` and a `turn.py`, all under `adk/_modules/`, openai pulled out of `providers/_modules/`**): - -| Harness | Final source files (all under `src/agentex/lib/adk/_modules/`) | -|---|---| -| pydantic-ai | `_pydantic_ai_sync.py`, `_pydantic_ai_turn.py` | -| langgraph | `_langgraph_sync.py`, `_langgraph_turn.py` | -| claude-code | `_claude_code_sync.py`, `_claude_code_turn.py` (already correct) | -| codex | `_codex_sync.py`, `_codex_turn.py` (already correct) | -| openai | `_openai_sync.py`, `_openai_turn.py` (MOVED from `providers/_modules/`) | - -Removed/folded by this batch (or already by F): `_pydantic_ai_async.py`, `_langgraph_async.py`, `_langgraph_messages.py`, `_pydantic_ai_tracing.py` (F), `_langgraph_tracing.py` (F), and the `providers/_modules/openai_turn.py` + `sync_provider.py` (relocated/renamed). - -### Task I1: Collapse pydantic-ai / langgraph to `sync.py` + `turn.py` - -**Files:** -- Modify/remove: `src/agentex/lib/adk/_modules/_pydantic_ai_async.py`, `_langgraph_async.py`, `_langgraph_messages.py` -- Modify: `_pydantic_ai_sync.py`, `_pydantic_ai_turn.py`, `_langgraph_sync.py`, `_langgraph_turn.py`, `adk/__init__.py` - -> **Caveat — the async helpers are public.** `stream_pydantic_ai_events`, `stream_langgraph_events`, `run_agent_streamed_auto_send`, and `emit_langgraph_messages` are exported from `adk/__init__.py` and may be imported by consumers/tutorials. After Batch E migrates the tutorials, confirm no consumer needs them: - -Run: `grep -rn "stream_pydantic_ai_events\|stream_langgraph_events\|run_agent_streamed_auto_send\|emit_langgraph_messages" src/ tests/ examples/` - -- [ ] **Step 1:** If a helper is still wanted, fold it into that harness's `_*_sync.py` or `_*_turn.py` and keep a thin re-export from `adk/__init__.py` for one release; otherwise remove it (changelog the public-symbol removal — adds to Batch H). Decide per-symbol based on the grep.
-- [ ] **Step 2:** `git rm` `_*_async.py` / `_langgraph_messages.py` once their content is folded and references updated. Update `adk/__init__.py` imports + `__all__`. -- [ ] **Step 3: Verify** - -Run: `uv run --all-packages --all-extras pytest tests/lib/adk/ tests/lib/core/harness/ -q` → green. -Run: `uv run --all-packages python -c "import agentex.lib.adk; print('ok')"` → `ok`. - -### Task I2: Move openai out of `providers/_modules/` into `_modules/` - -**Files:** -- `git mv src/agentex/lib/adk/providers/_modules/openai_turn.py src/agentex/lib/adk/_modules/_openai_turn.py` -- Create `src/agentex/lib/adk/_modules/_openai_sync.py` from the sync-delivery pieces of `providers/_modules/sync_provider.py` (and the harness-tap `convert_openai_to_agentex_events`), aligning naming with the other four. -- Decide placement of `providers/_modules/openai.py` (the ~745-line Temporal **activities** provider): if it is a provider-activity module rather than a harness tap, it may stay under `providers/`; the directive is about the harness surface. Confirm with the grep below before moving it. -- Update importers: `adk/__init__.py`, `src/agentex/lib/cli/templates/sync-openai-agents/project/acp.py.j2` (imports `SyncStreamingProvider, convert_openai_to_agentex_events` from `agentex.lib.adk.providers._modules.sync_provider`), and any test. - -- [ ] **Step 1: Inventory every openai import path** - -Run: `grep -rn "providers._modules.openai\|providers/_modules/openai\|sync_provider\|openai_turn\|SyncStreamingProvider\|convert_openai_to_agentex_events" src/ tests/ examples/` - -- [ ] **Step 2: Move + rename** the harness-surface modules into `_modules/_openai_sync.py` / `_openai_turn.py`. Keep `SyncStreamingProvider`/`SyncStreamingModel` (they are the supported sync path) — relocate them into `_openai_sync.py` (or keep a re-export shim at the old path for one release so the CLI template keeps working until updated). 
-- [ ] **Step 3: Keep a back-compat shim at the old paths — do NOT expect zero references.** Several consumers legitimately import from `providers/_modules/sync_provider.py` and are NOT migrated by this plan: the `sync-openai-agents` CLI template, and the base sync tutorials `examples/tutorials/00_sync/010_multiturn/project/acp.py` (`SyncStreamingProvider`) and `00_sync/020_streaming/project/acp.py` (`SyncStreamingProvider` + `convert_openai_to_agentex_events`). So `sync_provider.py` MUST remain as a shim that keeps `SyncStreamingModel`/`SyncStreamingProvider` and re-exports the relocated `convert_openai_to_agentex_events` from `_modules/_openai_sync.py`; likewise leave a shim at `providers/_modules/openai_turn.py` re-exporting `OpenAITurn`. Update only the internal importers you actually moved (`adk/__init__.py`, the relocated test). -- [ ] **Step 4: (optional) Move the openai turn test** `tests/lib/adk/providers/test_openai_turn.py` → `tests/lib/adk/test_openai_turn.py` for symmetry, or leave it and just repoint its import to the new `_modules/_openai_turn` path (required because it monkeypatches `convert_openai_to_agentex_events` on the turn module's namespace). -- [ ] **Step 5: Verify** - -Run: `grep -rn "providers._modules.openai_turn\|providers._modules.sync_provider" src/ tests/ examples/` → the only remaining hits are the kept shims (`providers/_modules/{sync_provider,openai_turn}.py` themselves) and their intended one-release consumers (the `sync-openai-agents` template + the `010_multiturn`/`020_streaming` base sync tutorials). Confirm each resolves via the shim — `python -c "from agentex.lib.adk.providers._modules.sync_provider import SyncStreamingProvider, convert_openai_to_agentex_events"` — NOT that the count is zero. -Run: `uv run --all-packages --all-extras pytest tests/lib/adk/ tests/lib/core/harness/ -q` → green. -Run: `uv run --all-packages python -c "import agentex.lib.adk; print('ok')"` → `ok`. 
- -### Task I3: Normalize test naming (`_sync.py` vs `_sync_unified.py`) - -Verified duplicate-ish test files: `tests/lib/adk/test_langgraph_sync.py` + `test_langgraph_sync_unified.py`, and `test_pydantic_ai_sync.py` + `test_pydantic_ai_sync_unified.py`. - -- [ ] **Step 1: Diff each pair** to see whether `_unified` is the post-migration replacement of the pre-unified `_sync` test or genuinely separate coverage. -- [ ] **Step 2:** Merge into one `test_*_sync.py` per harness (folding still-relevant cases), or rename consistently. Remove the redundant file. -- [ ] **Step 3: Verify + commit I1–I3 together** - -Run: `uv run --all-packages --all-extras pytest tests/lib/adk/ tests/lib/core/harness/ -q` → green. -Run: `uv run ruff check src/ tests/ && uv run pyright src/agentex/lib/adk/__init__.py` → clean. -```bash -git add -A -git commit -m "refactor(harness)!: consolidate harness modules to _*_sync.py + _*_turn.py under _modules/ (openai moved out of providers/_modules)" -``` - ---- - -## Batch H — Final docs, changelog, and stale-plan-doc removal - -### Task H1: Remove the stale unified-harness plan doc(s) - -The pre-unified planning docs for the now-merged stack are obsolete. - -**Files:** `git rm docs/superpowers/plans/2026-06-18-unified-harness-surface-pr4-pydantic-ai.md` (and any sibling `*-unified-harness-*` plan doc that lands). - -- [ ] **Step 1:** `ls docs/superpowers/plans/` and remove the unified-harness-surface plan doc(s). Keep this PR-10 plan until PR 10 itself merges. -- [ ] **Step 2: Commit** (`docs: remove stale unified-harness-surface planning doc (stack merged)`). - -### Task H2: Final docs consistency pass + changelog - -- [ ] **Step 1:** Re-read `adk/docs/harness.md` end-to-end against the post-E/F/I tree; confirm every symbol, tap, example, and module path matches reality.
-- [ ] **Step 2: Re-grep for any stale reference** - -Run: `grep -rln "harness_pydantic_ai\|harness_langgraph\|harness_openai\|harness_codex\|create_.*_tracing_handler" examples/ docs/ adk/docs/ src/agentex/lib/cli/templates/ README.md` -Expected: zero for the `harness_*` paths and the deprecated handlers. (This plan doc itself still names the old `harness_*`/openai dirs as the historical retirement record — that is expected.) Then separately confirm the `sync_provider`/`openai_turn` shims still resolve for their intended one-release consumers (the `sync-openai-agents` template + the `010_multiturn`/`020_streaming` base sync tutorials), rather than expecting zero references — `python -c "from agentex.lib.adk.providers._modules.sync_provider import SyncStreamingProvider, convert_openai_to_agentex_events; from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn"`. - -- [ ] **Step 3: Add the changelog / release note** documenting the breaking removals: `create_langgraph_tracing_handler` / `create_pydantic_ai_tracing_handler` (+ classes), any removed `stream_*_events`/`emit_langgraph_messages` public helper, the openai module relocation (new import path), and the `adk.harness` namespace if adopted. -- [ ] **Step 4: Commit** (`docs(harness): final docs consistency pass + changelog for the harness-cleanup removals`). - ---- - -## Verification (whole PR) - -- Grep the whole repo (and confirm with the golden agent / known consumers) for each removed symbol — zero references before deletion (Task F1 Step 1, I1/I2 inventories). -- After Batch A: the shared `_fakes` module is the only definition of the fakes; the determinism test exists once — grep confirms no per-file/per-harness copies. -- After Batch B/I: the five harnesses each have exactly one `_*_sync.py` + one `_*_turn.py` under `adk/_modules/`; openai no longer lives under `providers/_modules/`; the turn modules use the shared usage normalizer.
`ls src/agentex/lib/adk/_modules/_*.py` shows the 10 expected files (+ any deliberately-kept shim). -- After Batch E: exactly one tutorial agent per framework per tier; none import deprecated symbols; tutorial CI job + index/README links resolve. -- `adk/docs/harness.md` documents all five taps, uses the canonical `yield_turn` example with no unused variable, and references no deprecated symbol or old module path. -- The `sync-openai-agents` CLI template still resolves its openai imports (via the kept `sync_provider` shim, or the new openai path if it was repointed) and renders/runs. -- Full `./scripts/test` on Python 3.12 AND 3.13. **Run the two versions separately or in shorter scoped batches** — the dual-version `./scripts/test` in one shot has tripped a 600s no-output watchdog; prefer scoped runs or background with periodic output. -- `./scripts/lint` clean (whole-repo ruff + pyright). -- Changelog / release note present (Task H2). - -## Risk - -Removing publicly-exported (deprecated) symbols and relocating public module paths are breaking changes — gate Batches E/F/I on the version-bump policy and on confirming the golden agent + any external consumers are migrated. The openai relocation touches a live CLI template; keep a one-release re-export shim if any external code may import the old path. Everything here is recoverable from history; sequence it as the final, deliberate cleanup of the harness-surface workstream. Batches A–D and G are non-breaking and can ship earlier if E/F/I are blocked.
- ---- - -## Appendix — scope-item → batch mapping (auditable) - -| Scope item | Batch/Task | -|---|---| -| 1 — delete deprecated tracing handlers | F1 | -| 2 — remove resolved-workaround markers | F2 | -| 3 — adk.harness namespace (optional) | C2 | -| 4 — vestigial conformance runner | D | -| 5 — dead sync/async branches | B1 / I1 | -| 6 — shared test fakes | A1 | -| 7 — parametrize determinism test | A2 | -| 8 — shared usage normalization | B2 | -| 9 — converge sync paths | B3 / I | -| 10 — reconcile adk/__init__.py edits | C1 | -| 11 — tutorial consistency pass | E1 | -| 12 — integration-test parity | G | -| 13 — retire duplicate tutorials | E1 | -| NEW — filesystem layout + naming (sync.py/turn.py, openai→_modules) | I | -| NEW — remove stale unified-harness plan doc | H1 | - -Cross-cutting facts to preserve: -- Items 1 and 13 are coupled — the pre-unified tutorials import the symbols item 1 removes; retire them in the same commit range (Batches E+F). -- Item 11's renames ARE item 13's retirement — one operation, not two. -- Settled tutorial decision: **replace in place on the numbered `NNN_` paradigm**; codex takes fresh `NNN_codex` numbers; `090_claude_agents_sdk_mvp` (Agents SDK, not the claude-code harness) stays. -- The openai `SyncStreamingModel`/`SyncStreamingProvider` are load-bearing (CLI template) — relocate in Batch I with a shim, do NOT delete in Batch F. -- Non-breaking (A–D, G, H1) vs breaking (E, F, I) — split if the version-bump policy blocks the breaking set. From 7cf6f9859b2b824fe37669b8f4f3f29662c3ff59 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 20:37:05 -0400 Subject: [PATCH 11/12] feat(cli): add agentex init templates for claude-code, codex, and async openai-agents Adds scaffolding for the harnesses that lacked an init path: claude-code and codex across all three tiers (sync / default-async / temporal), plus the missing default-openai-agents (async-base) variant. 
Each template uses the unified harness surface (UnifiedEmitter + the harness *Turn), mirrors the migrated tutorials, and is wired into the TemplateType enum, project-file map, and the sync/async/temporal init menus. All 19 template types render to valid Python under test_init_templates. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/agentex/lib/cli/commands/init.py | 21 ++ .../default-claude-code/.dockerignore.j2 | 43 ++++ .../default-claude-code/.env.example.j2 | 13 + .../default-claude-code/Dockerfile-uv.j2 | 47 ++++ .../default-claude-code/Dockerfile.j2 | 42 ++++ .../default-claude-code/README.md.j2 | 64 +++++ .../default-claude-code/dev.ipynb.j2 | 126 ++++++++++ .../default-claude-code/environments.yaml.j2 | 57 +++++ .../default-claude-code/manifest.yaml.j2 | 120 ++++++++++ .../default-claude-code/project/acp.py.j2 | 147 ++++++++++++ .../default-claude-code/pyproject.toml.j2 | 33 +++ .../default-claude-code/requirements.txt.j2 | 8 + .../templates/default-codex/.dockerignore.j2 | 43 ++++ .../templates/default-codex/.env.example.j2 | 13 + .../templates/default-codex/Dockerfile-uv.j2 | 47 ++++ .../cli/templates/default-codex/Dockerfile.j2 | 42 ++++ .../cli/templates/default-codex/README.md.j2 | 72 ++++++ .../cli/templates/default-codex/dev.ipynb.j2 | 126 ++++++++++ .../default-codex/environments.yaml.j2 | 57 +++++ .../templates/default-codex/manifest.yaml.j2 | 120 ++++++++++ .../templates/default-codex/project/acp.py.j2 | 226 ++++++++++++++++++ .../templates/default-codex/pyproject.toml.j2 | 33 +++ .../default-codex/requirements.txt.j2 | 8 + .../default-openai-agents/.dockerignore.j2 | 43 ++++ .../default-openai-agents/.env.example.j2 | 13 + .../default-openai-agents/Dockerfile-uv.j2 | 47 ++++ .../default-openai-agents/Dockerfile.j2 | 43 ++++ .../default-openai-agents/README.md.j2 | 69 ++++++ .../default-openai-agents/dev.ipynb.j2 | 167 +++++++++++++ .../environments.yaml.j2 | 53 ++++ .../default-openai-agents/manifest.yaml.j2 | 115 +++++++++ 
.../default-openai-agents/project/acp.py.j2 | 135 +++++++++++ .../default-openai-agents/pyproject.toml.j2 | 34 +++ .../default-openai-agents/requirements.txt.j2 | 11 + .../sync-claude-code/.dockerignore.j2 | 43 ++++ .../sync-claude-code/.env.example.j2 | 13 + .../sync-claude-code/Dockerfile-uv.j2 | 47 ++++ .../templates/sync-claude-code/Dockerfile.j2 | 43 ++++ .../templates/sync-claude-code/README.md.j2 | 64 +++++ .../templates/sync-claude-code/dev.ipynb.j2 | 167 +++++++++++++ .../sync-claude-code/environments.yaml.j2 | 53 ++++ .../sync-claude-code/manifest.yaml.j2 | 117 +++++++++ .../sync-claude-code/project/acp.py.j2 | 135 +++++++++++ .../sync-claude-code/pyproject.toml.j2 | 33 +++ .../sync-claude-code/requirements.txt.j2 | 8 + .../cli/templates/sync-codex/.dockerignore.j2 | 43 ++++ .../cli/templates/sync-codex/.env.example.j2 | 13 + .../cli/templates/sync-codex/Dockerfile-uv.j2 | 47 ++++ .../cli/templates/sync-codex/Dockerfile.j2 | 43 ++++ .../lib/cli/templates/sync-codex/README.md.j2 | 67 ++++++ .../lib/cli/templates/sync-codex/dev.ipynb.j2 | 167 +++++++++++++ .../templates/sync-codex/environments.yaml.j2 | 53 ++++ .../cli/templates/sync-codex/manifest.yaml.j2 | 117 +++++++++ .../templates/sync-codex/project/acp.py.j2 | 174 ++++++++++++++ .../templates/sync-codex/pyproject.toml.j2 | 33 +++ .../templates/sync-codex/requirements.txt.j2 | 8 + .../temporal-claude-code/.dockerignore.j2 | 43 ++++ .../temporal-claude-code/.env.example.j2 | 13 + .../temporal-claude-code/Dockerfile-uv.j2 | 55 +++++ .../temporal-claude-code/Dockerfile.j2 | 48 ++++ .../temporal-claude-code/README.md.j2 | 73 ++++++ .../temporal-claude-code/dev.ipynb.j2 | 126 ++++++++++ .../temporal-claude-code/environments.yaml.j2 | 64 +++++ .../temporal-claude-code/manifest.yaml.j2 | 140 +++++++++++ .../temporal-claude-code/project/acp.py.j2 | 31 +++ .../project/activities.py.j2 | 139 +++++++++++ .../project/run_worker.py.j2 | 41 ++++ .../project/workflow.py.j2 | 135 +++++++++++ 
.../temporal-claude-code/pyproject.toml.j2 | 37 +++ .../temporal-claude-code/requirements.txt.j2 | 11 + .../templates/temporal-codex/.dockerignore.j2 | 43 ++++ .../templates/temporal-codex/.env.example.j2 | 13 + .../templates/temporal-codex/Dockerfile-uv.j2 | 55 +++++ .../templates/temporal-codex/Dockerfile.j2 | 48 ++++ .../cli/templates/temporal-codex/README.md.j2 | 80 +++++++ .../cli/templates/temporal-codex/dev.ipynb.j2 | 126 ++++++++++ .../temporal-codex/environments.yaml.j2 | 64 +++++ .../templates/temporal-codex/manifest.yaml.j2 | 140 +++++++++++ .../temporal-codex/project/acp.py.j2 | 32 +++ .../temporal-codex/project/activities.py.j2 | 145 +++++++++++ .../temporal-codex/project/run_worker.py.j2 | 41 ++++ .../temporal-codex/project/workflow.py.j2 | 145 +++++++++++ .../temporal-codex/pyproject.toml.j2 | 37 +++ .../temporal-codex/requirements.txt.j2 | 11 + 84 files changed, 5662 insertions(+) create mode 100644 src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/README.md.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/pyproject.toml.j2 create mode 100644 src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 create mode 100644 
src/agentex/lib/cli/templates/default-codex/.env.example.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/README.md.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 create mode 100644 src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2 create mode 100644 src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 create 
mode 100644 src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2 create mode 100644 src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/.env.example.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/Dockerfile.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/README.md.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/pyproject.toml.j2 create mode 100644 src/agentex/lib/cli/templates/sync-codex/requirements.txt.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/README.md.j2 create mode 100644 
src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/project/run_worker.py.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/project/workflow.py.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/README.md.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 create mode 100644 src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 diff --git a/src/agentex/lib/cli/commands/init.py 
b/src/agentex/lib/cli/commands/init.py index 307a5d0e8..9849e9bbc 100644 --- a/src/agentex/lib/cli/commands/init.py +++ b/src/agentex/lib/cli/commands/init.py @@ -26,14 +26,21 @@ class TemplateType(str, Enum): TEMPORAL_OPENAI_AGENTS = "temporal-openai-agents" TEMPORAL_PYDANTIC_AI = "temporal-pydantic-ai" TEMPORAL_LANGGRAPH = "temporal-langgraph" + TEMPORAL_CLAUDE_CODE = "temporal-claude-code" + TEMPORAL_CODEX = "temporal-codex" DEFAULT = "default" DEFAULT_LANGGRAPH = "default-langgraph" DEFAULT_PYDANTIC_AI = "default-pydantic-ai" + DEFAULT_OPENAI_AGENTS = "default-openai-agents" + DEFAULT_CLAUDE_CODE = "default-claude-code" + DEFAULT_CODEX = "default-codex" SYNC = "sync" SYNC_OPENAI_AGENTS = "sync-openai-agents" SYNC_OPENAI_AGENTS_LOCAL_SANDBOX = "sync-openai-agents-local-sandbox" SYNC_LANGGRAPH = "sync-langgraph" SYNC_PYDANTIC_AI = "sync-pydantic-ai" + SYNC_CLAUDE_CODE = "sync-claude-code" + SYNC_CODEX = "sync-codex" def render_template( @@ -66,14 +73,21 @@ def create_project_structure( TemplateType.TEMPORAL_OPENAI_AGENTS: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], TemplateType.TEMPORAL_PYDANTIC_AI: ["acp.py", "workflow.py", "run_worker.py", "agent.py", "tools.py"], TemplateType.TEMPORAL_LANGGRAPH: ["acp.py", "workflow.py", "run_worker.py", "graph.py", "tools.py"], + TemplateType.TEMPORAL_CLAUDE_CODE: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], + TemplateType.TEMPORAL_CODEX: ["acp.py", "workflow.py", "run_worker.py", "activities.py"], TemplateType.DEFAULT: ["acp.py"], TemplateType.DEFAULT_LANGGRAPH: ["acp.py", "graph.py", "tools.py"], TemplateType.DEFAULT_PYDANTIC_AI: ["acp.py", "agent.py", "tools.py"], + TemplateType.DEFAULT_OPENAI_AGENTS: ["acp.py"], + TemplateType.DEFAULT_CLAUDE_CODE: ["acp.py"], + TemplateType.DEFAULT_CODEX: ["acp.py"], TemplateType.SYNC: ["acp.py"], TemplateType.SYNC_OPENAI_AGENTS: ["acp.py"], TemplateType.SYNC_OPENAI_AGENTS_LOCAL_SANDBOX: ["acp.py", "agent.py", "tools.py"], TemplateType.SYNC_LANGGRAPH: 
["acp.py", "graph.py", "tools.py"], TemplateType.SYNC_PYDANTIC_AI: ["acp.py", "agent.py", "tools.py"], + TemplateType.SYNC_CLAUDE_CODE: ["acp.py"], + TemplateType.SYNC_CODEX: ["acp.py"], }[template_type] # Create project/code files @@ -184,8 +198,11 @@ def validate_agent_name(text: str) -> bool | str: "Which Async template would you like to use?", choices=[ {"name": "Basic Async ACP", "value": TemplateType.DEFAULT}, + {"name": "Async ACP + OpenAI Agents SDK", "value": TemplateType.DEFAULT_OPENAI_AGENTS}, {"name": "Async ACP + LangGraph", "value": TemplateType.DEFAULT_LANGGRAPH}, {"name": "Async ACP + Pydantic AI", "value": TemplateType.DEFAULT_PYDANTIC_AI}, + {"name": "Async ACP + Claude Code", "value": TemplateType.DEFAULT_CLAUDE_CODE}, + {"name": "Async ACP + Codex", "value": TemplateType.DEFAULT_CODEX}, ], ).ask() if not template_type: @@ -198,6 +215,8 @@ def validate_agent_name(text: str) -> bool | str: {"name": "Temporal + OpenAI Agents SDK (Recommended)", "value": TemplateType.TEMPORAL_OPENAI_AGENTS}, {"name": "Temporal + Pydantic AI", "value": TemplateType.TEMPORAL_PYDANTIC_AI}, {"name": "Temporal + LangGraph", "value": TemplateType.TEMPORAL_LANGGRAPH}, + {"name": "Temporal + Claude Code", "value": TemplateType.TEMPORAL_CLAUDE_CODE}, + {"name": "Temporal + Codex", "value": TemplateType.TEMPORAL_CODEX}, ], ).ask() if not template_type: @@ -211,6 +230,8 @@ def validate_agent_name(text: str) -> bool | str: {"name": "Sync ACP + OpenAI Agents SDK + Local Sandbox", "value": TemplateType.SYNC_OPENAI_AGENTS_LOCAL_SANDBOX}, {"name": "Sync ACP + LangGraph", "value": TemplateType.SYNC_LANGGRAPH}, {"name": "Sync ACP + Pydantic AI", "value": TemplateType.SYNC_PYDANTIC_AI}, + {"name": "Sync ACP + Claude Code", "value": TemplateType.SYNC_CLAUDE_CODE}, + {"name": "Sync ACP + Codex", "value": TemplateType.SYNC_CODEX}, ], ).ask() if not template_type: diff --git a/src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 
b/src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 b/src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV 
UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 new file mode 100644 index 000000000..0395caf74 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/Dockerfile.j2 @@ -0,0 +1,42 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + node \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ 
project_path_from_build_root }}/project + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 b/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 new file mode 100644 index 000000000..ab05398e3 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/README.md.j2 @@ -0,0 +1,64 @@ +# {{ agent_name }} - AgentEx Async Claude Code Agent + +This template builds an **asynchronous** (non-Temporal) agent that drives the +**Claude Code CLI** through the unified harness surface on AgentEx: +- Spawns `claude -p --output-format stream-json --verbose` as a local subprocess +- Wraps the CLI's stdout stream in a `ClaudeCodeTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, and event handlers +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. The +`@acp.on_task_event_send` handler spawns the Claude Code CLI and pushes the +harness events to the task stream. + +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. 
The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/acp.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 b/src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. 
For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " 
print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 new file mode 100644 index 000000000..f802776f0 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/environments.yaml.j2 @@ -0,0 +1,57 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived from separately. 
However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal: + enabled: false + + diff --git a/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..2d94ba41c --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/manifest.yaml.j2 @@ -0,0 +1,120 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4.
The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common 
credentials include: + credentials: + - env_var_name: LITELLM_API_KEY + secret_name: litellm-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: + LITELLM_API_KEY: "" # Set your LLM API key + # OPENAI_BASE_URL: "" + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 new file mode 100644 index 000000000..cd7e771db --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/project/acp.py.j2 @@ -0,0 +1,147 @@ +"""ACP handler for {{ agent_name }} — an async Claude Code agent. + +Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL +asyncio subprocess (no Scale sandbox — that is a production concern). Stdout +lines are fed into ``ClaudeCodeTurn``. Events are delivered via +``UnifiedEmitter.auto_send_turn``, the async Redis push path. + +Live runs require the ``claude`` CLI to be installed and an +ANTHROPIC_API_KEY (or equivalent credential) in the environment. 
+""" + +from __future__ import annotations + +import os +import asyncio +from typing import AsyncIterator + +from dotenv import load_dotenv + +load_dotenv() + +import agentex.lib.adk as adk +from agentex.lib.adk import ClaudeCodeTurn +from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.types.fastacp import AsyncACPConfig +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +logger = make_logger(__name__) + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create( + acp_type="async", + config=AsyncACPConfig(type="base"), +) + + +async def _spawn_claude(prompt: str) -> AsyncIterator[str]: + """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines. + + Injectable seam: tests can monkeypatch this with a fake async iterator of + pre-recorded lines so no real CLI invocation is needed offline. + """ + proc = await asyncio.create_subprocess_exec( + "claude", + "-p", + "--output-format", + "stream-json", + "--verbose", + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + assert proc.stdout is not None + assert proc.stdin is not None + + proc.stdin.write(prompt.encode()) + proc.stdin.close() + + # Drain stderr concurrently. With --verbose, Claude Code can write enough to + # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks + # on its stderr write while we block reading stdout — a deadlock. A + # background task keeps stderr flowing so stdout never stalls. 
+ async def _drain_stderr() -> None: + assert proc.stderr is not None + async for _ in proc.stderr: + pass + + stderr_task = asyncio.create_task(_drain_stderr()) + + try: + buffer = "" + async for chunk in proc.stdout: + buffer += chunk.decode("utf-8", errors="replace") + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + + if buffer.strip(): + yield buffer.strip() + + await proc.wait() + finally: + # Release the subprocess and stderr drain task even if the consumer + # abandons the generator early (task cancellation / client disconnect): + # cancel the drain task and terminate+reap the process if it is still + # running, so neither is leaked. + stderr_task.cancel() + try: + await stderr_task + except asyncio.CancelledError: + pass + if proc.returncode is None: + try: + proc.terminate() + except ProcessLookupError: + pass + await proc.wait() + + +@acp.on_task_create +async def handle_task_create(params: CreateTaskParams): + logger.info("Task created: %s", params.task.id) + + +@acp.on_task_event_send +async def handle_task_event_send(params: SendEventParams): + """Handle a user message: spawn Claude Code locally and push events to the task stream.""" + task_id = params.task.id + prompt = params.event.content.content + logger.info("Processing message for task %s", task_id) + + await adk.messages.create(task_id=task_id, content=params.event.content) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name="message", + input={"message": prompt}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + turn = ClaudeCodeTurn(_spawn_claude(prompt)) + result = await emitter.auto_send_turn(turn) + if turn_span: + turn_span.output = {"final_text": result.final_text} + + +@acp.on_task_cancel +async def handle_task_canceled(params: CancelTaskParams): + 
logger.info("Task canceled: %s", params.task.id) diff --git a/src/agentex/lib/cli/templates/default-claude-code/pyproject.toml.j2 b/src/agentex/lib/cli/templates/default-claude-code/pyproject.toml.j2 new file mode 100644 index 000000000..e499b1dc1 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/pyproject.toml.j2 @@ -0,0 +1,33 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-claude-code/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 b/src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore 
+ +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/default-codex/.env.example.j2 b/src/agentex/lib/cli/templates/default-codex/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ 
project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 new file mode 100644 index 000000000..0395caf74 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/Dockerfile.j2 @@ -0,0 +1,42 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/README.md.j2 b/src/agentex/lib/cli/templates/default-codex/README.md.j2 new file mode 100644 index 000000000..b82f1c5f2 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/README.md.j2 @@ -0,0 +1,72 @@ +# {{ agent_name }} - AgentEx Async Codex Agent + +This template builds an **asynchronous** (non-Temporal) agent that drives the
+**Codex CLI** through the unified harness surface on AgentEx: +- Spawns `codex exec --json` as a local subprocess +- Wraps the CLI's stdout stream in a `CodexTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Persists the codex session/thread ID via `adk.state` for multi-turn memory +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, state, and event handlers +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. The +`@acp.on_task_event_send` handler spawns the Codex CLI and pushes the harness +events to the task stream. + +### Multi-turn memory +The codex session/thread ID is persisted via `adk.state`, so each new turn +resumes the same codex session with `codex exec resume <session_id>`. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/acp.py` to change the CLI flags or how the +prompt is delivered. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4.
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When 
processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 new file mode 100644 index 000000000..f802776f0 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/environments.yaml.j2 @@ -0,0 +1,57 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. 
+# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal: + enabled: false + + diff --git a/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 new file mode 100644 index 000000000..2d94ba41c --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/manifest.yaml.j2 @@ -0,0 +1,120 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image.
+# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and 
discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: LITELLM_API_KEY + secret_name: litellm-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: + LITELLM_API_KEY: "" # Set your LLM API key + # OPENAI_BASE_URL: "" + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 new file mode 100644 index 000000000..80f09b64b --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/project/acp.py.j2 @@ -0,0 +1,226 @@ +"""Async (base) ACP handler for {{ agent_name }} — a Codex CLI harness agent. 
+ +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for an async (Redis-streaming) ACP agent without Temporal. + +The handler: +1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox). + This is correct for local development; production isolation is a separate + concern. +2. Wraps the stdout line stream in a ``CodexTurn``. +3. Delivers every canonical ``StreamTaskMessage*`` event to Redis via + ``UnifiedEmitter.auto_send_turn``, so the UI receives tokens in real time. +4. Multi-turn memory is persisted via ``adk.state``. + +Live runs require: +- ``codex`` CLI on PATH (``npm install -g @openai/codex``) +- ``OPENAI_API_KEY`` set in the environment +""" + +from __future__ import annotations + +import os +import time +import codecs +import asyncio +from collections.abc import AsyncIterator + +from dotenv import load_dotenv + +load_dotenv() + +import agentex.lib.adk as adk +from agentex.lib.adk import CodexTurn +from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.types.fastacp import AsyncACPConfig +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.utils.model_utils import BaseModel +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +logger = make_logger(__name__) + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create( + acp_type="async", + config=AsyncACPConfig(type="base"), +) + +MODEL = os.environ.get("CODEX_MODEL", "o4-mini") + + +class ConversationState(BaseModel): + """Per-task conversation state persisted via ``adk.state``. 
+ + We store the codex session/thread ID so subsequent turns can resume the + same codex session via ``codex exec resume THREAD_ID``. + """ + + codex_thread_id: str | None = None + turn_number: int = 0 + + +async def _spawn_codex( + model: str, + thread_id: str | None = None, +) -> asyncio.subprocess.Process: + """Spawn ``codex exec --json`` locally and return the live process. + + Injection seam: tests replace this function with a fake that returns a + mock process whose stdout yields pre-recorded event lines. + + When ``thread_id`` is provided the subcommand becomes + ``codex exec ... resume THREAD_ID -`` so codex continues the prior + conversation thread. + + The caller writes the prompt to stdin after the process starts, then + closes stdin so codex knows input is complete. + """ + base_flags = [ + "--json", + "--skip-git-repo-check", + "--dangerously-bypass-approvals-and-sandbox", + "--model", + model, + ] + + if thread_id: + cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"] + else: + cmd = ["codex", "exec", *base_flags, "-"] + + return await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + # Discard stderr: codex --json writes events to stdout; its stderr is + # progress/debug noise. Capturing it with PIPE but never reading it + # would deadlock once codex fills the OS pipe buffer (~64 KB). + stderr=asyncio.subprocess.DEVNULL, + env={**os.environ}, + ) + + +async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]: + """Yield newline-delimited JSON lines from the process stdout. + + Uses an incremental UTF-8 decoder so a multibyte character split across two + 4 KB reads is decoded correctly instead of being corrupted at the boundary.
+ """ + assert process.stdout is not None + decoder = codecs.getincrementaldecoder("utf-8")(errors="replace") + buffer = "" + while True: + chunk = await process.stdout.read(4096) + if not chunk: + break + buffer += decoder.decode(chunk) + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + buffer += decoder.decode(b"", final=True) + if buffer.strip(): + yield buffer.strip() + + +@acp.on_task_create +async def handle_task_create(params: CreateTaskParams): + """Initialize per-task state on task creation.""" + logger.info("Task created: %s", params.task.id) + await adk.state.create( + task_id=params.task.id, + agent_id=params.agent.id, + state=ConversationState(), + ) + + +@acp.on_task_event_send +async def handle_task_event_send(params: SendEventParams): + """Handle each user message: spawn codex, stream events, save thread ID.""" + task_id = params.task.id + agent_id = params.agent.id + user_message = params.event.content.content + + logger.info("Processing message for task %s", task_id) + + await adk.messages.create(task_id=task_id, content=params.event.content) + + task_state = await adk.state.get_by_task_and_agent(task_id=task_id, agent_id=agent_id) + if task_state is None: + state = ConversationState() + task_state = await adk.state.create(task_id=task_id, agent_id=agent_id, state=state) + else: + state = ConversationState.model_validate(task_state.state) + + state.turn_number += 1 + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name=f"Turn {state.turn_number}", + input={"message": user_message}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + start_ms = int(time.monotonic() * 1000) + + process = await _spawn_codex(MODEL, thread_id=state.codex_thread_id) + + assert process.stdin is not None + process.stdin.write(user_message.encode("utf-8")) + await process.stdin.drain() + process.stdin.close() + + turn = CodexTurn( + events=_process_stdout(process), + 
model=MODEL, + ) + + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + + result = await emitter.auto_send_turn(turn) + + await process.wait() + + # Record the real wall-clock duration AFTER streaming completes; setting + # it before the stream ran would capture only subprocess spawn overhead. + turn.duration_ms = int(time.monotonic() * 1000) - start_ms + + usage = turn.usage() + + # Persist the codex session id (public accessor; valid post-stream) so the + # next turn resumes the same session. + if turn.session_id: + state.codex_thread_id = turn.session_id + + await adk.state.update( + state_id=task_state.id, + task_id=task_id, + agent_id=agent_id, + state=state, + ) + + if turn_span: + turn_span.output = { + "final_text": result.final_text, + "model": usage.model, + } + + +@acp.on_task_cancel +async def handle_task_canceled(params: CancelTaskParams): + logger.info("Task canceled: %s", params.task.id) diff --git a/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 b/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 new file mode 100644 index 000000000..e499b1dc1 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/pyproject.toml.j2 @@ -0,0 +1,33 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 new file mode 100644 index 
000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-codex/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 b/src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 b/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY 
--from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 new file mode 100644 index 000000000..4d9f41d45 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/Dockerfile.j2 @@ -0,0 +1,43 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +#
Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 b/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 new file mode 100644 index 000000000..9611e83bd --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/README.md.j2 @@ -0,0 +1,69 @@ +# {{ agent_name }} - AgentEx Async OpenAI Agents SDK Agent + +This template builds an **asynchronous** (non-Temporal) agent built on the +**OpenAI Agents SDK**, delivered through the unified harness surface on AgentEx: +- Defines an OpenAI Agents SDK `Agent` (with an example weather tool) inline in + `acp.py` +- Wraps the SDK run in an `OpenAITurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` + (the async Redis push path), so the UI receives output in real time +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- An `OPENAI_API_KEY` in your environment (or a `LITELLM_API_KEY`, which is + copied to `OPENAI_API_KEY` for LiteLLM-proxy compatibility) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, agent + tool definitions, event handlers +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} 
+``` + +## Key Concepts + +### Async ACP with the harness +The async ACP model streams events over Redis instead of an HTTP response. The +`@acp.on_task_event_send` handler runs the OpenAI Agents SDK and pushes the +harness events to the task stream. + +### The unified harness surface +`OpenAITurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes the SDK's streamed run into canonical AgentEx events; the emitter +traces and delivers them. + +## Development + +### 1. Add Your Own Tools +Define new `@function_tool` functions in `project/acp.py` and add them to the +agent's `tools=[...]` list in `create_agent()`. + +### 2. Customize the Agent +Edit `MODEL_NAME` and `INSTRUCTIONS` in `project/acp.py` to change the model or +system prompt. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 b/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 new file mode 100644 index 000000000..d8c10a65a --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. If you don't create a new task, each message will be sent to a new task. 
The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", +
"execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# When processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# When processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print only the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + "
print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(delta)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(content)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment.
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 new file mode 100644 index 000000000..deae08dee --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/manifest.yaml.j2 @@ -0,0 +1,115 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1.
All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: async + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true 
to use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: [] # Update with your credentials + # - env_var_name: LITELLM_API_KEY + # secret_name: litellm-api-key + # secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: {} # Update with your environment variables + # LITELLM_API_KEY: "" + # OPENAI_BASE_URL: "" + # OPENAI_ORG_ID: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 b/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 new file mode 100644 index 000000000..b430fa07d --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/project/acp.py.j2 @@ -0,0 +1,135 @@ +"""ACP handler for {{ agent_name }} — an async OpenAI Agents SDK agent. + +Uses the async ACP model with Redis streaming instead of HTTP yields. The +OpenAI Agents SDK run is wrapped in an ``OpenAITurn`` and pushed to the task +stream via ``UnifiedEmitter.auto_send_turn`` — the async delivery path of the +unified harness surface. 
``auto_send_turn`` returns a ``TurnResult`` carrying +the accumulated final text and normalized usage. + +The agent and its tools are defined inline below so this template stays a +single, self-contained ``acp.py``. +""" + +from __future__ import annotations + +import os +from datetime import datetime + +from dotenv import load_dotenv + +load_dotenv() + +from agents import Agent, Runner, function_tool, set_tracing_disabled + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CancelTaskParams, CreateTaskParams +from agentex.lib.types.fastacp import AsyncACPConfig +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.lib.core.harness.emitter import UnifiedEmitter +from agentex.lib.adk.providers._modules.openai_turn import OpenAITurn +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +# Disable the openai-agents SDK's native tracer so it doesn't ship traces to +# api.openai.com using OPENAI_API_KEY (which may be a LiteLLM proxy key). +# SGP tracing below still runs via the Agentex tracing manager. +set_tracing_disabled(True) + +logger = make_logger(__name__) + +# LiteLLM proxy auth: copy LITELLM_API_KEY to OPENAI_API_KEY for OpenAI client compatibility. +_litellm_key = os.environ.get("LITELLM_API_KEY") +if _litellm_key and not os.environ.get("OPENAI_API_KEY"): + os.environ["OPENAI_API_KEY"] = _litellm_key + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create( + acp_type="async", + config=AsyncACPConfig(type="base"), +) + +MODEL_NAME = "gpt-4o" +INSTRUCTIONS = """You are a helpful AI assistant with access to tools. 
+ +Current date and time: {timestamp} + +Guidelines: +- Be concise and helpful +- Use the weather tool when the user asks about the weather +- Always report the real tool output back to the user +""" + + +@function_tool +def get_weather(city: str) -> str: + """Get the current weather for a city.""" + return f"The weather in {city} is sunny and 72°F" + + +def create_agent() -> Agent: + """Build and return the OpenAI Agents SDK agent with the weather tool.""" + return Agent( + name="{{ agent_name }}", + model=MODEL_NAME, + instructions=INSTRUCTIONS.format(timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S")), + tools=[get_weather], + ) + + +_agent = None + + +def get_agent() -> Agent: + global _agent + if _agent is None: + _agent = create_agent() + return _agent + + +@acp.on_task_create +async def handle_task_create(params: CreateTaskParams): + logger.info(f"Task created: {params.task.id}") + + +@acp.on_task_event_send +async def handle_task_event_send(params: SendEventParams): + """Handle each user message: run the agent and auto-send its turn.""" + agent = get_agent() + task_id = params.task.id + user_message = params.event.content.content + + logger.info(f"Processing message for task {task_id}") + + # Echo the user's message into the task history. 
+ await adk.messages.create(task_id=task_id, content=params.event.content) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name="message", + input={"message": user_message}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + result = Runner.run_streamed(starting_agent=agent, input=user_message) + turn = OpenAITurn(result=result, model=MODEL_NAME) + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + turn_result = await emitter.auto_send_turn(turn) + if turn_span: + turn_span.output = {"final_output": turn_result.final_text} + + +@acp.on_task_cancel +async def handle_task_canceled(params: CancelTaskParams): + logger.info(f"Task canceled: {params.task.id}") diff --git a/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2 b/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2 new file mode 100644 index 000000000..4b9c7ed71 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/pyproject.toml.j2 @@ -0,0 +1,34 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "openai-agents", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2 b/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2 new file mode 100644 index 000000000..14779c089 --- /dev/null +++ b/src/agentex/lib/cli/templates/default-openai-agents/requirements.txt.j2 @@ -0,0 +1,11 @@ +# Install agentex-sdk from 
local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# OpenAI Agents SDK +openai-agents + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 b/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 b/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + 
postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 new file mode 100644 index 000000000..4d9f41d45 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/Dockerfile.j2 @@ -0,0 +1,43 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ 
project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 b/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 new file mode 100644 index 000000000..7e38eddec --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/README.md.j2 @@ -0,0 +1,64 @@ +# {{ agent_name }} - AgentEx Sync Claude Code Agent + +This template builds a **synchronous** agent that drives the **Claude Code CLI** +through the unified harness surface on AgentEx: +- Spawns `claude -p --output-format stream-json --verbose` as a local subprocess +- Wraps the CLI's stdout stream in a `ClaudeCodeTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.yield_turn` + (the sync HTTP yield path) +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, and message handler +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Sync ACP with the harness +The sync ACP model uses HTTP request/response. The `@acp.on_message_send` +handler spawns the Claude Code CLI and yields the harness events back to the +client as they arrive. 
+ +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/acp.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 b/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 new file mode 100644 index 000000000..d8c10a65a --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. If you don't create a new task, each message will be sent to a new task. 
The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# When processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# When processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print only the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + " 
print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(delta)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(content)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - Otherwise it will be derived separately. However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..7bf2cb355 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/manifest.yaml.j2 @@ -0,0 +1,117 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. 
All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: sync + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true to 
use Temporal workflows for long-running tasks + temporal: + enabled: false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: LITELLM_API_KEY + secret_name: litellm-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: + LITELLM_API_KEY: "" # Set your LLM API key + # OPENAI_BASE_URL: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2 new file mode 100644 index 000000000..95b370761 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/project/acp.py.j2 @@ -0,0 +1,135 @@ +"""ACP handler for {{ agent_name }} — a sync Claude Code agent. + +Spawns ``claude -p --output-format stream-json --verbose`` as a LOCAL +asyncio subprocess (no Scale sandbox — that is a production concern). Stdout +lines are fed into ``ClaudeCodeTurn``, which wraps +``convert_claude_code_to_agentex_events``. 
Events are delivered via +``UnifiedEmitter.yield_turn``, the sync HTTP yield path. + +Live runs require the ``claude`` CLI to be installed and an +ANTHROPIC_API_KEY (or equivalent credential) to be in the environment. +""" + +from __future__ import annotations + +import os +import asyncio +from typing import AsyncIterator, AsyncGenerator + +from dotenv import load_dotenv + +load_dotenv() + +import agentex.lib.adk as adk +from agentex.lib.adk import ClaudeCodeTurn +from agentex.lib.types.acp import SendMessageParams +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.types.task_message_update import TaskMessageUpdate +from agentex.types.task_message_content import TaskMessageContent +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +logger = make_logger(__name__) + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create(acp_type="sync") + + +async def _spawn_claude(prompt: str) -> AsyncIterator[str]: + """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines. + + This is a seam: tests can replace it with a fake async iterator of + pre-recorded lines so no real CLI invocation is needed offline. + """ + proc = await asyncio.create_subprocess_exec( + "claude", + "-p", + "--output-format", + "stream-json", + "--verbose", + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + assert proc.stdout is not None + assert proc.stdin is not None + + proc.stdin.write(prompt.encode()) + proc.stdin.close() + + # Drain stderr concurrently. 
With --verbose, Claude Code can write enough to + # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks + # on its stderr write while we block reading stdout — a deadlock. A + # background task keeps stderr flowing so stdout never stalls. + async def _drain_stderr() -> None: + assert proc.stderr is not None + async for _ in proc.stderr: + pass + + stderr_task = asyncio.create_task(_drain_stderr()) + + try: + buffer = "" + async for chunk in proc.stdout: + buffer += chunk.decode("utf-8", errors="replace") + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + + if buffer.strip(): + yield buffer.strip() + + await proc.wait() + finally: + # Release the subprocess and stderr drain task even if the consumer + # abandons the generator early (task cancellation / client disconnect): + # cancel the drain task and terminate+reap the process if it is still + # running, so neither is leaked. + stderr_task.cancel() + try: + await stderr_task + except asyncio.CancelledError: + pass + if proc.returncode is None: + try: + proc.terminate() + except ProcessLookupError: + pass + await proc.wait() + + +@acp.on_message_send +async def handle_message_send( + params: SendMessageParams, +) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: + """Handle an incoming message: run Claude Code locally and stream events.""" + task_id = params.task.id + prompt = params.content.content + logger.info("Processing message for task %s", task_id) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name="message", + input={"message": prompt}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + turn = ClaudeCodeTurn(_spawn_claude(prompt)) + async for event in emitter.yield_turn(turn): + yield event diff --git 
a/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2 b/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2 new file mode 100644 index 000000000..e499b1dc1 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/pyproject.toml.j2 @@ -0,0 +1,33 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 b/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-claude-code/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 b/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git 
a/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 b/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..582434ac9 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/Dockerfile-uv.j2 @@ -0,0 +1,47 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" +ENV 
PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-codex/Dockerfile.j2 b/src/agentex/lib/cli/templates/sync-codex/Dockerfile.j2 new file mode 100644 index 000000000..4d9f41d45 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/Dockerfile.j2 @@ -0,0 +1,43 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + + +# Set environment variables +ENV PYTHONPATH=/app + +# Run the agent using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-codex/README.md.j2 b/src/agentex/lib/cli/templates/sync-codex/README.md.j2 new file mode 100644 index 000000000..4ca1aeccf --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/README.md.j2 @@ -0,0 +1,67 @@ +# {{ agent_name }} - AgentEx Sync Codex Agent + +This template builds a **synchronous** agent that drives the **Codex CLI** +through the unified harness surface on AgentEx: +- Spawns `codex exec 
--json` as a local subprocess +- Wraps the CLI's stdout stream in a `CodexTurn` +- Delivers canonical `StreamTaskMessage*` events via `UnifiedEmitter.yield_turn` + (the sync HTTP yield path) +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ └── acp.py # ACP server, subprocess spawn, and message handler +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Sync ACP with the harness +The sync ACP model uses HTTP request/response. The `@acp.on_message_send` +handler spawns the Codex CLI and yields the harness events back to the client +as they arrive. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/acp.py` to change the CLI flags or how the +prompt is delivered. + +### 3. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. 
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 new file mode 100644 index 000000000..d8c10a65a --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/dev.ipynb.j2 @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# # (Optional) Create a new task. If you don't create a new task, each message will be sent to a new task. 
The server will create the task for you.\n", + "\n", + "# import uuid\n", + "\n", + "# TASK_ID = str(uuid.uuid4())[:8]\n", + "\n", + "# rpc_response = client.agents.rpc_by_name(\n", + "# agent_name=AGENT_NAME,\n", + "# method=\"task/create\",\n", + "# params={\n", + "# \"name\": f\"{TASK_ID}-task\",\n", + "# \"params\": {}\n", + "# }\n", + "# )\n", + "\n", + "# task = rpc_response.result\n", + "# print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Test non streaming response\n", + "from agentex.types import TextContent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_message(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": False\n", + " }\n", + ")\n", + "\n", + "if not rpc_response or not rpc_response.result:\n", + " raise ValueError(\"No result in response\")\n", + "\n", + "# Extract and print just the text content from the response\n", + "for task_message in rpc_response.result:\n", + " content = task_message.content\n", + " if isinstance(content, TextContent):\n", + " text = content.content\n", + " print(text)\n" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "79688331", + "metadata": {}, + "outputs": [], + "source": [ + "# Test streaming response\n", + "from agentex.types.task_message_update import StreamTaskMessageDelta, StreamTaskMessageFull\n", + "from agentex.types.text_delta import TextDelta\n", + "\n", + "\n", + "# The result object of message/send will be a TaskMessageUpdate which is a union of the following types:\n", + "# - StreamTaskMessageStart: \n", + "# - An indicator that a streaming message was started, doesn't contain any useful content\n", + "# - StreamTaskMessageDelta: \n", + "# - A delta of a streaming message, contains the text delta to aggregate\n", + "# - StreamTaskMessageDone: \n", + "# - An indicator that a streaming message was done, doesn't contain any useful content\n", + "# - StreamTaskMessageFull: \n", + "# - A non-streaming message, there is nothing to aggregate, since this contains the full message, not deltas\n", + "\n", + "# When processing StreamTaskMessageDelta, if you are expecting more than TextDeltas, such as DataDelta, ToolRequestDelta, or ToolResponseDelta, you can process them as well\n", + "# When processing StreamTaskMessageFull, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "for agent_rpc_response_chunk in client.agents.send_message_stream(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"stream\": True\n", + " }\n", + "):\n", + " # We know that the result of the message/send when stream is set to True will be a TaskMessageUpdate\n", + " task_message_update = agent_rpc_response_chunk.result\n", + " # Print only the text deltas as they arrive or any full messages\n", + " if isinstance(task_message_update, StreamTaskMessageDelta):\n", + " delta = task_message_update.delta\n", + " if isinstance(delta, TextDelta):\n", + " 
print(delta.text_delta, end=\"\", flush=True)\n", + " else:\n", + " print(f\"Found non-text {type(delta)} object in streaming message.\")\n", + " elif isinstance(task_message_update, StreamTaskMessageFull):\n", + " content = task_message_update.content\n", + " if isinstance(content, TextContent):\n", + " print(content.content)\n", + " else:\n", + " print(f\"Found non-text {type(content)} object in full message.\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c5e7e042", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 b/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 new file mode 100644 index 000000000..73924abdd --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/environments.yaml.j2 @@ -0,0 +1,53 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. 
+ +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# kubernetes: +# # OPTIONAL - If omitted, the namespace is derived separately. However, this can be used to override the derived +# # namespace and deploy the agent within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + diff --git a/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 new file mode 100644 index 000000000..7bf2cb355 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/manifest.yaml.j2 @@ -0,0 +1,117 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. 
All files from include_paths are collected into a build context +# 2. 
The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + +# Agent Configuration +# ----------------- +agent: + acp_type: sync + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: {{ description }} + + # Temporal workflow configuration + # Set enabled: true to use Temporal workflows for long-running tasks + temporal: + enabled: 
false + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: LITELLM_API_KEY + secret_name: litellm-api-key + secret_key: api-key + - env_var_name: SGP_API_KEY + secret_name: sgp-api-key + secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: + LITELLM_API_KEY: "" # Set your LLM API key + # OPENAI_BASE_URL: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret names + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 b/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 new file mode 100644 index 000000000..931385328 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/project/acp.py.j2 @@ -0,0 +1,174 @@ +"""Sync ACP handler for {{ agent_name }} — a Codex CLI harness agent. + +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for a sync (HTTP-yield) ACP agent. + +The handler: +1. Spawns ``codex exec --json`` as a LOCAL asyncio subprocess (no sandbox). + This is correct for local development; production isolation is a separate + concern. +2. Wraps the stdout line stream in a ``CodexTurn``. +3. 
Delivers every canonical ``StreamTaskMessage*`` event via + ``UnifiedEmitter.yield_turn``, which traces + yields each event back to + the HTTP caller in one pass. + +Live runs require: +- ``codex`` CLI on PATH (``npm install -g @openai/codex``) +- ``OPENAI_API_KEY`` set in the environment +""" + +from __future__ import annotations + +import os +import time +import codecs +import asyncio +from typing import AsyncGenerator +from collections.abc import AsyncIterator + +from dotenv import load_dotenv + +load_dotenv() + +import agentex.lib.adk as adk +from agentex.lib.adk import CodexTurn +from agentex.lib.types.acp import SendMessageParams +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.lib.sdk.fastacp.fastacp import FastACP +from agentex.types.task_message_update import TaskMessageUpdate +from agentex.types.task_message_content import TaskMessageContent +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +logger = make_logger(__name__) + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +acp = FastACP.create(acp_type="sync") + +MODEL = os.environ.get("CODEX_MODEL", "o4-mini") + + +async def _spawn_codex(model: str) -> asyncio.subprocess.Process: + """Spawn ``codex exec --json`` locally and return the live process. + + Injection seam: tests replace this function with a fake that returns a + mock process whose stdout yields pre-recorded event lines. 
+ + The flags: + --json machine-readable newline-delimited events + --skip-git-repo-check safe to run outside a git repo + --dangerously-bypass-approvals-and-sandbox + skip interactive approval prompts in a + non-interactive (server) context + --model which OpenAI model to use + + The caller writes the prompt to stdin after the process starts, then + closes stdin so codex knows input is complete. + """ + cmd = [ + "codex", + "exec", + "--json", + "--skip-git-repo-check", + "--dangerously-bypass-approvals-and-sandbox", + "--model", + model, + "-", # read prompt from stdin + ] + return await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + # Discard stderr: codex --json writes events to stdout; its stderr is + # progress/debug noise. Capturing it with PIPE but never reading it + # would deadlock once codex fills the OS pipe buffer (~64 KB). + stderr=asyncio.subprocess.DEVNULL, + env={**os.environ}, + ) + + +async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]: + """Yield newline-delimited JSON lines from the process stdout. + + Uses an incremental UTF-8 decoder so a multibyte character split across two + 4 KB reads is decoded correctly instead of being corrupted at the boundary. 
+ """ + assert process.stdout is not None + decoder = codecs.getincrementaldecoder("utf-8")(errors="replace") + buffer = "" + while True: + chunk = await process.stdout.read(4096) + if not chunk: + break + buffer += decoder.decode(chunk) + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + buffer += decoder.decode(b"", final=True) + if buffer.strip(): + yield buffer.strip() + + +@acp.on_message_send +async def handle_message_send( + params: SendMessageParams, +) -> TaskMessageContent | list[TaskMessageContent] | AsyncGenerator[TaskMessageUpdate, None]: + """Handle each message by running ``codex exec`` locally and streaming events.""" + task_id = params.task.id + user_message = params.content.content + logger.info("Processing message for task %s", task_id) + + start_ms = int(time.monotonic() * 1000) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name="message", + input={"message": user_message}, + data={"__span_type__": "AGENT_WORKFLOW"}, + ) as turn_span: + process = await _spawn_codex(MODEL) + + # Write prompt to stdin then close it so codex knows input is done. + assert process.stdin is not None + process.stdin.write(user_message.encode("utf-8")) + await process.stdin.drain() + process.stdin.close() + + turn = CodexTurn( + events=_process_stdout(process), + model=MODEL, + ) + + emitter = UnifiedEmitter( + task_id=task_id, + trace_id=task_id, + parent_span_id=turn_span.id if turn_span else None, + ) + + async for event in emitter.yield_turn(turn): + yield event + + await process.wait() + + # Record the real wall-clock duration AFTER streaming completes; setting + # it before the stream ran would capture only subprocess spawn overhead. 
+ turn.duration_ms = int(time.monotonic() * 1000) - start_ms + + if turn_span: + usage = turn.usage() + turn_span.output = { + "model": usage.model, + "input_tokens": usage.input_tokens, + "output_tokens": usage.output_tokens, + } diff --git a/src/agentex/lib/cli/templates/sync-codex/pyproject.toml.j2 b/src/agentex/lib/cli/templates/sync-codex/pyproject.toml.j2 new file mode 100644 index 000000000..e499b1dc1 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/pyproject.toml.j2 @@ -0,0 +1,33 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "black", + "isort", + "flake8", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/sync-codex/requirements.txt.j2 b/src/agentex/lib/cli/templates/sync-codex/requirements.txt.j2 new file mode 100644 index 000000000..8c0630384 --- /dev/null +++ b/src/agentex/lib/cli/templates/sync-codex/requirements.txt.j2 @@ -0,0 +1,8 @@ +# Install agentex-sdk from local path +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ 
+.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 new file mode 100644 index 000000000..2a3f1108b --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile-uv.j2 @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install tctl (Temporal CLI) +RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ 
project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile.j2 new file mode 100644 index 000000000..ba47485a9 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/Dockerfile.j2 @@ -0,0 +1,48 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install tctl (Temporal CLI) +RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root 
}}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/README.md.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/README.md.j2 new file mode 100644 index 000000000..35ac019b5 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/README.md.j2 @@ -0,0 +1,73 @@ +# {{ agent_name }} — AgentEx Temporal + Claude Code + +This template builds a **Temporal-durable** agent that drives the **Claude Code +CLI** through the unified harness surface on AgentEx: +- A Temporal workflow holds conversation state (the Claude Code `session_id`) + durably across worker crashes +- Each turn delegates to the `run_claude_code_turn` activity, which spawns the + CLI (subprocess I/O is not permitted on the workflow event loop) +- The activity wraps the CLI's stdout stream in a `ClaudeCodeTurn` and delivers + canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `claude` CLI installed and on your `PATH` +- An `ANTHROPIC_API_KEY` (or equivalent credential) in your environment +- A running Temporal service (provided automatically by the local dev stack) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +This starts both the ACP HTTP server and the Temporal worker. 
+ +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ ├── acp.py # Thin ACP server; FastACP auto-wires to the workflow +│ ├── workflow.py # Temporal workflow (durable conversation state) +│ ├── activities.py # run_claude_code_turn activity (CLI subprocess) +│ └── run_worker.py # Temporal worker entrypoint +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Subprocess must run in an activity +Temporal runs workflow + signal-handler bodies on a deterministic sandbox event +loop that does not implement `subprocess_exec`. The workflow therefore delegates +each turn to the `run_claude_code_turn` activity, which also gains Temporal's +retry + timeout guarantees. + +### The unified harness surface +`ClaudeCodeTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Customize the subprocess +Edit `_spawn_claude` in `project/activities.py` to change the CLI flags, working +directory, or how the prompt is delivered. + +### 2. Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 3. 
Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + 
"\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 new file mode 100644 index 000000000..a3df5e228 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/environments.yaml.j2 @@ -0,0 +1,64 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines 
environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - If omitted, the namespace is derived separately. However, this can be used to override the derived +# # namespace and deploy the agent within the same namespace that already exists for a separate agent. 
+# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + # This is used to override the global helm values.yaml file in the agentex-agent helm charts + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal-worker: + enabled: true + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 new file mode 100644 index 000000000..18cffd54a --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/manifest.yaml.j2 @@ -0,0 +1,140 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4. 
The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + # Path to temporal worker file + # Examples: + # project/run_worker.py (standard) + # workers/temporal.py (custom structure) + # ../shared/worker.py (shared across projects) + worker: project/run_worker.py + + +# Agent Configuration +# ----------------- +agent: + # Type of agent - either sync or async + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: "{{ description 
}}" + + # Temporal workflow configuration + # This enables your agent to run as a Temporal workflow for long-running tasks + temporal: + enabled: true + workflows: + # Name of the workflow class + # Must match the @workflow.defn name in your workflow.py + - name: {{ workflow_name }} + + # Queue name for task distribution + # Used by Temporal to route tasks to your agent + # Convention: <agent_name>_task_queue + queue_name: {{ queue_name }} + + # Optional: Health check port for temporal worker + # Defaults to 80 if not specified + # health_check_port: 80 + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + # - env_var_name: LITELLM_API_KEY + # secret_name: litellm-api-key + # secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: {} + # LITELLM_API_KEY: "" + # OPENAI_BASE_URL: "" + # OPENAI_ORG_ID: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret name + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 new file mode 100644 index
000000000..0515efeeb --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/acp.py.j2 @@ -0,0 +1,31 @@ +"""ACP server for {{ agent_name }} — a Temporal Claude Code agent. + +This file is intentionally thin. When ``acp_type="async"`` is combined +with ``TemporalACPConfig``, FastACP auto-wires: + + HTTP task/create -> @workflow.run on the workflow class + HTTP task/event/send -> @workflow.signal(SignalName.RECEIVE_EVENT) + HTTP task/cancel -> workflow cancellation via the Temporal client + +The actual agent code lives in ``project/workflow.py`` and is executed by +the Temporal worker (``project/run_worker.py``), not by this HTTP process. +""" + +from __future__ import annotations + +import os + +from dotenv import load_dotenv + +load_dotenv() + +from agentex.lib.types.fastacp import TemporalACPConfig +from agentex.lib.sdk.fastacp.fastacp import FastACP + +acp = FastACP.create( + acp_type="async", + config=TemporalACPConfig( + type="temporal", + temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), + ), +) diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 new file mode 100644 index 000000000..b3e9b0f09 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/activities.py.j2 @@ -0,0 +1,139 @@ +"""Temporal activity for {{ agent_name }} — Claude Code harness. + +Subprocess spawning (and any other I/O) must run inside a Temporal *activity*, +not in workflow code. Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(or threads / sockets), so spawning the CLI directly in the signal handler +raises ``NotImplementedError``. This activity runs the Claude Code CLI, drives +the ``ClaudeCodeTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async +Redis push path), and returns the turn result to the workflow. 
+ +The ``_spawn_claude`` async generator is an injectable seam: offline tests +can provide a fake that yields pre-recorded stdout lines so no real CLI runs. +""" + +from __future__ import annotations + +import asyncio +from typing import Any, AsyncIterator +from datetime import datetime + +from temporalio import activity + +from agentex.lib.adk import ClaudeCodeTurn +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.utils.logging import make_logger +from agentex.lib.utils.model_utils import BaseModel + +logger = make_logger(__name__) + +RUN_CLAUDE_CODE_TURN_ACTIVITY = "run_claude_code_turn" + + +class RunClaudeCodeTurnParams(BaseModel): + """Arguments for one Claude Code turn run inside an activity.""" + + task_id: str + prompt: str + trace_id: str | None = None + parent_span_id: str | None = None + session_id: str | None = None + created_at: datetime | None = None + + +class RunClaudeCodeTurnResult(BaseModel): + """Result returned from the activity to the workflow.""" + + final_text: str + session_id: str | None = None + + +async def _spawn_claude(prompt: str, session_id: str | None = None) -> AsyncIterator[str]: + """Spawn ``claude -p --output-format stream-json`` locally and yield stdout lines. + + Pass ``session_id`` to resume a previous Claude Code session (multi-turn + memory via ``-r ``). + + Injectable seam: tests can monkeypatch this with a fake async iterator so no + real CLI invocation is needed offline. + """ + cmd = [ + "claude", + "-p", + "--output-format", + "stream-json", + "--verbose", + ] + if session_id: + cmd.extend(["-r", session_id]) + + proc = await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + assert proc.stdout is not None + assert proc.stdin is not None + + proc.stdin.write(prompt.encode()) + proc.stdin.close() + + # Drain stderr concurrently. 
With --verbose, Claude Code can write enough to + # stderr to fill the OS pipe buffer; if we only read stdout, the CLI blocks + # on its stderr write while we block reading stdout — a deadlock. A + # background task keeps stderr flowing so stdout never stalls. + async def _drain_stderr() -> None: + assert proc.stderr is not None + async for _ in proc.stderr: + pass + + stderr_task = asyncio.create_task(_drain_stderr()) + + try: + buffer = "" + async for chunk in proc.stdout: + buffer += chunk.decode("utf-8", errors="replace") + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + + if buffer.strip(): + yield buffer.strip() + + await proc.wait() + finally: + # Release the subprocess and stderr drain task even if the consumer + # abandons the generator early (task cancellation / client disconnect): + # cancel the drain task and terminate+reap the process if it is still + # running, so neither is leaked. + stderr_task.cancel() + try: + await stderr_task + except asyncio.CancelledError: + pass + if proc.returncode is None: + try: + proc.terminate() + except ProcessLookupError: + pass + await proc.wait() + + +@activity.defn(name=RUN_CLAUDE_CODE_TURN_ACTIVITY) +async def run_claude_code_turn(params: RunClaudeCodeTurnParams) -> dict[str, Any]: + """Run one Claude Code turn end-to-end and stream events to the task. + + Runs in an activity (real asyncio loop) so subprocess I/O is permitted. 
+ """ + emitter = UnifiedEmitter( + task_id=params.task_id, + trace_id=params.trace_id, + parent_span_id=params.parent_span_id, + ) + turn = ClaudeCodeTurn(_spawn_claude(params.prompt, session_id=params.session_id)) + result = await emitter.auto_send_turn(turn, created_at=params.created_at) + + return RunClaudeCodeTurnResult(final_text=result.final_text, session_id=turn.session_id).model_dump() diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/run_worker.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/run_worker.py.j2 new file mode 100644 index 000000000..354326b9d --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/run_worker.py.j2 @@ -0,0 +1,41 @@ +"""Temporal worker for {{ agent_name }} — Claude Code harness. + +Run as a separate long-lived process alongside the ACP HTTP server. The +worker polls Temporal for workflow + activity tasks and executes them. + +The Claude Code CLI subprocess runs in the ``run_claude_code_turn`` activity +(registered below alongside the built-in Agentex activities), because +subprocess I/O is not permitted on the Temporal workflow event loop. 
+""" + +import asyncio + +from project.workflow import {{ workflow_class }} +from project.activities import run_claude_code_turn +from agentex.lib.utils.debug import setup_debug_if_enabled +from agentex.lib.utils.logging import make_logger +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.activities import get_all_activities +from agentex.lib.core.temporal.workers.worker import AgentexWorker + +environment_variables = EnvironmentVariables.refresh() +logger = make_logger(__name__) + + +async def main(): + setup_debug_if_enabled() + + task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE + if task_queue_name is None: + raise ValueError("WORKFLOW_TASK_QUEUE is not set") + + worker = AgentexWorker(task_queue=task_queue_name) + + await worker.run( + activities=[run_claude_code_turn, *get_all_activities()], + workflow={{ workflow_class }}, + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/project/workflow.py.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/project/workflow.py.j2 new file mode 100644 index 000000000..06d68a7b5 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/project/workflow.py.j2 @@ -0,0 +1,135 @@ +"""Temporal workflow for {{ agent_name }} — Claude Code harness. + +Holds conversation state (session_id for multi-turn resume) durably across +crashes. Each user message triggers ``on_task_event_send``, which delegates the +turn to the ``run_claude_code_turn`` activity. The activity spawns the Claude +Code CLI, wraps its stdout in ``ClaudeCodeTurn``, and delivers the turn via +``UnifiedEmitter.auto_send_turn`` (the async Redis push path). + +Note on subprocess inside Temporal +------------------------------------ +Subprocess (and all other) I/O must run in a Temporal *activity*, never in +workflow code. 
Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(spawning the CLI there raises ``NotImplementedError``). The activity also gets +Temporal's retry + timeout guarantees. +""" + +from __future__ import annotations + +import os +import json +from datetime import timedelta + +from temporalio import workflow + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CreateTaskParams +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.types.workflow import SignalName +from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +with workflow.unsafe.imports_passed_through(): + from project.activities import RunClaudeCodeTurnParams, run_claude_code_turn + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +environment_variables = EnvironmentVariables.refresh() + +if environment_variables.WORKFLOW_NAME is None: + raise ValueError("Environment variable WORKFLOW_NAME is not set") +if environment_variables.AGENT_NAME is None: + raise ValueError("Environment variable AGENT_NAME is not set") + +logger = make_logger(__name__) + + +@workflow.defn(name=environment_variables.WORKFLOW_NAME) +class {{ workflow_class }}(BaseWorkflow): + """Temporal workflow that runs Claude Code locally for each user message. + + Persists the Claude Code session_id across turns so the CLI can resume + the conversation (``-r ``). Temporal's durable state ensures + the session_id survives worker crashes. 
+ """ + + def __init__(self): + super().__init__(display_name=environment_variables.AGENT_NAME) + self._complete_task = False + self._turn_number = 0 + # Claude Code session_id for multi-turn resume. + self._session_id: str | None = None + + @workflow.signal(name=SignalName.RECEIVE_EVENT) + async def on_task_event_send(self, params: SendEventParams) -> None: + """Handle a user message: spawn Claude Code and push events to the task stream.""" + self._turn_number += 1 + task_id = params.task.id + prompt = params.event.content.content + logger.info("Turn %d for task %s", self._turn_number, task_id) + + await adk.messages.create(task_id=task_id, content=params.event.content) + + async with adk.tracing.span( + trace_id=task_id, + task_id=task_id, + name=f"Turn {self._turn_number}", + input={"message": prompt}, + ) as span: + # Delegate the subprocess turn to an activity: subprocess I/O is not + # permitted on the Temporal workflow event loop. The activity streams + # events to the task and returns the final text + session_id. + # workflow.now() gives a deterministic timestamp under replay. + result = await workflow.execute_activity( + run_claude_code_turn, + RunClaudeCodeTurnParams( + task_id=task_id, + prompt=prompt, + trace_id=task_id, + parent_span_id=span.id if span else None, + session_id=self._session_id, + created_at=workflow.now(), + ), + start_to_close_timeout=timedelta(minutes=5), + ) + + # Capture session_id to enable Claude Code resume on the next turn. 
+ sid = result.get("session_id") + if sid: + self._session_id = sid + + if span: + span.output = {"final_text": result.get("final_text")} + + @workflow.run + async def on_task_create(self, params: CreateTaskParams) -> str: + logger.info("Task created: %s", params.task.id) + + await adk.messages.create( + task_id=params.task.id, + content=TextContent( + author="agent", + content=( + f"Task initialized with params:\n{json.dumps(params.params, indent=2)}\n" + "Send me a message and I'll run it through Claude Code locally." + ), + ), + ) + + await workflow.wait_condition(lambda: self._complete_task, timeout=None) + return "Task completed" + + @workflow.signal + async def complete_task_signal(self) -> None: + logger.info("Received complete_task signal") + self._complete_task = True diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 new file mode 100644 index 000000000..2c6ec9c2f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-claude-code/pyproject.toml.j2 @@ -0,0 +1,37 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "temporalio>=1.18.2", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-asyncio", + "httpx", + "black", + "isort", + "flake8", + "debugpy>=1.8.15", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 b/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 new file mode 100644 index 000000000..a060d2331 --- /dev/null +++ 
b/src/agentex/lib/cli/templates/temporal-claude-code/requirements.txt.j2 @@ -0,0 +1,11 @@ +# Agentex SDK +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Temporal workflow engine +temporalio>=1.18.2 + +# Loads .env files for local development +python-dotenv>=1.0,<2 diff --git a/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 b/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 new file mode 100644 index 000000000..c2d7fca4d --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/.dockerignore.j2 @@ -0,0 +1,43 @@ +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Environments +.env** +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# IDE +.idea/ +.vscode/ +*.swp +*.swo + +# Git +.git +.gitignore + +# Misc +.DS_Store diff --git a/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 b/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 new file mode 100644 index 000000000..015f49ef7 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/.env.example.j2 @@ -0,0 +1,13 @@ +# {{ agent_name }} - Environment Variables +# Copy this file to .env and fill in the values + +# API key for your LLM provider +LITELLM_API_KEY= + +# LLM base URL (optional - override to use a different provider) +# OPENAI_BASE_URL= + +# SGP Configuration (optional - for tracing) +# SGP_API_KEY= +# SGP_ACCOUNT_ID= +# SGP_CLIENT_BASE_URL= diff --git a/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 new file mode 100644 index 000000000..2a3f1108b --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile-uv.j2 @@ -0,0 +1,55 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get 
update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/** + +# Install tctl (Temporal CLI) +RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +ENV UV_COMPILE_BYTECODE=1 +ENV UV_LINK_MODE=copy +ENV UV_HTTP_TIMEOUT=1000 + +WORKDIR /app/{{ project_path_from_build_root }} + +# Copy dependency files for layer caching +COPY {{ project_path_from_build_root }}/pyproject.toml {{ project_path_from_build_root }}/uv.lock ./ + +# Install dependencies (without project itself, for layer caching) +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-install-project --no-dev + +# Copy the project code +COPY {{ project_path_from_build_root }}/project ./project + +# Install the project +RUN --mount=type=cache,target=/root/.cache/uv \ + uv sync --locked --no-dev + +ENV PATH="/app/{{ project_path_from_build_root }}/.venv/bin:$PATH" + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 new file mode 100644 index 000000000..ba47485a9 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/Dockerfile.j2 @@ -0,0 +1,48 @@ +# syntax=docker/dockerfile:1.3 +FROM python:3.12-slim +COPY --from=ghcr.io/astral-sh/uv:0.6.4 /uv /uvx /bin/ + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + htop \ + vim \ + curl \ + tar \ + python3-dev \ + 
postgresql-client \ + build-essential \ + libpq-dev \ + gcc \ + cmake \ + netcat-openbsd \ + nodejs \ + npm \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +# Install tctl (Temporal CLI) +RUN curl -L https://github.com/temporalio/tctl/releases/download/v1.18.1/tctl_1.18.1_linux_arm64.tar.gz -o /tmp/tctl.tar.gz && \ + tar -xzf /tmp/tctl.tar.gz -C /usr/local/bin && \ + chmod +x /usr/local/bin/tctl && \ + rm /tmp/tctl.tar.gz + +RUN uv pip install --system --upgrade pip setuptools wheel + +ENV UV_HTTP_TIMEOUT=1000 + +# Copy just the requirements file to optimize caching +COPY {{ project_path_from_build_root }}/requirements.txt /app/{{ project_path_from_build_root }}/requirements.txt + +WORKDIR /app/{{ project_path_from_build_root }} + +# Install the required Python packages +RUN uv pip install --system -r requirements.txt + +# Copy the project code +COPY {{ project_path_from_build_root }}/project /app/{{ project_path_from_build_root }}/project + +# Run the ACP server using uvicorn +CMD ["uvicorn", "project.acp:acp", "--host", "0.0.0.0", "--port", "8000"] + +# When we deploy the worker, we will replace the CMD with the following +# CMD ["python", "-m", "run_worker"] \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 b/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 new file mode 100644 index 000000000..794109ff3 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/README.md.j2 @@ -0,0 +1,80 @@ +# {{ agent_name }} — AgentEx Temporal + Codex + +This template builds a **Temporal-durable** agent that drives the **Codex CLI** +through the unified harness surface on AgentEx: +- A Temporal workflow holds conversation state (the codex thread ID) durably + across worker crashes — no external state store needed +- Each turn delegates to the `run_codex_turn` activity, which spawns the CLI + (subprocess I/O is not permitted on the workflow event loop) +- The activity wraps the CLI's stdout stream in a 
`CodexTurn` and delivers + canonical `StreamTaskMessage*` events via `UnifiedEmitter.auto_send_turn` +- Tracing integration to SGP / AgentEx + +## Prerequisites + +- The `codex` CLI installed and on your `PATH` (`npm install -g @openai/codex`) +- An `OPENAI_API_KEY` in your environment +- A running Temporal service (provided automatically by the local dev stack) + +## Running the Agent + +```bash +agentex agents run --manifest manifest.yaml +``` + +This starts both the ACP HTTP server and the Temporal worker. + +## Project Structure + +``` +{{ project_name }}/ +├── project/ +│ ├── __init__.py +│ ├── acp.py # Thin ACP server; FastACP auto-wires to the workflow +│ ├── workflow.py # Temporal workflow (durable conversation state) +│ ├── activities.py # run_codex_turn activity (CLI subprocess) +│ └── run_worker.py # Temporal worker entrypoint +├── Dockerfile +├── manifest.yaml +├── dev.ipynb +{% if use_uv %} +└── pyproject.toml +{% else %} +└── requirements.txt +{% endif %} +``` + +## Key Concepts + +### Subprocess must run in an activity +Temporal runs workflow + signal-handler bodies on a deterministic sandbox event +loop that does not implement `subprocess_exec`. The workflow therefore delegates +each turn to the `run_codex_turn` activity, which also gains Temporal's retry + +timeout guarantees. + +### Durable multi-turn memory +The codex thread ID is kept on the workflow instance; Temporal's durable replay +reconstructs it after a crash, so the next turn resumes the same codex session. + +### The unified harness surface +`CodexTurn` + `UnifiedEmitter` are the unified harness surface. The turn +normalizes CLI output into canonical AgentEx events; the emitter traces and +delivers them. + +## Development + +### 1. Choose a model +Set `CODEX_MODEL` (defaults to `o4-mini`) to control which model codex uses. + +### 2. Customize the subprocess +Edit `_spawn_codex` in `project/activities.py` to change the CLI flags or how +the prompt is delivered. + +### 3. 
Configure Credentials +Set your credentials via `manifest.yaml`, an exported environment variable, or a +`.env` file in the project directory. + +### 4. Run Locally +```bash +export ENVIRONMENT=development && agentex agents run --manifest manifest.yaml +``` diff --git a/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 b/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 new file mode 100644 index 000000000..d3a68303f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/dev.ipynb.j2 @@ -0,0 +1,126 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "36834357", + "metadata": {}, + "outputs": [], + "source": [ + "from agentex import Agentex\n", + "\n", + "client = Agentex(base_url=\"http://localhost:5003\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d1c309d6", + "metadata": {}, + "outputs": [], + "source": [ + "AGENT_NAME = \"{{ agent_name }}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f6e6ef0", + "metadata": {}, + "outputs": [], + "source": [ + "# (REQUIRED) Create a new task. 
For Async agents, you must create a task for messages to be associated with.\n", + "import uuid\n", + "\n", + "rpc_response = client.agents.create_task(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"name\": f\"{str(uuid.uuid4())[:8]}-task\",\n", + " \"params\": {}\n", + " }\n", + ")\n", + "\n", + "task = rpc_response.result\n", + "print(task)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b03b0d37", + "metadata": {}, + "outputs": [], + "source": [ + "# Send an event to the agent\n", + "\n", + "# The response is expected to be a list of TaskMessage objects, which is a union of the following types:\n", + "# - TextContent: A message with just text content \n", + "# - DataContent: A message with JSON-serializable data content\n", + "# - ToolRequestContent: A message with a tool request, which contains a JSON-serializable request to call a tool\n", + "# - ToolResponseContent: A message with a tool response, which contains response object from a tool call in its content\n", + "\n", + "# When processing the message/send response, if you are expecting more than TextContent, such as DataContent, ToolRequestContent, or ToolResponseContent, you can process them as well\n", + "\n", + "rpc_response = client.agents.send_event(\n", + " agent_name=AGENT_NAME,\n", + " params={\n", + " \"content\": {\"type\": \"text\", \"author\": \"user\", \"content\": \"Hello what can you do?\"},\n", + " \"task_id\": task.id,\n", + " }\n", + ")\n", + "\n", + "event = rpc_response.result\n", + "print(event)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6927cc0", + "metadata": {}, + "outputs": [], + "source": [ + "# Subscribe to the async task messages produced by the agent\n", + "from agentex.lib.utils.dev_tools import subscribe_to_async_task_messages\n", + "\n", + "task_messages = subscribe_to_async_task_messages(\n", + " client=client,\n", + " task=task, \n", + " only_after_timestamp=event.created_at, \n", + " 
print_messages=True,\n", + " rich_print=True,\n", + " timeout=5,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4864e354", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 b/src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 new file mode 100644 index 000000000..a3df5e228 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/environments.yaml.j2 @@ -0,0 +1,64 @@ +# Agent Environment Configuration +# ------------------------------ +# This file defines environment-specific settings for your agent. +# This DIFFERS from the manifest.yaml file in that it is used to program things that are ONLY per environment. + +# ********** EXAMPLE ********** +# schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +# environments: +# dev: +# auth: +# principal: +# user_id: "1234567890" +# user_name: "John Doe" +# user_email: "john.doe@example.com" +# user_role: "admin" +# user_permissions: "read, write, delete" +# helm_overrides: # This is used to override the global helm values.yaml file in the agentex-agent helm charts +# replicas: 3 +# resources: +# requests: +# cpu: "1000m" +# memory: "2Gi" +# limits: +# cpu: "2000m" +# memory: "4Gi" +# env: +# - name: LOG_LEVEL +# value: "DEBUG" +# - name: ENVIRONMENT +# value: "staging" +# +# kubernetes: +# # OPTIONAL - Otherwise it will be derived from separately. 
However, this can be used to override the derived +# # namespace and deploy it within the same namespace that already exists for a separate agent. +# namespace: "team-{{agent_name}}" +# ********** END EXAMPLE ********** + +schema_version: "v1" # This is used to validate the file structure and is not used by the agentex CLI +environments: + dev: + auth: + principal: + user_id: # TODO: Fill in + account_id: # TODO: Fill in + helm_overrides: + # This is used to override the global helm values.yaml file in the agentex-agent helm charts + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" + temporal-worker: + enabled: true + replicaCount: 2 + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 b/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 new file mode 100644 index 000000000..18cffd54a --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/manifest.yaml.j2 @@ -0,0 +1,140 @@ +# Agent Manifest Configuration +# --------------------------- +# This file defines how your agent should be built and deployed. + +# Build Configuration +# ------------------ +# The build config defines what gets packaged into your agent's Docker image. +# This same configuration is used whether building locally or remotely. +# +# When building: +# 1. All files from include_paths are collected into a build context +# 2. The context is filtered by dockerignore rules +# 3. The Dockerfile uses this context to build your agent's image +# 4.
The image is pushed to a registry and used to run your agent +build: + context: + # Root directory for the build context + root: ../ # Keep this as the default root + + # Paths to include in the Docker build context + # Must include: + # - Your agent's directory (your custom agent code) + # These paths are collected and sent to the Docker daemon for building + include_paths: + - {{ project_path_from_build_root }} + + # Path to your agent's Dockerfile + # This defines how your agent's image is built from the context + # Relative to the root directory + dockerfile: {{ project_path_from_build_root }}/Dockerfile + + # Path to your agent's .dockerignore + # Filters unnecessary files from the build context + # Helps keep build context small and builds fast + dockerignore: {{ project_path_from_build_root }}/.dockerignore + + +# Local Development Configuration +# ----------------------------- +# Only used when running the agent locally +local_development: + agent: + port: 8000 # Port where your local ACP server is running + host_address: host.docker.internal # Host address for Docker networking (host.docker.internal for Docker, localhost for direct) + + # File paths for local development (relative to this manifest.yaml) + paths: + # Path to ACP server file + # Examples: + # project/acp.py (standard) + # src/server.py (custom structure) + # ../shared/acp.py (shared across projects) + # /absolute/path/acp.py (absolute path) + acp: project/acp.py + + # Path to temporal worker file + # Examples: + # project/run_worker.py (standard) + # workers/temporal.py (custom structure) + # ../shared/worker.py (shared across projects) + worker: project/run_worker.py + + +# Agent Configuration +# ----------------- +agent: + # Type of agent - either sync or async + acp_type: async + + # Unique name for your agent + # Used for task routing and monitoring + name: {{ agent_name }} + + # Description of what your agent does + # Helps with documentation and discovery + description: "{{ description 
}}" + + # Temporal workflow configuration + # This enables your agent to run as a Temporal workflow for long-running tasks + temporal: + enabled: true + workflows: + # Name of the workflow class + # Must match the @workflow.defn name in your workflow.py + - name: {{ workflow_name }} + + # Queue name for task distribution + # Used by Temporal to route tasks to your agent + # Convention: _task_queue + queue_name: {{ queue_name }} + + # Optional: Health check port for temporal worker + # Defaults to 80 if not specified + # health_check_port: 80 + + # Optional: Credentials mapping + # Maps Kubernetes secrets to environment variables + # Common credentials include: + credentials: + - env_var_name: REDIS_URL + secret_name: redis-url-secret + secret_key: url + # - env_var_name: LITELLM_API_KEY + # secret_name: litellm-api-key + # secret_key: api-key + + # Optional: Set Environment variables for running your agent locally as well + # as for deployment later on + env: {} + # LITELLM_API_KEY: "" + # OPENAI_BASE_URL: "" + # OPENAI_ORG_ID: "" + + +# Deployment Configuration +# ----------------------- +# Configuration for deploying your agent to Kubernetes clusters +deployment: + # Container image configuration + image: + repository: "" # Update with your container registry + tag: "latest" # Default tag, should be versioned in production + + imagePullSecrets: [] # Update with your image pull secret name + # - name: my-registry-secret + + # Global deployment settings that apply to all clusters + # These can be overridden in cluster-specific environments (environments.yaml) + global: + # Default replica count + replicaCount: 1 + + # Default resource requirements + resources: + requests: + cpu: "500m" + memory: "1Gi" + limits: + cpu: "1000m" + memory: "2Gi" \ No newline at end of file diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 new file mode 100644 index 000000000..7ef5744f0 --- 
/dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/acp.py.j2 @@ -0,0 +1,32 @@ +"""ACP server for {{ agent_name }} — a Temporal Codex harness agent. + +This file is intentionally thin. When ``acp_type="async"`` is combined with +``TemporalACPConfig(type="temporal", ...)``, FastACP auto-wires: + + HTTP task/create -> @workflow.run on the workflow class + HTTP task/event/send -> @workflow.signal(SignalName.RECEIVE_EVENT) + HTTP task/cancel -> workflow cancellation via the Temporal client + +so we don't define any handlers here. The actual agent code lives in +``project/workflow.py`` and is executed by the Temporal worker +(``project/run_worker.py``), not by this HTTP process. +""" + +from __future__ import annotations + +import os + +from dotenv import load_dotenv + +load_dotenv() + +from agentex.lib.types.fastacp import TemporalACPConfig +from agentex.lib.sdk.fastacp.fastacp import FastACP + +acp = FastACP.create( + acp_type="async", + config=TemporalACPConfig( + type="temporal", + temporal_address=os.getenv("TEMPORAL_ADDRESS", "localhost:7233"), + ), +) diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 new file mode 100644 index 000000000..8d48164fc --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/activities.py.j2 @@ -0,0 +1,145 @@ +"""Temporal activity for {{ agent_name }} — Codex harness. + +Subprocess spawning (and any other I/O) must run inside a Temporal *activity*, +not in workflow code. Temporal runs workflow + signal-handler bodies on a +deterministic sandbox event loop that does not implement ``subprocess_exec`` +(or threads / sockets), so spawning ``codex exec`` directly in the signal +handler raises ``NotImplementedError``. This activity runs codex, drives the +``CodexTurn`` through ``UnifiedEmitter.auto_send_turn`` (the async Redis push +path), and returns the turn result to the workflow. 
+ +The ``_spawn_codex`` / ``_process_stdout`` seams are injectable: offline tests +can replace them with fakes that yield pre-recorded event lines so no real CLI +runs. +""" + +from __future__ import annotations + +import os +import codecs +import asyncio +from typing import Any +from datetime import datetime +from collections.abc import AsyncIterator + +from temporalio import activity + +from agentex.lib.adk import CodexTurn +from agentex.lib.core.harness import UnifiedEmitter +from agentex.lib.utils.logging import make_logger +from agentex.lib.utils.model_utils import BaseModel + +logger = make_logger(__name__) + +RUN_CODEX_TURN_ACTIVITY = "run_codex_turn" + + +class RunCodexTurnParams(BaseModel): + """Arguments for one codex turn run inside an activity.""" + + task_id: str + prompt: str + model: str + trace_id: str | None = None + parent_span_id: str | None = None + thread_id: str | None = None + created_at: datetime | None = None + + +class RunCodexTurnResult(BaseModel): + """Result returned from the activity to the workflow.""" + + final_text: str + session_id: str | None = None + model: str | None = None + + +async def _spawn_codex( + model: str, + thread_id: str | None = None, +) -> asyncio.subprocess.Process: + """Spawn ``codex exec --json`` locally and return the live process. + + Injection seam: tests replace this function with a fake that returns a + mock process whose stdout yields pre-recorded event lines. + + The caller writes the prompt to stdin after the process starts, then + closes stdin so codex knows input is complete. 
+ """ + base_flags = [ + "--json", + "--skip-git-repo-check", + "--dangerously-bypass-approvals-and-sandbox", + "--model", + model, + ] + + if thread_id: + cmd = ["codex", "exec", *base_flags, "resume", thread_id, "-"] + else: + cmd = ["codex", "exec", *base_flags, "-"] + + return await asyncio.create_subprocess_exec( + *cmd, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + # Discard stderr: codex --json writes events to stdout; its stderr is + # progress/debug noise. Capturing it with PIPE but never reading it + # would deadlock once codex fills the OS pipe buffer (~64 KB). + stderr=asyncio.subprocess.DEVNULL, + env={**os.environ}, + ) + + +async def _process_stdout(process: asyncio.subprocess.Process) -> AsyncIterator[str]: + """Yield newline-delimited JSON lines from the process stdout. + + Uses an incremental UTF-8 decoder so a multibyte character split across two + 4 KB reads is decoded correctly instead of being corrupted at the boundary. + """ + assert process.stdout is not None + decoder = codecs.getincrementaldecoder("utf-8")(errors="replace") + buffer = "" + while True: + chunk = await process.stdout.read(4096) + if not chunk: + break + buffer += decoder.decode(chunk) + while "\n" in buffer: + line, buffer = buffer.split("\n", 1) + line = line.strip() + if line: + yield line + buffer += decoder.decode(b"", final=True) + if buffer.strip(): + yield buffer.strip() + + +@activity.defn(name=RUN_CODEX_TURN_ACTIVITY) +async def run_codex_turn(params: RunCodexTurnParams) -> dict[str, Any]: + """Run one codex turn end-to-end and stream events to the task. + + Runs in an activity (real asyncio loop) so subprocess I/O is permitted. 
+ """ + process = await _spawn_codex(params.model, thread_id=params.thread_id) + + assert process.stdin is not None + process.stdin.write(params.prompt.encode("utf-8")) + await process.stdin.drain() + process.stdin.close() + + turn = CodexTurn(events=_process_stdout(process), model=params.model) + emitter = UnifiedEmitter( + task_id=params.task_id, + trace_id=params.trace_id, + parent_span_id=params.parent_span_id, + ) + result = await emitter.auto_send_turn(turn, created_at=params.created_at) + + await process.wait() + + return RunCodexTurnResult( + final_text=result.final_text, + session_id=turn.session_id, + model=turn.usage().model, + ).model_dump() diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 new file mode 100644 index 000000000..d86519977 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/run_worker.py.j2 @@ -0,0 +1,41 @@ +"""Temporal worker for {{ agent_name }} — Codex harness. + +Run as a separate long-lived process alongside the ACP HTTP server. The +worker polls Temporal for workflow + activity tasks and executes them. + +The codex CLI subprocess runs in the ``run_codex_turn`` activity (registered +below alongside the built-in Agentex activities), because subprocess I/O is not +permitted on the Temporal workflow event loop. 
+""" + +import asyncio + +from project.workflow import {{ workflow_class }} +from project.activities import run_codex_turn +from agentex.lib.utils.debug import setup_debug_if_enabled +from agentex.lib.utils.logging import make_logger +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.activities import get_all_activities +from agentex.lib.core.temporal.workers.worker import AgentexWorker + +environment_variables = EnvironmentVariables.refresh() +logger = make_logger(__name__) + + +async def main(): + setup_debug_if_enabled() + + task_queue_name = environment_variables.WORKFLOW_TASK_QUEUE + if task_queue_name is None: + raise ValueError("WORKFLOW_TASK_QUEUE is not set") + + worker = AgentexWorker(task_queue=task_queue_name) + + await worker.run( + activities=[run_codex_turn, *get_all_activities()], + workflow={{ workflow_class }}, + ) + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 b/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 new file mode 100644 index 000000000..6833a5701 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/project/workflow.py.j2 @@ -0,0 +1,145 @@ +"""Temporal workflow for {{ agent_name }} — Codex harness. + +Demonstrates the ``convert_codex_to_agentex_events`` tap + ``CodexTurn`` + +``UnifiedEmitter`` for a Temporal-durable ACP agent. + +KEY CONCEPTS DEMONSTRATED: +- Running ``codex exec --json`` in the ``run_codex_turn`` activity. Subprocess + I/O is not permitted on the Temporal workflow event loop (the deterministic + sandbox loop does not implement ``subprocess_exec``), so the signal handler + delegates the turn to an activity, which also gets Temporal's retry + timeout + guarantees. +- Wrapping the stdout line stream in a ``CodexTurn`` (inside the activity). 
+- Delivering events via ``UnifiedEmitter.auto_send_turn``, which pushes + ``StreamTaskMessage*`` events to Redis so the UI sees tokens in real time. +- Passing ``created_at=workflow.now()`` for deterministic timestamps under + Temporal replay (required for Temporal-safe delivery). +- Persisting the codex thread ID on the workflow instance itself — Temporal's + workflow state is durable, so no external ``adk.state`` round-trip is needed. +""" + +from __future__ import annotations + +import os +from datetime import timedelta + +from temporalio import workflow + +from agentex.lib import adk +from agentex.lib.types.acp import SendEventParams, CreateTaskParams +from agentex.lib.types.tracing import SGPTracingProcessorConfig +from agentex.lib.utils.logging import make_logger +from agentex.types.text_content import TextContent +from agentex.lib.environment_variables import EnvironmentVariables +from agentex.lib.core.temporal.types.workflow import SignalName +from agentex.lib.core.temporal.workflows.workflow import BaseWorkflow +from agentex.lib.core.tracing.tracing_processor_manager import add_tracing_processor_config + +with workflow.unsafe.imports_passed_through(): + from project.activities import RunCodexTurnParams, run_codex_turn + +add_tracing_processor_config( + SGPTracingProcessorConfig( + sgp_api_key=os.environ.get("SGP_API_KEY", ""), + sgp_account_id=os.environ.get("SGP_ACCOUNT_ID", ""), + sgp_base_url=os.environ.get("SGP_CLIENT_BASE_URL", ""), + ) +) + +environment_variables = EnvironmentVariables.refresh() + +if environment_variables.WORKFLOW_NAME is None: + raise ValueError("Environment variable WORKFLOW_NAME is not set") +if environment_variables.AGENT_NAME is None: + raise ValueError("Environment variable AGENT_NAME is not set") + +logger = make_logger(__name__) + +MODEL = os.environ.get("CODEX_MODEL", "o4-mini") + + +@workflow.defn(name=environment_variables.WORKFLOW_NAME) +class {{ workflow_class }}(BaseWorkflow): + """Long-running Temporal workflow that 
runs codex exec for each turn. + + Conversation state (codex thread ID + turn counter) is kept on the + workflow instance. Temporal's durable replay reconstructs this state if + the worker crashes, so no external ``adk.state`` round-trip is needed. + """ + + def __init__(self): + super().__init__(display_name=environment_variables.AGENT_NAME) + self._complete_task = False + self._turn_number = 0 + self._codex_thread_id: str | None = None + + @workflow.signal(name=SignalName.RECEIVE_EVENT) + async def on_task_event_send(self, params: SendEventParams) -> None: + """Handle a new user message: spawn codex, stream events via UnifiedEmitter.""" + logger.info("Received task event: %s", params.task.id) + self._turn_number += 1 + + await adk.messages.create(task_id=params.task.id, content=params.event.content) + + user_message = params.event.content.content + + async with adk.tracing.span( + trace_id=params.task.id, + task_id=params.task.id, + name=f"Turn {self._turn_number}", + input={"message": user_message}, + ) as span: + # Delegate the subprocess turn to an activity: subprocess I/O is not + # permitted on the Temporal workflow event loop. The activity streams + # events to the task and returns the final text + codex thread id. + # workflow.now() gives a deterministic timestamp under replay. + result = await workflow.execute_activity( + run_codex_turn, + RunCodexTurnParams( + task_id=params.task.id, + prompt=user_message, + model=MODEL, + trace_id=params.task.id, + parent_span_id=span.id if span else None, + thread_id=self._codex_thread_id, + created_at=workflow.now(), + ), + start_to_close_timeout=timedelta(minutes=5), + ) + + # Persist the codex thread id so the next turn resumes the session. 
+ session_id = result.get("session_id") + if session_id: + self._codex_thread_id = session_id + + if span: + span.output = { + "final_text": result.get("final_text"), + "model": result.get("model"), + } + + @workflow.run + async def on_task_create(self, params: CreateTaskParams) -> str: + """Workflow entry point — keep the conversation alive for incoming signals.""" + logger.info("Task created: %s", params.task.id) + + await adk.messages.create( + task_id=params.task.id, + content=TextContent( + author="agent", + content=( + "Task initialized.\n" + "Send me a message and I'll run codex (local subprocess) " + "to answer, streaming events via the unified harness surface." + ), + ), + ) + + await workflow.wait_condition(lambda: self._complete_task, timeout=None) + return "Task completed" + + @workflow.signal + async def complete_task_signal(self) -> None: + """Graceful workflow shutdown signal.""" + logger.info("Received complete_task signal") + self._complete_task = True diff --git a/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 b/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 new file mode 100644 index 000000000..2c6ec9c2f --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/pyproject.toml.j2 @@ -0,0 +1,37 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "{{ project_name }}" +version = "0.1.0" +description = "{{ description }}" +requires-python = ">=3.12" +dependencies = [ + "agentex-sdk", + "scale-gp", + "temporalio>=1.18.2", + "python-dotenv>=1.0,<2", +] + +[project.optional-dependencies] +dev = [ + "pytest", + "pytest-asyncio", + "httpx", + "black", + "isort", + "flake8", + "debugpy>=1.8.15", +] + +[tool.hatch.build.targets.wheel] +packages = ["project"] + +[tool.black] +line-length = 88 +target-version = ['py312'] + +[tool.isort] +profile = "black" +line_length = 88 diff --git a/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 
b/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 new file mode 100644 index 000000000..a060d2331 --- /dev/null +++ b/src/agentex/lib/cli/templates/temporal-codex/requirements.txt.j2 @@ -0,0 +1,11 @@ +# Agentex SDK +agentex-sdk + +# Scale GenAI Platform Python SDK +scale-gp + +# Temporal workflow engine +temporalio>=1.18.2 + +# Loads .env files for local development +python-dotenv>=1.0,<2 From d669c3471a64e5b0ccc9aebb79154b942e84fd14 Mon Sep 17 00:00:00 2001 From: Declan Brady Date: Mon, 22 Jun 2026 20:41:07 -0400 Subject: [PATCH 12/12] fix(tests): use relative imports for the shared harness fakes (pyright) Pyright can't statically resolve the absolute `tests.lib.core.harness._fakes` import (it only works at pytest runtime via the rootdir on sys.path), failing ./scripts/lint. Switch every consumer to a relative import (matching the conformance package's convention) and re-sort the affected import blocks. Co-Authored-By: Claude Opus 4.8 (1M context) --- tests/lib/adk/test_pydantic_ai_sync.py | 3 ++- tests/lib/core/harness/conformance/runner.py | 3 ++- tests/lib/core/harness/test_auto_send.py | 3 ++- tests/lib/core/harness/test_emitter.py | 3 ++- tests/lib/core/harness/test_harness_claude_code_async.py | 3 ++- tests/lib/core/harness/test_harness_claude_code_sync.py | 3 ++- tests/lib/core/harness/test_harness_codex_async.py | 3 ++- tests/lib/core/harness/test_harness_codex_sync.py | 3 ++- tests/lib/core/harness/test_harness_langgraph_async.py | 3 ++- tests/lib/core/harness/test_harness_langgraph_sync.py | 3 ++- tests/lib/core/harness/test_harness_openai_async.py | 3 ++- tests/lib/core/harness/test_harness_openai_sync.py | 3 ++- tests/lib/core/harness/test_harness_pydantic_ai_async.py | 3 ++- tests/lib/core/harness/test_harness_pydantic_ai_sync.py | 3 ++- tests/lib/core/harness/test_tracer.py | 3 ++- tests/lib/core/harness/test_yield_delivery.py | 3 ++- 16 files changed, 32 insertions(+), 16 deletions(-) diff --git 
a/tests/lib/adk/test_pydantic_ai_sync.py b/tests/lib/adk/test_pydantic_ai_sync.py index be5cf4f15..ac9986f2b 100644 --- a/tests/lib/adk/test_pydantic_ai_sync.py +++ b/tests/lib/adk/test_pydantic_ai_sync.py @@ -34,7 +34,6 @@ ) from agentex.lib.core.harness import UnifiedEmitter -from tests.lib.core.harness._fakes import FakeTracing from agentex.types.reasoning_content import ReasoningContent from agentex.types.task_message_delta import TextDelta from agentex.types.tool_request_delta import ToolRequestDelta @@ -54,6 +53,8 @@ ) from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn +from ..core.harness._fakes import FakeTracing + async def _aiter(events: list[Any]) -> AsyncIterator[Any]: for e in events: diff --git a/tests/lib/core/harness/conformance/runner.py b/tests/lib/core/harness/conformance/runner.py index 6eb0007ce..e6928669a 100644 --- a/tests/lib/core/harness/conformance/runner.py +++ b/tests/lib/core/harness/conformance/runner.py @@ -66,7 +66,6 @@ from agentex.types.text_delta import TextDelta from agentex.types.task_message import TaskMessage -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import SpanSignal, StreamTaskMessage from agentex.lib.core.harness.tracer import SpanTracer from agentex.types.task_message_update import ( @@ -78,6 +77,8 @@ from agentex.types.reasoning_content_delta import ReasoningContentDelta from agentex.lib.core.harness.span_derivation import SpanDeriver +from .._fakes import FakeTracing + @dataclass class Fixture: diff --git a/tests/lib/core/harness/test_auto_send.py b/tests/lib/core/harness/test_auto_send.py index 3fd68873e..764dae8b3 100644 --- a/tests/lib/core/harness/test_auto_send.py +++ b/tests/lib/core/harness/test_auto_send.py @@ -15,7 +15,6 @@ from agentex.types.task_message import TaskMessage from agentex.types.text_content import TextContent -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.tracer import SpanTracer from 
agentex.types.task_message_delta import TextDelta from agentex.types.tool_request_delta import ToolRequestDelta @@ -29,6 +28,8 @@ from agentex.types.tool_request_content import ToolRequestContent from agentex.types.tool_response_content import ToolResponseContent +from ._fakes import FakeTracing + class _FakeCtx: """Mirrors StreamingTaskMessageContext: __aenter__ opens (returns self with task_message set), diff --git a/tests/lib/core/harness/test_emitter.py b/tests/lib/core/harness/test_emitter.py index 081ccff5a..3f70660ec 100644 --- a/tests/lib/core/harness/test_emitter.py +++ b/tests/lib/core/harness/test_emitter.py @@ -2,7 +2,6 @@ from agentex.types.task_message import TaskMessage from agentex.types.text_content import TextContent -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import TurnUsage from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.task_message_delta import TextDelta @@ -12,6 +11,8 @@ StreamTaskMessageStart, ) +from ._fakes import FakeTracing + class _FakeCtx: """Minimal StreamingTaskMessageContext fake (see test_auto_send.py).""" diff --git a/tests/lib/core/harness/test_harness_claude_code_async.py b/tests/lib/core/harness/test_harness_claude_code_async.py index 7902971fb..c622d25c1 100644 --- a/tests/lib/core/harness/test_harness_claude_code_async.py +++ b/tests/lib/core/harness/test_harness_claude_code_async.py @@ -35,7 +35,6 @@ import pytest from agentex.types.task_message import TaskMessage -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import TurnResult from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -43,6 +42,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- 
# Native claude-code envelope fixtures # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_claude_code_sync.py b/tests/lib/core/harness/test_harness_claude_code_sync.py index 3876fdb87..b53485499 100644 --- a/tests/lib/core/harness/test_harness_claude_code_sync.py +++ b/tests/lib/core/harness/test_harness_claude_code_sync.py @@ -39,7 +39,6 @@ from typing import Any, AsyncIterator, override -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import OpenSpan, CloseSpan from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -52,6 +51,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._claude_code_turn import ClaudeCodeTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Native claude-code envelope fixtures (copied from the turn + conformance tests) # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_codex_async.py b/tests/lib/core/harness/test_harness_codex_async.py index 5f0e60bb3..c31ebfa49 100644 --- a/tests/lib/core/harness/test_harness_codex_async.py +++ b/tests/lib/core/harness/test_harness_codex_async.py @@ -32,7 +32,6 @@ from typing import Any, AsyncIterator from agentex.types.task_message import TaskMessage -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import TurnResult from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -40,6 +39,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._codex_turn import CodexTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Native codex 
event fixtures # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_codex_sync.py b/tests/lib/core/harness/test_harness_codex_sync.py index 192a36a6b..0209e1e3d 100644 --- a/tests/lib/core/harness/test_harness_codex_sync.py +++ b/tests/lib/core/harness/test_harness_codex_sync.py @@ -46,7 +46,6 @@ from typing import Any, AsyncIterator, override -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import OpenSpan, CloseSpan from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -58,6 +57,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._codex_turn import CodexTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Native codex event fixtures (copied from the turn + conformance tests) # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_langgraph_async.py b/tests/lib/core/harness/test_harness_langgraph_async.py index f94eb75ff..09e92102b 100644 --- a/tests/lib/core/harness/test_harness_langgraph_async.py +++ b/tests/lib/core/harness/test_harness_langgraph_async.py @@ -39,7 +39,6 @@ from agentex.types.task_message import TaskMessage from agentex.types.text_content import TextContent -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import TurnResult from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -47,6 +46,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core 
types are used # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_langgraph_sync.py b/tests/lib/core/harness/test_harness_langgraph_sync.py index dac6966fe..67d213b6a 100644 --- a/tests/lib/core/harness/test_harness_langgraph_sync.py +++ b/tests/lib/core/harness/test_harness_langgraph_sync.py @@ -36,7 +36,6 @@ import pytest -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter from agentex.types.task_message_update import ( @@ -47,6 +46,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._langgraph_turn import LangGraphTurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Remove conftest stubs so real langchain_core types are used # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_openai_async.py b/tests/lib/core/harness/test_harness_openai_async.py index 593142fba..1329b94b9 100644 --- a/tests/lib/core/harness/test_harness_openai_async.py +++ b/tests/lib/core/harness/test_harness_openai_async.py @@ -39,7 +39,6 @@ from agentex.types.text_delta import TextDelta from agentex.types.task_message import TaskMessage from agentex.types.text_content import TextContent -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import TurnResult, StreamTaskMessage from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -53,6 +52,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._openai_turn import OpenAITurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Canonical event fixtures (copied 
from the OpenAI converter contract) # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_openai_sync.py b/tests/lib/core/harness/test_harness_openai_sync.py index d0d25c643..34a9b72c6 100644 --- a/tests/lib/core/harness/test_harness_openai_sync.py +++ b/tests/lib/core/harness/test_harness_openai_sync.py @@ -41,7 +41,6 @@ from agentex.types.text_delta import TextDelta from agentex.types.text_content import TextContent -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import OpenSpan, CloseSpan, StreamTaskMessage from agentex.lib.core.harness.tracer import SpanTracer from agentex.types.reasoning_content import ReasoningContent @@ -57,6 +56,8 @@ from agentex.lib.adk._modules._openai_turn import OpenAITurn from agentex.types.reasoning_content_delta import ReasoningContentDelta +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Canonical event fixtures (copied from the OpenAI converter contract) # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_async.py b/tests/lib/core/harness/test_harness_pydantic_ai_async.py index a5781fb23..4b6b86415 100644 --- a/tests/lib/core/harness/test_harness_pydantic_ai_async.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_async.py @@ -44,7 +44,6 @@ from pydantic_ai.models.test import TestModel from agentex.types.task_message import TaskMessage -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import TurnResult from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -52,6 +51,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn +from ._fakes import FakeTracing + # 
--------------------------------------------------------------------------- # Minimal agent under test # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py index ea7de6c28..04beea81d 100644 --- a/tests/lib/core/harness/test_harness_pydantic_ai_sync.py +++ b/tests/lib/core/harness/test_harness_pydantic_ai_sync.py @@ -38,7 +38,6 @@ from pydantic_ai.models.test import TestModel from agentex.types.text_delta import TextDelta -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import OpenSpan, CloseSpan from agentex.lib.core.harness.tracer import SpanTracer from agentex.lib.core.harness.emitter import UnifiedEmitter @@ -50,6 +49,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.adk._modules._pydantic_ai_turn import PydanticAITurn +from ._fakes import FakeTracing + # --------------------------------------------------------------------------- # Minimal agent under test # --------------------------------------------------------------------------- diff --git a/tests/lib/core/harness/test_tracer.py b/tests/lib/core/harness/test_tracer.py index 46023ffb5..b3d9002c4 100644 --- a/tests/lib/core/harness/test_tracer.py +++ b/tests/lib/core/harness/test_tracer.py @@ -2,10 +2,11 @@ import pytest -from tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.types import OpenSpan, CloseSpan from agentex.lib.core.harness.tracer import SpanTracer +from ._fakes import FakeTracing + @pytest.mark.asyncio async def test_open_then_close_starts_and_ends_span(): diff --git a/tests/lib/core/harness/test_yield_delivery.py b/tests/lib/core/harness/test_yield_delivery.py index eaa064177..ef3861a16 100644 --- a/tests/lib/core/harness/test_yield_delivery.py +++ b/tests/lib/core/harness/test_yield_delivery.py @@ -1,6 +1,5 @@ import pytest -from 
tests.lib.core.harness._fakes import FakeTracing from agentex.lib.core.harness.tracer import SpanTracer from agentex.types.task_message_update import ( StreamTaskMessageDone, @@ -11,6 +10,8 @@ from agentex.types.tool_response_content import ToolResponseContent from agentex.lib.core.harness.yield_delivery import yield_events +from ._fakes import FakeTracing + async def _gen(events): for e in events: