Skip to content

Commit 4fd2ff4

Browse files
declan-scale and claude committed
test(langgraph): adapt conformance test to cross-channel runner (AGX1-373)
Rewrites test_langgraph_conformance.py to use the cross-channel runner from PR #414 (run_cross_channel_conformance, LogicalDelivery) instead of the simpler derive_all-only API it was written against.

The four fixtures (text-only, single-tool, reasoning, multi-step) are retained as canonical StreamTaskMessage* sequences. Each is now exercised by test_cross_channel_equivalence (yield_events vs auto_send logical deliveries and span signals) plus the backward-compat test_span_derivation_is_deterministic guard.

LangGraph tool requests arrive as Full events from the "updates" stream; auto_send handles them via open+close, yielding the same LogicalDelivery on both channels. No coalesce_tool_requests option is needed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent fc7d554 commit 4fd2ff4

1 file changed

Lines changed: 48 additions & 25 deletions

File tree

tests/lib/core/harness/conformance/test_langgraph_conformance.py

Lines changed: 48 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -1,16 +1,18 @@
1-
"""LangGraph conformance fixtures for the cross-channel span-derivation test.
2-
3-
Registers 4 LangGraph event sequences as conformance fixtures:
4-
- text-only: a plain text response (no tool calls)
5-
- single-tool: one tool call + response
6-
- reasoning: a reasoning block + text
7-
- multi-step: two turns with tool calls
8-
9-
AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull``
10-
(from "updates" events), NOT Start+Delta+Done like pydantic-ai. The SpanDeriver
11-
does not produce tool spans from Full events today; that gap is tracked in
12-
AGX1-373. The fixtures here document the current behavior and will be updated
13-
when AGX1-373 resolves.
1+
"""Cross-channel conformance fixtures for LangGraph harness tap.
2+
3+
Each fixture is built as a canonical sequence of ``StreamTaskMessage*`` events
4+
that matches what ``convert_langgraph_to_agentex_events`` (via ``LangGraphTurn``)
5+
emits for the given scenario. The fixtures are registered with the shared
6+
conformance runner and exercised by both the cross-channel equivalence test
7+
(yield_events vs auto_send) and the backward-compatible span-derivation test.
8+
9+
LangGraph-specific note
10+
-----------------------
11+
LangGraph emits tool *requests* as ``StreamTaskMessageFull`` events (from the
12+
"updates" stream), NOT as Start+Delta+Done like pydantic-ai. ``auto_send``
13+
handles Full events by opening a streaming context with the full content and
14+
closing it immediately, so both channels deliver the same logical payload.
15+
No ``coalesce_tool_requests`` option is needed.
1416
"""
1517

1618
from __future__ import annotations
@@ -30,7 +32,7 @@
3032
from agentex.types.tool_response_content import ToolResponseContent
3133
from agentex.types.reasoning_content_delta import ReasoningContentDelta
3234

33-
from .runner import Fixture, register, derive_all
35+
from .runner import Fixture, register, derive_all, run_cross_channel_conformance
3436

3537
# ---------------------------------------------------------------------------
3638
# Fixtures
@@ -56,7 +58,7 @@
5658
_SINGLE_TOOL = Fixture(
5759
name="langgraph-single-tool",
5860
events=[
59-
# LangGraph tool request is a Full event (AGX1-377)
61+
# LangGraph tool request is a Full event (from "updates" stream)
6062
StreamTaskMessageFull(
6163
type="full",
6264
index=0,
@@ -134,7 +136,7 @@
134136
_MULTI_STEP = Fixture(
135137
name="langgraph-multi-step",
136138
events=[
137-
# Turn 1: text + tool call
139+
# Turn 1: streaming text
138140
StreamTaskMessageStart(
139141
type="start",
140142
index=0,
@@ -146,7 +148,7 @@
146148
delta=TextDelta(type="text", text_delta="Let me search for that."),
147149
),
148150
StreamTaskMessageDone(type="done", index=0),
149-
# Tool request (Full — AGX1-377)
151+
# Tool request (Full — from "updates" stream)
150152
StreamTaskMessageFull(
151153
type="full",
152154
index=1,
@@ -169,7 +171,7 @@
169171
content="LangGraph is a framework for...",
170172
),
171173
),
172-
# Turn 2: final text
174+
# Turn 2: final streaming text
173175
StreamTaskMessageStart(
174176
type="start",
175177
index=3,
@@ -191,16 +193,37 @@
191193

192194

193195
# ---------------------------------------------------------------------------
194-
# Tests
196+
# Cross-channel conformance: logical equivalence + span equivalence
195197
# ---------------------------------------------------------------------------
196198

197199

198200
@pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name)
199-
def test_langgraph_span_derivation_is_deterministic(fixture: Fixture):
200-
"""Exercises the cross-channel guarantee: yield and auto-send observe the
201-
same event stream, so span derivation must be deterministic/idempotent.
202-
203-
Deriving twice over the same events yields identical signals (the property
204-
that makes yield vs auto-send equivalent, since both observe the same stream).
201+
@pytest.mark.asyncio
202+
async def test_cross_channel_equivalence(fixture: Fixture) -> None:
203+
"""Assert that yield_events and auto_send produce equivalent logical
204+
deliveries and identical span signals for each LangGraph fixture.
205+
206+
See runner.py for the full contract. The key LangGraph difference: tool
207+
requests arrive as Full events rather than Start+Delta+Done, so auto_send
208+
handles them by opening a streaming context with the full content and
209+
closing it immediately — both channels produce the same LogicalDelivery.
205210
"""
211+
yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture)
212+
213+
assert yield_deliveries == auto_deliveries, (
214+
f"[{fixture.name}] logical deliveries differ:\n yield: {yield_deliveries}\n auto_send: {auto_deliveries}"
215+
)
216+
assert yield_spans == auto_spans, (
217+
f"[{fixture.name}] span signals differ:\n yield: {yield_spans}\n auto_send: {auto_spans}"
218+
)
219+
220+
221+
# ---------------------------------------------------------------------------
222+
# Backward-compatible determinism guard
223+
# ---------------------------------------------------------------------------
224+
225+
226+
@pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name)
227+
def test_span_derivation_is_deterministic(fixture: Fixture) -> None:
228+
"""Span derivation over the same event list is idempotent."""
206229
assert derive_all(fixture.events) == derive_all(fixture.events)

0 commit comments

Comments
 (0)