|
1 | | -"""LangGraph conformance fixtures for the cross-channel span-derivation test. |
2 | | -
|
3 | | -Registers 4 LangGraph event sequences as conformance fixtures: |
4 | | -- text-only: a plain text response (no tool calls) |
5 | | -- single-tool: one tool call + response |
6 | | -- reasoning: a reasoning block + text |
7 | | -- multi-step: two turns with tool calls |
8 | | -
|
9 | | -AGX1-377 note: LangGraph emits tool requests as ``StreamTaskMessageFull`` |
10 | | -(from "updates" events), NOT Start+Delta+Done like pydantic-ai. The SpanDeriver |
11 | | -does not produce tool spans from Full events today; that gap is tracked in |
12 | | -AGX1-373. The fixtures here document the current behavior and will be updated |
13 | | -when AGX1-373 resolves. |
| 1 | +"""Cross-channel conformance fixtures for the LangGraph harness tap. |
| 2 | +
|
| 3 | +Each fixture is built as a canonical sequence of ``StreamTaskMessage*`` events |
| 4 | +that matches what ``convert_langgraph_to_agentex_events`` (via ``LangGraphTurn``) |
| 5 | +emits for the given scenario. The fixtures are registered with the shared |
| 6 | +conformance runner and exercised by both the cross-channel equivalence test |
| 7 | +(yield_events vs auto_send) and the backward-compatible span-derivation test. |
| 8 | +
|
| 9 | +LangGraph-specific note |
| 10 | +----------------------- |
| 11 | +LangGraph emits tool *requests* as ``StreamTaskMessageFull`` events (from the |
| 12 | +"updates" stream), NOT as Start+Delta+Done like pydantic-ai. ``auto_send`` |
| 13 | +handles Full events by opening a streaming context with the full content and |
| 14 | +closing it immediately, so both channels deliver the same logical payload. |
| 15 | +No ``coalesce_tool_requests`` option is needed. |
14 | 16 | """ |
15 | 17 |
|
16 | 18 | from __future__ import annotations |
|
30 | 32 | from agentex.types.tool_response_content import ToolResponseContent |
31 | 33 | from agentex.types.reasoning_content_delta import ReasoningContentDelta |
32 | 34 |
|
33 | | -from .runner import Fixture, register, derive_all |
| 35 | +from .runner import Fixture, register, derive_all, run_cross_channel_conformance |
34 | 36 |
|
35 | 37 | # --------------------------------------------------------------------------- |
36 | 38 | # Fixtures |
|
56 | 58 | _SINGLE_TOOL = Fixture( |
57 | 59 | name="langgraph-single-tool", |
58 | 60 | events=[ |
59 | | - # LangGraph tool request is a Full event (AGX1-377) |
| 61 | + # LangGraph tool request is a Full event (from "updates" stream) |
60 | 62 | StreamTaskMessageFull( |
61 | 63 | type="full", |
62 | 64 | index=0, |
|
134 | 136 | _MULTI_STEP = Fixture( |
135 | 137 | name="langgraph-multi-step", |
136 | 138 | events=[ |
137 | | - # Turn 1: text + tool call |
| 139 | + # Turn 1: streaming text |
138 | 140 | StreamTaskMessageStart( |
139 | 141 | type="start", |
140 | 142 | index=0, |
|
146 | 148 | delta=TextDelta(type="text", text_delta="Let me search for that."), |
147 | 149 | ), |
148 | 150 | StreamTaskMessageDone(type="done", index=0), |
149 | | - # Tool request (Full — AGX1-377) |
| 151 | + # Tool request (Full — from "updates" stream) |
150 | 152 | StreamTaskMessageFull( |
151 | 153 | type="full", |
152 | 154 | index=1, |
|
169 | 171 | content="LangGraph is a framework for...", |
170 | 172 | ), |
171 | 173 | ), |
172 | | - # Turn 2: final text |
| 174 | + # Turn 2: final streaming text |
173 | 175 | StreamTaskMessageStart( |
174 | 176 | type="start", |
175 | 177 | index=3, |
|
191 | 193 |
|
192 | 194 |
|
193 | 195 | # --------------------------------------------------------------------------- |
194 | | -# Tests |
| 196 | +# Cross-channel conformance: logical equivalence + span equivalence |
195 | 197 | # --------------------------------------------------------------------------- |
196 | 198 |
|
197 | 199 |
|
198 | 200 | @pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name) |
199 | | -def test_langgraph_span_derivation_is_deterministic(fixture: Fixture): |
200 | | - """Exercises the cross-channel guarantee: yield and auto-send observe the |
201 | | - same event stream, so span derivation must be deterministic/idempotent. |
202 | | -
|
203 | | - Deriving twice over the same events yields identical signals (the property |
204 | | - that makes yield vs auto-send equivalent, since both observe the same stream). |
| 201 | +@pytest.mark.asyncio |
| 202 | +async def test_cross_channel_equivalence(fixture: Fixture) -> None: |
| 203 | + """Assert that yield_events and auto_send produce equivalent logical |
| 204 | + deliveries and identical span signals for each LangGraph fixture. |
| 205 | +
|
| 206 | + See runner.py for the full contract. The key LangGraph difference: tool |
| 207 | + requests arrive as Full events rather than Start+Delta+Done, so auto_send |
| 208 | + handles them by opening a streaming context with the full content and |
| 209 | + closing it immediately — both channels produce the same LogicalDelivery. |
205 | 210 | """ |
| 211 | + yield_deliveries, auto_deliveries, yield_spans, auto_spans = await run_cross_channel_conformance(fixture) |
| 212 | + |
| 213 | + assert yield_deliveries == auto_deliveries, ( |
| 214 | + f"[{fixture.name}] logical deliveries differ:\n yield: {yield_deliveries}\n auto_send: {auto_deliveries}" |
| 215 | + ) |
| 216 | + assert yield_spans == auto_spans, ( |
| 217 | + f"[{fixture.name}] span signals differ:\n yield: {yield_spans}\n auto_send: {auto_spans}" |
| 218 | + ) |
| 219 | + |
| 220 | + |
| 221 | +# --------------------------------------------------------------------------- |
| 222 | +# Backward-compatible determinism guard |
| 223 | +# --------------------------------------------------------------------------- |
| 224 | + |
| 225 | + |
| 226 | +@pytest.mark.parametrize("fixture", _LANGGRAPH_FIXTURES, ids=lambda f: f.name) |
| 227 | +def test_span_derivation_is_deterministic(fixture: Fixture) -> None: |
| 228 | + """Span derivation is deterministic: deriving twice from the same event list gives equal results."""
206 | 229 | assert derive_all(fixture.events) == derive_all(fixture.events) |
0 commit comments