Skip to content

Commit fc7d554

Browse files
declan-scaleclaude
andcommitted
fix(langgraph): restore created_at + docstring-only deprecation for tracing handler (PR 5/6)
AGX1-378: wire workflow_now_if_in_workflow() into stream_langgraph_events so Temporal callers get deterministic message timestamps, matching the pattern used by the openai/litellm providers. Deprecation alignment: remove runtime warnings.warn from create_langgraph_tracing_handler (and unused import warnings) to match PR 4/6 pydantic-ai convention. Deprecation remains in docstrings on module, class, and function. Callers under -W error are no longer broken. Test alignment after rebase onto unified-harness-surface (b4b8b33): - FakeStreamingModule.streaming_task_message_context in test_langgraph_async.py and test_pydantic_ai_async.py updated to accept **kw (foundation now passes created_at). - Three "no tool spans for Full events" tests updated to assert the new SpanDeriver behaviour: Full(ToolRequestContent) opens a span, Full(ToolResponseContent) closes it. - Two "accumulates all text" multi-step tests corrected to last-segment semantics (auto_send resets final_text_parts on each new Start(TextContent)). Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 24ccfe9 commit fc7d554

8 files changed

Lines changed: 78 additions & 60 deletions

src/agentex/lib/adk/_modules/_langgraph_async.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
handles Full events correctly; no coalescing wrapper is needed.
1717
"""
1818

19+
from agentex.lib.utils.temporal import workflow_now_if_in_workflow
20+
1921

2022
async def stream_langgraph_events(stream, task_id: str) -> str:
2123
"""Stream LangGraph events to Agentex via Redis.
@@ -37,6 +39,11 @@ async def stream_langgraph_events(stream, task_id: str) -> str:
3739
NOT Start+Delta+Done like pydantic-ai. ``auto_send`` handles Full events
3840
correctly; no coalescing wrapper is needed.
3941
42+
AGX1-378 note: ``created_at`` is set from ``workflow.now()`` when called inside a
43+
Temporal workflow, matching the pattern used by the openai/litellm providers.
44+
Outside a workflow (plain async activities, sync agents) it is ``None`` and the
45+
server's wall clock is used.
46+
4047
Args:
4148
stream: Async iterator from graph.astream(..., stream_mode=["messages", "updates"])
4249
task_id: The Agentex task ID to stream messages to.
@@ -50,7 +57,9 @@ async def stream_langgraph_events(stream, task_id: str) -> str:
5057
# AGX1-377 note: LangGraph emits tool requests as Full events (from "updates"),
5158
# NOT Start+Delta+Done like pydantic-ai. auto_send handles Full events correctly;
5259
# no coalescing wrapper is needed.
60+
# AGX1-378: stamp messages with workflow.now() inside Temporal for deterministic
61+
# created_at ordering; falls back to None (server wall clock) outside a workflow.
5362
turn = LangGraphTurn(stream, model=None)
5463
emitter = UnifiedEmitter(task_id=task_id, trace_id=None, parent_span_id=None)
55-
result = await emitter.auto_send_turn(turn)
64+
result = await emitter.auto_send_turn(turn, created_at=workflow_now_if_in_workflow())
5665
return result.final_text

src/agentex/lib/adk/_modules/_langgraph_tracing.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,6 @@
1414

1515
from __future__ import annotations
1616

17-
import warnings
1817
from uuid import UUID
1918
from typing import Any, override
2019

@@ -268,13 +267,6 @@ def create_langgraph_tracing_handler(
268267
269268
This function remains available for backward compatibility.
270269
"""
271-
warnings.warn(
272-
"create_langgraph_tracing_handler is deprecated. Use LangGraphTurn with "
273-
"UnifiedEmitter instead — the unified harness derives equivalent spans from "
274-
"the canonical event stream without a LangChain callback handler.",
275-
DeprecationWarning,
276-
stacklevel=2,
277-
)
278270
return AgentexLangGraphTracingHandler(
279271
trace_id=trace_id,
280272
parent_span_id=parent_span_id,

tests/lib/adk/test_langgraph_async.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ class FakeStreamingModule:
7979
def __init__(self) -> None:
8080
self.contexts: list[FakeContext] = []
8181

82-
def streaming_task_message_context(self, *, task_id: str, initial_content: Any) -> FakeContext:
82+
def streaming_task_message_context(self, *, task_id: str, initial_content: Any, **kw: Any) -> FakeContext:
8383
tm = TaskMessage(
8484
id=f"m{len(self.contexts) + 1}",
8585
task_id=task_id,
@@ -226,14 +226,15 @@ async def test_tool_response_posted_via_streaming_context(
226226
assert content.content == "Sunny, 72F"
227227
assert streaming.contexts[0].closed is True
228228

229-
async def test_multi_step_text_then_tool_then_text_accumulates_all_text(
229+
async def test_multi_step_text_then_tool_then_text_last_segment(
230230
self, fake_adk: tuple[FakeStreamingModule, FakeMessagesModule]
231231
) -> None:
232-
"""Unified surface: final_text accumulates all text across the turn.
232+
"""Unified surface: final_text uses last-segment semantics.
233233
234-
Old bespoke impl only returned the last text segment (reset final_text
235-
each time a new text context opened). The unified surface accumulates
236-
all text because auto_send appends every TextDelta.
234+
auto_send resets final_text_parts when a new Start(TextContent) is seen,
235+
so multi-step turns (text -> tool -> text) return only the LAST text segment.
236+
Both text contexts are still opened and streamed to Redis; only the
237+
return value is last-segment. This matches stream_pydantic_ai_events.
237238
"""
238239
from langchain_core.messages import AIMessage, ToolMessage, AIMessageChunk
239240

@@ -256,10 +257,9 @@ async def test_multi_step_text_then_tool_then_text_accumulates_all_text(
256257

257258
final = await stream_langgraph_events(stream, TASK_ID)
258259

259-
# Unified surface accumulates all text (not just the last segment)
260-
assert "Looking up..." in final
261-
assert "Found it!" in final
262-
# Two text streaming contexts (one per text segment)
260+
# Last segment only — first text segment is NOT in final_text
261+
assert final == "Found it!"
262+
# Two text streaming contexts (one per text segment) — both streamed to Redis
263263
text_ctxs = [c for c in streaming.contexts if isinstance(c.initial_content, TextContent)]
264264
assert len(text_ctxs) == 2
265265
assert all(ctx.closed for ctx in text_ctxs)

tests/lib/adk/test_langgraph_sync.py

Lines changed: 17 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,8 @@
33
Covers:
44
- Basic text, tool call, and tool response emission
55
- on_final_ai_message callback for usage capture
6-
- Deprecation warning emitted by create_langgraph_tracing_handler
6+
- create_langgraph_tracing_handler symbol is importable and functional
7+
(runtime DeprecationWarning removed; deprecation is docstring-only)
78
89
NOTE: langchain_core imports must be deferred to test-function scope because
910
conftest.py stubs out ``langchain_core.messages`` with MagicMock for ADK
@@ -13,7 +14,6 @@
1314
from __future__ import annotations
1415

1516
import sys
16-
import warnings
1717
from typing import Any, AsyncIterator
1818

1919
import pytest
@@ -197,13 +197,21 @@ def _cb(msg):
197197
assert yield_order.index("event") < yield_order.index("callback")
198198

199199

200-
class TestDeprecationWarning:
201-
def test_create_langgraph_tracing_handler_emits_deprecation_warning(self):
200+
class TestLangGraphTracingHandlerBackwardCompat:
201+
def test_create_langgraph_tracing_handler_no_runtime_warning(self):
202+
"""Deprecated symbol remains importable and emits no runtime DeprecationWarning.
203+
204+
The runtime warnings.warn was removed (docstring-only deprecation) to
205+
align with PR 4/6 and avoid breaking callers under warnings-as-errors.
206+
Using ``warnings.simplefilter("error", DeprecationWarning)`` verifies
207+
that calling the function is safe under -W error conditions.
208+
"""
209+
import warnings
210+
202211
from agentex.lib.adk._modules._langgraph_tracing import create_langgraph_tracing_handler
203212

204213
with warnings.catch_warnings(record=True) as w:
205-
warnings.simplefilter("always")
206-
create_langgraph_tracing_handler(trace_id="t1")
207-
assert any(issubclass(warning.category, DeprecationWarning) for warning in w), (
208-
"create_langgraph_tracing_handler must emit a DeprecationWarning"
209-
)
214+
warnings.simplefilter("error", DeprecationWarning)
215+
create_langgraph_tracing_handler(trace_id="t1", parent_span_id="p1")
216+
217+
assert w == [], "create_langgraph_tracing_handler must NOT emit a runtime DeprecationWarning"

tests/lib/adk/test_langgraph_sync_unified.py

Lines changed: 11 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -149,16 +149,13 @@ def fake_tracer(self):
149149
)
150150
return tracer, backend
151151

152-
async def test_tool_span_not_derived_from_full_events(self, fake_tracer):
153-
"""AGX1-377: LangGraph emits tool calls as Full events (not Start+Done).
154-
The SpanDeriver opens tool spans from Start(ToolRequestContent)+Done
155-
sequences. Since LangGraph uses Full, no tool span is opened by the
156-
SpanDeriver -- this is the documented AGX1-377 gap resolved by the
157-
unified surface (Full events are emitted identically; cross-channel
158-
span equivalence arrives with AGX1-373).
159-
160-
The tracer must still be invoked (SpanDeriver.observe is called for each
161-
event); it just produces no open-span signals for LangGraph Full tool events.
152+
async def test_tool_span_derived_from_full_events(self, fake_tracer):
153+
"""AGX1-377: SpanDeriver now handles Full tool events for LangGraph.
154+
155+
Full(ToolRequestContent) opens a tool span keyed by tool_call_id;
156+
Full(ToolResponseContent) closes it. This bridges the previous gap where
157+
LangGraph's Full-event path produced no spans, aligning it with
158+
Start+Done harnesses (pydantic-ai, openai-agents).
162159
"""
163160
from langchain_core.messages import AIMessage, ToolMessage
164161

@@ -175,13 +172,10 @@ async def test_tool_span_not_derived_from_full_events(self, fake_tracer):
175172
emitter = UnifiedEmitter(task_id="t", trace_id="trace1", parent_span_id=None, tracer=tracer)
176173
_ = [e async for e in emitter.yield_turn(LangGraphTurn(_make_stream(events_raw)))]
177174

178-
# AGX1-377: Full events don't produce tool spans via SpanDeriver today.
179-
# This is the documented gap; full cross-channel equivalence arrives with AGX1-373.
180-
assert backend.spans_started == [], (
181-
"Expected no tool spans for LangGraph Full events (AGX1-377); if this "
182-
"assertion fails it means SpanDeriver now handles Full events — update "
183-
"the test to assert the new span names."
184-
)
175+
assert len(backend.spans_started) == 1, "Full(ToolRequestContent) opens one tool span"
176+
started = backend.spans_started[0]
177+
assert started["name"] == "get_weather"
178+
assert started["input"] == {"city": "Paris"}
185179

186180
async def test_no_spans_when_no_tool_calls(self, fake_tracer):
187181
"""yield_turn with tracer but no tool calls emits no spans."""

tests/lib/adk/test_pydantic_ai_async.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ class FakeStreamingModule:
8282
def __init__(self) -> None:
8383
self.contexts: list[FakeContext] = []
8484

85-
def streaming_task_message_context(self, *, task_id: str, initial_content: Any) -> FakeContext:
85+
def streaming_task_message_context(self, *, task_id: str, initial_content: Any, **kw: Any) -> FakeContext:
8686
tm = TaskMessage(
8787
id=f"m{len(self.contexts) + 1}",
8888
task_id=task_id,

tests/lib/core/harness/test_harness_langgraph_async.py

Lines changed: 20 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -213,8 +213,14 @@ async def test_tool_response_posted_via_streaming_context(self):
213213
assert tool_resp_ctxs[0].initial_content.content == "Sunny, 72F"
214214
assert tool_resp_ctxs[0].closed is True
215215

216-
async def test_multi_step_accumulates_all_text(self):
217-
"""Unified surface: final_text accumulates all text, not just last segment."""
216+
async def test_multi_step_final_text_is_last_segment(self):
217+
"""Unified surface: final_text uses last-segment semantics.
218+
219+
auto_send resets final_text_parts when a new Start(TextContent) is seen,
220+
so multi-step turns (text -> tool -> text) return only the LAST text segment.
221+
This matches the behaviour documented in auto_send.py and mirrors
222+
stream_pydantic_ai_events.
223+
"""
218224
from langchain_core.messages import AIMessage, ToolMessage, AIMessageChunk
219225

220226
chunk1 = AIMessageChunk(content="Searching...")
@@ -232,11 +238,10 @@ async def test_multi_step_accumulates_all_text(self):
232238
]
233239
result, fake_streaming, _ = await _run_auto_send_turn(events)
234240

235-
# All text accumulated
236-
assert "Searching..." in result.final_text
237-
assert "Found it!" in result.final_text
241+
# Last segment only — first text segment is NOT in final_text
242+
assert result.final_text == "Found it!"
238243

239-
# Two text streaming contexts
244+
# Two text streaming contexts still opened (both streamed to Redis)
240245
text_ctxs = [c for c in fake_streaming.contexts if isinstance(c.initial_content, TextContent)]
241246
assert len(text_ctxs) == 2
242247

@@ -269,8 +274,12 @@ async def test_turn_usage_populated_after_events_consumed(self):
269274
assert usage.output_tokens == 5
270275
assert usage.total_tokens == 15
271276

272-
async def test_tracer_does_not_produce_tool_spans_for_full_events(self):
273-
"""AGX1-377: Full events don't trigger SpanDeriver tool spans."""
277+
async def test_tracer_produces_tool_spans_for_full_events(self):
278+
"""AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes).
279+
280+
Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.
281+
This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents).
282+
"""
274283
from langchain_core.messages import AIMessage, ToolMessage
275284

276285
tc = {"id": "c1", "name": "t", "args": {}}
@@ -284,4 +293,6 @@ async def test_tracer_does_not_produce_tool_spans_for_full_events(self):
284293
_, _, fake_tracing = await _run_auto_send_turn(events, trace_id="trace-1")
285294

286295
assert fake_tracing is not None
287-
assert fake_tracing.started == [], "AGX1-377: Full events don't trigger tool spans"
296+
assert len(fake_tracing.started) == 1, "Full(ToolRequestContent) opens one tool span"
297+
assert fake_tracing.started[0][0] == "t", "span name matches the tool name"
298+
assert len(fake_tracing.ended) == 1, "Full(ToolResponseContent) closes the span"

tests/lib/core/harness/test_harness_langgraph_sync.py

Lines changed: 9 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -189,9 +189,12 @@ async def test_empty_stream_yields_nothing(self):
189189
out, _ = await _run_yield_turn([])
190190
assert out == []
191191

192-
async def test_tracer_invoked_but_no_tool_spans_for_full_events(self):
193-
"""AGX1-377: tool spans are NOT derived from Full events (SpanDeriver uses Start+Done).
194-
This is the documented gap; full cross-channel equivalence arrives with AGX1-373."""
192+
async def test_tracer_produces_tool_spans_for_full_events(self):
193+
"""AGX1-377: SpanDeriver now handles Full tool events (request opens, response closes).
194+
195+
Full(ToolRequestContent) opens a tool span; Full(ToolResponseContent) closes it.
196+
This aligns LangGraph tracing with Start+Done harnesses (pydantic-ai, openai-agents).
197+
"""
195198
from langchain_core.messages import AIMessage, ToolMessage
196199

197200
tc = {"id": "c1", "name": "t", "args": {}}
@@ -205,8 +208,9 @@ async def test_tracer_invoked_but_no_tool_spans_for_full_events(self):
205208
_, fake_tracing = await _run_yield_turn(events, trace_id="trace-1")
206209

207210
assert fake_tracing is not None
208-
# No tool spans opened — Full events don't trigger OpenSpan in SpanDeriver
209-
assert fake_tracing.started == [], "Expected no tool spans for LangGraph Full events (AGX1-377)"
211+
assert len(fake_tracing.started) == 1, "Full(ToolRequestContent) opens one tool span"
212+
assert fake_tracing.started[0][0] == "t", "span name matches the tool name"
213+
assert len(fake_tracing.ended) == 1, "Full(ToolResponseContent) closes the span"
210214

211215
async def test_usage_captured_after_yield(self):
212216
from langchain_core.messages import AIMessage

0 commit comments

Comments
 (0)