From 5f0d6b22f4807dfceee9aa20ed9fbe2fb62b0044 Mon Sep 17 00:00:00 2001 From: "congxiao.wxx" Date: Mon, 1 Jun 2026 20:15:12 +0800 Subject: [PATCH] Return reasoning content from model output Protocol conversion should use the returned reasoning payload as the source of truth. MODEL_PARAMETER_RULES can still control model-side thinking, but OpenAI and AG-UI responses should not hide non-empty reasoning_content when the env flag says thinking is false. Constraint: User requested removal of protocol-level thinking_enabled = is_thinking_enabled_from_env() gating. Rejected: Keep env-based response suppression | runtime parameters can drift from the model output and hide returned reasoning. Confidence: high Scope-risk: narrow Directive: Do not reintroduce protocol-level reasoning env gates; only emit reasoning when returned reasoning_content is non-empty. Tested: uv run --python 3.11 --dev --extra server pytest -q tests/unittests/server/test_openai_protocol.py tests/unittests/server/test_agui_protocol.py tests/unittests/server/test_reasoning.py Tested: uv run --python 3.11 --dev --extra server pytest -q tests/unittests/server Tested: uv run --python 3.11 --dev --extra server ruff check agentrun/server/openai_protocol.py agentrun/server/agui_protocol.py tests/unittests/server/test_openai_protocol.py tests/unittests/server/test_agui_protocol.py Tested: git diff --check Change-Id: I638efa7ca19bf8ed9417fb1922d43205d4d52b65 Not-tested: Remote GitHub CI result pending after push. --- agentrun/server/agui_protocol.py | 51 +++++++----------- agentrun/server/openai_protocol.py | 48 +++++++---------- tests/unittests/server/test_agui_protocol.py | 29 +++++++---- .../unittests/server/test_openai_protocol.py | 52 +++++++++++++++---- 4 files changed, 98 insertions(+), 82 deletions(-) diff --git a/agentrun/server/agui_protocol.py b/agentrun/server/agui_protocol.py index 047cce4..5e8ccb4 100644 --- a/agentrun/server/agui_protocol.py +++ b/agentrun/server/agui_protocol.py @@ -31,10 +31,7 @@ import pydash from ..utils.helper import merge, MergeOptions -from ..utils.reasoning import ( - get_reasoning_content, - is_thinking_enabled_from_env, -) +from ..utils.reasoning import get_reasoning_content from .model import ( AgentEvent, AgentRequest, @@ -466,8 +463,6 @@ def _process_event_with_boundaries( ToolCallStartEvent, ) - thinking_enabled = is_thinking_enabled_from_env() - # RAW 事件直接透传 if event.event == EventType.RAW: raw_data = event.data.get("raw", "") @@ -478,34 +473,31 @@ def _process_event_with_boundaries( return if event.event == EventType.REASONING: - if thinking_enabled: - reasoning_content = ( - event.data.get("delta") - or get_reasoning_content(event.data) - or "" - ) - if reasoning_content: - for sse_data in state.end_text_if_open(self._encoder): - yield sse_data - for sse_data in state.end_all_tools(self._encoder): - yield sse_data - for sse_data in state.ensure_reasoning_started(): - yield sse_data - yield _encode_reasoning_event( - "REASONING_MESSAGE_CONTENT", - messageId=state.reasoning.message_id, - delta=reasoning_content, - ) + reasoning_content = ( + event.data.get("delta") + or get_reasoning_content(event.data) + or "" + ) + if reasoning_content: + for sse_data in state.end_text_if_open(self._encoder): + yield sse_data + for sse_data in state.end_all_tools(self._encoder): + yield sse_data + for sse_data in state.ensure_reasoning_started(): + yield sse_data + yield _encode_reasoning_event( + "REASONING_MESSAGE_CONTENT", + messageId=state.reasoning.message_id, + delta=reasoning_content, + ) return # TEXT 事件:在首个 TEXT 前注入 TEXT_MESSAGE_START # AG-UI 协议要求:发送 TEXT_MESSAGE_START 前必须先结束所有未结束的 TOOL_CALL if event.event == EventType.TEXT: - addition = self._strip_reasoning_from_addition( - event.addition, thinking_enabled - ) + addition = self._strip_reasoning_from_addition(event.addition) addition_reasoning = get_reasoning_content(event.addition or {}) - if thinking_enabled and addition_reasoning: + if addition_reasoning: for sse_data in state.ensure_reasoning_started(): yield sse_data yield _encode_reasoning_event( @@ -874,7 +866,6 @@ def _apply_addition( def _strip_reasoning_from_addition( self, addition: Optional[Dict[str, Any]], - thinking_enabled: bool, ) -> Optional[Dict[str, Any]]: if not addition: return addition @@ -890,8 +881,6 @@ def _strip_reasoning_from_addition( else: stripped.pop("additional_kwargs", None) - if not thinking_enabled: - return stripped return stripped or None async def _error_stream(self, message: str) -> AsyncIterator[str]: diff --git a/agentrun/server/openai_protocol.py b/agentrun/server/openai_protocol.py index 99977c7..6ef5da6 100644 --- a/agentrun/server/openai_protocol.py +++ b/agentrun/server/openai_protocol.py @@ -15,10 +15,7 @@ from fastapi.responses import JSONResponse, StreamingResponse import pydash -from ..utils.reasoning import ( - get_reasoning_content, - is_thinking_enabled_from_env, -) +from ..utils.reasoning import get_reasoning_content from ..utils.helper import merge, MergeOptions from .model import ( AgentEvent, @@ -304,7 +301,6 @@ async def _format_stream( # 状态追踪 sent_role = False has_text = False - thinking_enabled = is_thinking_enabled_from_env() tool_call_index = -1 # 从 -1 开始,第一个工具调用时变为 0 # 工具调用状态:{tool_id: {"started": bool, "index": int}} tool_call_states: Dict[str, Dict[str, Any]] = {} @@ -341,19 +337,18 @@ async def _format_stream( event.addition_merge_options, ) - self._apply_reasoning_gate(delta, thinking_enabled) + self._promote_reasoning_content(delta) yield self._build_chunk(context, delta) continue if event.event == EventType.REASONING: - if thinking_enabled: - reasoning_content = event.data.get("delta", "") - if reasoning_content: - has_text = True - yield self._build_chunk( - context, - {"reasoning_content": reasoning_content}, - ) + reasoning_content = event.data.get("delta", "") + if reasoning_content: + has_text = True + yield self._build_chunk( + context, + {"reasoning_content": reasoning_content}, + ) continue # TOOL_CALL_CHUNK 事件 @@ -401,7 +396,7 @@ async def _format_stream( event.addition_merge_options, ) - self._apply_reasoning_gate(delta, thinking_enabled) + self._promote_reasoning_content(delta) yield self._build_chunk(context, delta) continue @@ -477,7 +472,6 @@ def _format_non_stream( """ content_parts: List[str] = [] reasoning_parts: List[str] = [] - thinking_enabled = is_thinking_enabled_from_env() # 工具调用状态:{tool_id: {id, name, arguments}} tool_call_map: Dict[str, Dict[str, Any]] = {} has_tool_calls = False @@ -486,12 +480,12 @@ def _format_non_stream( if event.event == EventType.TEXT: content_parts.append(event.data.get("delta", "")) reasoning_content = get_reasoning_content(event.addition or {}) - if thinking_enabled and reasoning_content: + if reasoning_content: reasoning_parts.append(reasoning_content) elif event.event == EventType.REASONING: reasoning_content = event.data.get("delta", "") - if thinking_enabled and reasoning_content: + if reasoning_content: reasoning_parts.append(reasoning_content) elif event.event == EventType.TOOL_CALL_CHUNK: @@ -564,18 +558,14 @@ def _apply_addition( return merge(delta, addition, **(merge_options or {})) - def _apply_reasoning_gate( - self, - payload: Dict[str, Any], - thinking_enabled: bool, - ) -> None: - if thinking_enabled: - reasoning_content = get_reasoning_content(payload) - if reasoning_content is not None: - payload["reasoning_content"] = reasoning_content - return - + def _promote_reasoning_content(self, payload: Dict[str, Any]) -> None: + reasoning_content = get_reasoning_content(payload) payload.pop("reasoning_content", None) additional_kwargs = payload.get("additional_kwargs") if isinstance(additional_kwargs, dict): additional_kwargs.pop("reasoning_content", None) + if not additional_kwargs: + payload.pop("additional_kwargs", None) + + if reasoning_content: + payload["reasoning_content"] = reasoning_content diff --git a/tests/unittests/server/test_agui_protocol.py b/tests/unittests/server/test_agui_protocol.py index eefc20b..0896a08 100644 --- a/tests/unittests/server/test_agui_protocol.py +++ b/tests/unittests/server/test_agui_protocol.py @@ -1196,7 +1196,7 @@ async def invoke_agent(request: AgentRequest): class TestAGUIReasoningContent: - """测试 AG-UI reasoning 事件输出开关""" + """测试 AG-UI reasoning 事件输出""" def get_client(self, invoke_agent): server = AgentRunServer(invoke_agent=invoke_agent) @@ -1228,7 +1228,7 @@ async def invoke_agent(request: AgentRequest): assert reasoning_event["delta"] == "thinking" assert "TEXT_MESSAGE_CONTENT" in types - def test_stream_suppresses_reasoning_when_thinking_disabled( + def test_stream_includes_reasoning_when_thinking_disabled( self, monkeypatch ): monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}') @@ -1246,9 +1246,14 @@ async def invoke_agent(request: AgentRequest): ) events = _agui_sse_events(response) - assert "REASONING_MESSAGE_CONTENT" not in [ - event["type"] for event in events - ] + types = [event["type"] for event in events] + reasoning_event = next( + event + for event in events + if event["type"] == "REASONING_MESSAGE_CONTENT" + ) + assert "REASONING_START" in types + assert reasoning_event["delta"] == "thinking" text_event = next( event for event in events if event["type"] == "TEXT_MESSAGE_CONTENT" ) @@ -1257,7 +1262,7 @@ async def invoke_agent(request: AgentRequest): def test_stream_promotes_chunk_additional_kwargs_reasoning( self, monkeypatch ): - monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": true}') + monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}') async def invoke_agent(request: AgentRequest): yield SimpleNamespace( @@ -1282,9 +1287,7 @@ async def invoke_agent(request: AgentRequest): assert reasoning_event["delta"] == "thinking" assert text_event["delta"] == "answer" - def test_text_addition_reasoning_is_emitted_before_text( - self, monkeypatch - ): + def test_text_addition_reasoning_is_emitted_before_text(self, monkeypatch): monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": true}') async def invoke_agent(request: AgentRequest): @@ -1314,7 +1317,7 @@ async def invoke_agent(request: AgentRequest): assert text_event["delta"] == "answer" assert "additional_kwargs" not in text_event - def test_text_addition_reasoning_is_stripped_when_thinking_disabled( + def test_text_addition_reasoning_is_emitted_when_thinking_disabled( self, monkeypatch ): monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}') @@ -1335,7 +1338,11 @@ async def invoke_agent(request: AgentRequest): events = _agui_sse_events(response) types = [event["type"] for event in events] - assert all(not event_type.startswith("REASONING") for event_type in types) + assert types.index("REASONING_MESSAGE_CONTENT") < types.index( + "TEXT_MESSAGE_START" + ) + assert "REASONING_MESSAGE_END" in types + assert "REASONING_END" in types text_event = next( event for event in events if event["type"] == "TEXT_MESSAGE_CONTENT" ) diff --git a/tests/unittests/server/test_openai_protocol.py b/tests/unittests/server/test_openai_protocol.py index 1b15052..38f602d 100644 --- a/tests/unittests/server/test_openai_protocol.py +++ b/tests/unittests/server/test_openai_protocol.py @@ -1017,7 +1017,7 @@ def invoke_agent(request: AgentRequest): class TestOpenAIReasoningContent: - """测试 OpenAI reasoning_content 输出开关""" + """测试 OpenAI reasoning_content 输出""" def get_client(self, invoke_agent): server = AgentRunServer(invoke_agent=invoke_agent) @@ -1042,10 +1042,12 @@ async def invoke_agent(request: AgentRequest): ) events = _openai_sse_events(response) - assert events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking" + assert ( + events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking" + ) assert events[1]["choices"][0]["delta"]["content"] == "answer" - def test_stream_suppresses_reasoning_when_thinking_disabled( + def test_stream_includes_reasoning_when_thinking_disabled( self, monkeypatch ): monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}') @@ -1066,11 +1068,10 @@ async def invoke_agent(request: AgentRequest): ) events = _openai_sse_events(response) - assert all( - "reasoning_content" not in event["choices"][0]["delta"] - for event in events + assert ( + events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking" ) - assert events[0]["choices"][0]["delta"]["content"] == "answer" + assert events[1]["choices"][0]["delta"]["content"] == "answer" def test_non_stream_includes_reasoning_when_thinking_enabled( self, monkeypatch @@ -1098,7 +1099,7 @@ def invoke_agent(request: AgentRequest): assert message["content"] == "answer" assert message["reasoning_content"] == "thinking" - def test_non_stream_suppresses_reasoning_when_thinking_disabled( + def test_non_stream_includes_reasoning_when_thinking_disabled( self, monkeypatch ): monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}') @@ -1122,12 +1123,12 @@ def invoke_agent(request: AgentRequest): message = response.json()["choices"][0]["message"] assert message["content"] == "answer" - assert "reasoning_content" not in message + assert message["reasoning_content"] == "thinking" def test_stream_promotes_chunk_additional_kwargs_reasoning( self, monkeypatch ): - monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": true}') + monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}') async def invoke_agent(request: AgentRequest): yield SimpleNamespace( @@ -1144,9 +1145,38 @@ async def invoke_agent(request: AgentRequest): ) events = _openai_sse_events(response) - assert events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking" + assert ( + events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking" + ) assert events[1]["choices"][0]["delta"]["content"] == "answer" + def test_stream_promotes_text_addition_reasoning_when_thinking_disabled( + self, monkeypatch + ): + monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}') + + async def invoke_agent(request: AgentRequest): + yield AgentEvent( + event=EventType.TEXT, + data={"delta": "answer"}, + addition={ + "additional_kwargs": {"reasoning_content": "thinking"} + }, + ) + + response = self.get_client(invoke_agent).post( + "/openai/v1/chat/completions", + json={ + "messages": [{"role": "user", "content": "Hi"}], + "stream": True, + }, + ) + + delta = _openai_sse_events(response)[0]["choices"][0]["delta"] + assert delta["content"] == "answer" + assert delta["reasoning_content"] == "thinking" + assert "additional_kwargs" not in delta + def test_parses_request_message_reasoning_content(self): captured_request = {}