From 5f0d6b22f4807dfceee9aa20ed9fbe2fb62b0044 Mon Sep 17 00:00:00 2001
From: "congxiao.wxx" <congxiao.wxx@alibaba-inc.com>
Date: Mon, 1 Jun 2026 20:15:12 +0800
Subject: [PATCH] Return reasoning content from model output

Protocol conversion should use the returned reasoning payload as the source of truth. MODEL_PARAMETER_RULES can still control model-side thinking, but OpenAI and AG-UI responses should not hide non-empty reasoning_content when the env flag says thinking is false.

Constraint: User requested removal of protocol-level thinking_enabled = is_thinking_enabled_from_env() gating.

Rejected: Keep env-based response suppression | runtime parameters can drift from the model output and hide returned reasoning.

Confidence: high

Scope-risk: narrow

Directive: Do not reintroduce protocol-level reasoning env gates; only emit reasoning when returned reasoning_content is non-empty.

Tested: uv run --python 3.11 --dev --extra server pytest -q tests/unittests/server/test_openai_protocol.py tests/unittests/server/test_agui_protocol.py tests/unittests/server/test_reasoning.py

Tested: uv run --python 3.11 --dev --extra server pytest -q tests/unittests/server

Tested: uv run --python 3.11 --dev --extra server ruff check agentrun/server/openai_protocol.py agentrun/server/agui_protocol.py tests/unittests/server/test_openai_protocol.py tests/unittests/server/test_agui_protocol.py

Tested: git diff --check

Change-Id: I638efa7ca19bf8ed9417fb1922d43205d4d52b65
Not-tested: Remote GitHub CI result pending after push.
---
 agentrun/server/agui_protocol.py              | 51 +++++++-----------
 agentrun/server/openai_protocol.py            | 48 +++++++----------
 tests/unittests/server/test_agui_protocol.py  | 29 +++++++----
 .../unittests/server/test_openai_protocol.py  | 52 +++++++++++++++----
 4 files changed, 98 insertions(+), 82 deletions(-)

diff --git a/agentrun/server/agui_protocol.py b/agentrun/server/agui_protocol.py
index 047cce4..5e8ccb4 100644
--- a/agentrun/server/agui_protocol.py
+++ b/agentrun/server/agui_protocol.py
@@ -31,10 +31,7 @@
 import pydash
 
 from ..utils.helper import merge, MergeOptions
-from ..utils.reasoning import (
-    get_reasoning_content,
-    is_thinking_enabled_from_env,
-)
+from ..utils.reasoning import get_reasoning_content
 from .model import (
     AgentEvent,
     AgentRequest,
@@ -466,8 +463,6 @@ def _process_event_with_boundaries(
             ToolCallStartEvent,
         )
 
-        thinking_enabled = is_thinking_enabled_from_env()
-
         # RAW 事件直接透传
         if event.event == EventType.RAW:
             raw_data = event.data.get("raw", "")
@@ -478,34 +473,31 @@ def _process_event_with_boundaries(
             return
 
         if event.event == EventType.REASONING:
-            if thinking_enabled:
-                reasoning_content = (
-                    event.data.get("delta")
-                    or get_reasoning_content(event.data)
-                    or ""
-                )
-                if reasoning_content:
-                    for sse_data in state.end_text_if_open(self._encoder):
-                        yield sse_data
-                    for sse_data in state.end_all_tools(self._encoder):
-                        yield sse_data
-                    for sse_data in state.ensure_reasoning_started():
-                        yield sse_data
-                    yield _encode_reasoning_event(
-                        "REASONING_MESSAGE_CONTENT",
-                        messageId=state.reasoning.message_id,
-                        delta=reasoning_content,
-                    )
+            reasoning_content = (
+                event.data.get("delta")
+                or get_reasoning_content(event.data)
+                or ""
+            )
+            if reasoning_content:
+                for sse_data in state.end_text_if_open(self._encoder):
+                    yield sse_data
+                for sse_data in state.end_all_tools(self._encoder):
+                    yield sse_data
+                for sse_data in state.ensure_reasoning_started():
+                    yield sse_data
+                yield _encode_reasoning_event(
+                    "REASONING_MESSAGE_CONTENT",
+                    messageId=state.reasoning.message_id,
+                    delta=reasoning_content,
+                )
             return
 
         # TEXT 事件：在首个 TEXT 前注入 TEXT_MESSAGE_START
         # AG-UI 协议要求：发送 TEXT_MESSAGE_START 前必须先结束所有未结束的 TOOL_CALL
         if event.event == EventType.TEXT:
-            addition = self._strip_reasoning_from_addition(
-                event.addition, thinking_enabled
-            )
+            addition = self._strip_reasoning_from_addition(event.addition)
             addition_reasoning = get_reasoning_content(event.addition or {})
-            if thinking_enabled and addition_reasoning:
+            if addition_reasoning:
                 for sse_data in state.ensure_reasoning_started():
                     yield sse_data
                 yield _encode_reasoning_event(
@@ -874,7 +866,6 @@ def _apply_addition(
     def _strip_reasoning_from_addition(
         self,
         addition: Optional[Dict[str, Any]],
-        thinking_enabled: bool,
     ) -> Optional[Dict[str, Any]]:
         if not addition:
             return addition
@@ -890,8 +881,6 @@ def _strip_reasoning_from_addition(
             else:
                 stripped.pop("additional_kwargs", None)
 
-        if not thinking_enabled:
-            return stripped
         return stripped or None
 
     async def _error_stream(self, message: str) -> AsyncIterator[str]:
diff --git a/agentrun/server/openai_protocol.py b/agentrun/server/openai_protocol.py
index 99977c7..6ef5da6 100644
--- a/agentrun/server/openai_protocol.py
+++ b/agentrun/server/openai_protocol.py
@@ -15,10 +15,7 @@
 from fastapi.responses import JSONResponse, StreamingResponse
 import pydash
 
-from ..utils.reasoning import (
-    get_reasoning_content,
-    is_thinking_enabled_from_env,
-)
+from ..utils.reasoning import get_reasoning_content
 from ..utils.helper import merge, MergeOptions
 from .model import (
     AgentEvent,
@@ -304,7 +301,6 @@ async def _format_stream(
         # 状态追踪
         sent_role = False
         has_text = False
-        thinking_enabled = is_thinking_enabled_from_env()
         tool_call_index = -1  # 从 -1 开始，第一个工具调用时变为 0
         # 工具调用状态：{tool_id: {"started": bool, "index": int}}
         tool_call_states: Dict[str, Dict[str, Any]] = {}
@@ -341,19 +337,18 @@ async def _format_stream(
                             event.addition_merge_options,
                         )
 
-                    self._apply_reasoning_gate(delta, thinking_enabled)
+                    self._promote_reasoning_content(delta)
                     yield self._build_chunk(context, delta)
                 continue
 
             if event.event == EventType.REASONING:
-                if thinking_enabled:
-                    reasoning_content = event.data.get("delta", "")
-                    if reasoning_content:
-                        has_text = True
-                        yield self._build_chunk(
-                            context,
-                            {"reasoning_content": reasoning_content},
-                        )
+                reasoning_content = event.data.get("delta", "")
+                if reasoning_content:
+                    has_text = True
+                    yield self._build_chunk(
+                        context,
+                        {"reasoning_content": reasoning_content},
+                    )
                 continue
 
             # TOOL_CALL_CHUNK 事件
@@ -401,7 +396,7 @@ async def _format_stream(
                             event.addition_merge_options,
                         )
 
-                    self._apply_reasoning_gate(delta, thinking_enabled)
+                    self._promote_reasoning_content(delta)
                     yield self._build_chunk(context, delta)
                 continue
 
@@ -477,7 +472,6 @@ def _format_non_stream(
         """
         content_parts: List[str] = []
         reasoning_parts: List[str] = []
-        thinking_enabled = is_thinking_enabled_from_env()
         # 工具调用状态：{tool_id: {id, name, arguments}}
         tool_call_map: Dict[str, Dict[str, Any]] = {}
         has_tool_calls = False
@@ -486,12 +480,12 @@ def _format_non_stream(
             if event.event == EventType.TEXT:
                 content_parts.append(event.data.get("delta", ""))
                 reasoning_content = get_reasoning_content(event.addition or {})
-                if thinking_enabled and reasoning_content:
+                if reasoning_content:
                     reasoning_parts.append(reasoning_content)
 
             elif event.event == EventType.REASONING:
                 reasoning_content = event.data.get("delta", "")
-                if thinking_enabled and reasoning_content:
+                if reasoning_content:
                     reasoning_parts.append(reasoning_content)
 
             elif event.event == EventType.TOOL_CALL_CHUNK:
@@ -564,18 +558,14 @@ def _apply_addition(
 
         return merge(delta, addition, **(merge_options or {}))
 
-    def _apply_reasoning_gate(
-        self,
-        payload: Dict[str, Any],
-        thinking_enabled: bool,
-    ) -> None:
-        if thinking_enabled:
-            reasoning_content = get_reasoning_content(payload)
-            if reasoning_content is not None:
-                payload["reasoning_content"] = reasoning_content
-            return
-
+    def _promote_reasoning_content(self, payload: Dict[str, Any]) -> None:
+        reasoning_content = get_reasoning_content(payload)
         payload.pop("reasoning_content", None)
         additional_kwargs = payload.get("additional_kwargs")
         if isinstance(additional_kwargs, dict):
             additional_kwargs.pop("reasoning_content", None)
+            if not additional_kwargs:
+                payload.pop("additional_kwargs", None)
+
+        if reasoning_content:
+            payload["reasoning_content"] = reasoning_content
diff --git a/tests/unittests/server/test_agui_protocol.py b/tests/unittests/server/test_agui_protocol.py
index eefc20b..0896a08 100644
--- a/tests/unittests/server/test_agui_protocol.py
+++ b/tests/unittests/server/test_agui_protocol.py
@@ -1196,7 +1196,7 @@ async def invoke_agent(request: AgentRequest):
 
 
 class TestAGUIReasoningContent:
-    """测试 AG-UI reasoning 事件输出开关"""
+    """测试 AG-UI reasoning 事件输出"""
 
     def get_client(self, invoke_agent):
         server = AgentRunServer(invoke_agent=invoke_agent)
@@ -1228,7 +1228,7 @@ async def invoke_agent(request: AgentRequest):
         assert reasoning_event["delta"] == "thinking"
         assert "TEXT_MESSAGE_CONTENT" in types
 
-    def test_stream_suppresses_reasoning_when_thinking_disabled(
+    def test_stream_includes_reasoning_when_thinking_disabled(
         self, monkeypatch
     ):
         monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
@@ -1246,9 +1246,14 @@ async def invoke_agent(request: AgentRequest):
         )
 
         events = _agui_sse_events(response)
-        assert "REASONING_MESSAGE_CONTENT" not in [
-            event["type"] for event in events
-        ]
+        types = [event["type"] for event in events]
+        reasoning_event = next(
+            event
+            for event in events
+            if event["type"] == "REASONING_MESSAGE_CONTENT"
+        )
+        assert "REASONING_START" in types
+        assert reasoning_event["delta"] == "thinking"
         text_event = next(
             event for event in events if event["type"] == "TEXT_MESSAGE_CONTENT"
         )
@@ -1257,7 +1262,7 @@ async def invoke_agent(request: AgentRequest):
     def test_stream_promotes_chunk_additional_kwargs_reasoning(
         self, monkeypatch
     ):
-        monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": true}')
+        monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
 
         async def invoke_agent(request: AgentRequest):
             yield SimpleNamespace(
@@ -1282,9 +1287,7 @@ async def invoke_agent(request: AgentRequest):
         assert reasoning_event["delta"] == "thinking"
         assert text_event["delta"] == "answer"
 
-    def test_text_addition_reasoning_is_emitted_before_text(
-        self, monkeypatch
-    ):
+    def test_text_addition_reasoning_is_emitted_before_text(self, monkeypatch):
         monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": true}')
 
         async def invoke_agent(request: AgentRequest):
@@ -1314,7 +1317,7 @@ async def invoke_agent(request: AgentRequest):
         assert text_event["delta"] == "answer"
         assert "additional_kwargs" not in text_event
 
-    def test_text_addition_reasoning_is_stripped_when_thinking_disabled(
+    def test_text_addition_reasoning_is_emitted_when_thinking_disabled(
         self, monkeypatch
     ):
         monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
@@ -1335,7 +1338,11 @@ async def invoke_agent(request: AgentRequest):
 
         events = _agui_sse_events(response)
         types = [event["type"] for event in events]
-        assert all(not event_type.startswith("REASONING") for event_type in types)
+        assert types.index("REASONING_MESSAGE_CONTENT") < types.index(
+            "TEXT_MESSAGE_START"
+        )
+        assert "REASONING_MESSAGE_END" in types
+        assert "REASONING_END" in types
         text_event = next(
             event for event in events if event["type"] == "TEXT_MESSAGE_CONTENT"
         )
diff --git a/tests/unittests/server/test_openai_protocol.py b/tests/unittests/server/test_openai_protocol.py
index 1b15052..38f602d 100644
--- a/tests/unittests/server/test_openai_protocol.py
+++ b/tests/unittests/server/test_openai_protocol.py
@@ -1017,7 +1017,7 @@ def invoke_agent(request: AgentRequest):
 
 
 class TestOpenAIReasoningContent:
-    """测试 OpenAI reasoning_content 输出开关"""
+    """测试 OpenAI reasoning_content 输出"""
 
     def get_client(self, invoke_agent):
         server = AgentRunServer(invoke_agent=invoke_agent)
@@ -1042,10 +1042,12 @@ async def invoke_agent(request: AgentRequest):
         )
 
         events = _openai_sse_events(response)
-        assert events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking"
+        assert (
+            events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking"
+        )
         assert events[1]["choices"][0]["delta"]["content"] == "answer"
 
-    def test_stream_suppresses_reasoning_when_thinking_disabled(
+    def test_stream_includes_reasoning_when_thinking_disabled(
         self, monkeypatch
     ):
         monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
@@ -1066,11 +1068,10 @@ async def invoke_agent(request: AgentRequest):
         )
 
         events = _openai_sse_events(response)
-        assert all(
-            "reasoning_content" not in event["choices"][0]["delta"]
-            for event in events
+        assert (
+            events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking"
         )
-        assert events[0]["choices"][0]["delta"]["content"] == "answer"
+        assert events[1]["choices"][0]["delta"]["content"] == "answer"
 
     def test_non_stream_includes_reasoning_when_thinking_enabled(
         self, monkeypatch
@@ -1098,7 +1099,7 @@ def invoke_agent(request: AgentRequest):
         assert message["content"] == "answer"
         assert message["reasoning_content"] == "thinking"
 
-    def test_non_stream_suppresses_reasoning_when_thinking_disabled(
+    def test_non_stream_includes_reasoning_when_thinking_disabled(
         self, monkeypatch
     ):
         monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
@@ -1122,12 +1123,12 @@ def invoke_agent(request: AgentRequest):
 
         message = response.json()["choices"][0]["message"]
         assert message["content"] == "answer"
-        assert "reasoning_content" not in message
+        assert message["reasoning_content"] == "thinking"
 
     def test_stream_promotes_chunk_additional_kwargs_reasoning(
         self, monkeypatch
     ):
-        monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": true}')
+        monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
 
         async def invoke_agent(request: AgentRequest):
             yield SimpleNamespace(
@@ -1144,9 +1145,38 @@ async def invoke_agent(request: AgentRequest):
         )
 
         events = _openai_sse_events(response)
-        assert events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking"
+        assert (
+            events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking"
+        )
         assert events[1]["choices"][0]["delta"]["content"] == "answer"
 
+    def test_stream_promotes_text_addition_reasoning_when_thinking_disabled(
+        self, monkeypatch
+    ):
+        monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
+
+        async def invoke_agent(request: AgentRequest):
+            yield AgentEvent(
+                event=EventType.TEXT,
+                data={"delta": "answer"},
+                addition={
+                    "additional_kwargs": {"reasoning_content": "thinking"}
+                },
+            )
+
+        response = self.get_client(invoke_agent).post(
+            "/openai/v1/chat/completions",
+            json={
+                "messages": [{"role": "user", "content": "Hi"}],
+                "stream": True,
+            },
+        )
+
+        delta = _openai_sse_events(response)[0]["choices"][0]["delta"]
+        assert delta["content"] == "answer"
+        assert delta["reasoning_content"] == "thinking"
+        assert "additional_kwargs" not in delta
+
     def test_parses_request_message_reasoning_content(self):
         captured_request = {}