Cover reasoning protocol behavior with unit and E2E tests

congxiao-wxx · congxiao-wxx · commit 3755e4ae084e · 2026-05-29T00:29:56.000+08:00
Add deterministic protocol-level tests for MODEL_PARAMETER_RULES.thinking so reasoning_content exposure is guarded in OpenAI and AG-UI outputs.

Constraint: Keep changes limited to test coverage and current e2e expectations.

Rejected: Rely only on the smoke script | it does not make the regression part of the test suite.

Confidence: high

Scope-risk: narrow

Directive: Keep reasoning_content coverage in both unit and e2e layers when changing protocol output.

Tested: uv run --extra server ruff check tests/unittests/server/test_reasoning.py tests/unittests/server/test_openai_protocol.py tests/unittests/server/test_agui_protocol.py tests/e2e/test_reasoning_protocol.py tests/e2e/integration/langchain/test_agent_invoke_methods.py

Tested: uv run --extra server pytest tests/unittests/server -q

Tested: uv run --extra server --extra langchain pytest tests/e2e/test_reasoning_protocol.py tests/e2e/integration/langchain/test_agent_invoke_methods.py -q

Tested: uv run --extra server --extra langchain pytest tests/unittests/integration/langchain tests/unittests/conversation_service/test_langchain_adapter.py -q

Tested: MODEL_PARAMETER_RULES='{"thinking": true}' AGENTRUN_SMOKE_INSECURE_SSL=true uv run --extra server python scripts/smoke_reasoning_protocol.py --env-file /Users/congxiao/workspace/agent-quickstart-langchain/.env --model qwen3-235b-a22b-thinking-2507 --protocol both --response-mode stream --expect-reasoning --expect-content

Tested: MODEL_PARAMETER_RULES='{"thinking": false}' AGENTRUN_SMOKE_INSECURE_SSL=true uv run --extra server python scripts/smoke_reasoning_protocol.py --env-file /Users/congxiao/workspace/agent-quickstart-langchain/.env --model qwen3-235b-a22b-thinking-2507 --protocol both --response-mode stream --expect-no-reasoning --expect-content
Signed-off-by: congxiao.wxx <congxiao.wxx@alibaba-inc.com>

Change-Id: Id273ba4afeb70c63aedcdba01bc31d46f2db3bd1
diff --git a/tests/e2e/integration/langchain/test_agent_invoke_methods.py b/tests/e2e/integration/langchain/test_agent_invoke_methods.py
@@ -430,7 +430,39 @@ def _normalize_agui_event(event: Dict[str, Any]) -> Dict[str, Any]:
             },
             {"type": "TEXT_MESSAGE_END", "hasMessageId": True},
             {"type": "RUN_FINISHED", "hasThreadId": True, "hasRunId": True},
-        ]
+        ],
+        [
+            {"type": "RUN_STARTED", "hasThreadId": True, "hasRunId": True},
+            {
+                "type": "TOOL_CALL_START",
+                "toolCallName": "get_time",
+                "hasToolCallId": True,
+            },
+            {
+                "type": "TOOL_CALL_ARGS",
+                "delta": "{}",
+                "hasToolCallId": True,
+            },
+            {"type": "TOOL_CALL_END", "hasToolCallId": True},
+            {
+                "type": "TOOL_CALL_RESULT",
+                "role": "tool",
+                "hasToolCallId": True,
+                "hasMessageId": True,
+            },
+            {
+                "type": "TEXT_MESSAGE_START",
+                "role": "assistant",
+                "hasMessageId": True,
+            },
+            {
+                "type": "TEXT_MESSAGE_CONTENT",
+                "delta": "工具结果已收到: 2024-01-01 12:00:00",
+                "hasMessageId": True,
+            },
+            {"type": "TEXT_MESSAGE_END", "hasMessageId": True},
+            {"type": "RUN_FINISHED", "hasThreadId": True, "hasRunId": True},
+        ],
     ],
 }
 
@@ -562,6 +594,15 @@ def _normalize_openai_stream(
             }],
             "finish_reason": None,
         },
+        {
+            "object": "chat.completion.chunk",
+            "tool_calls": [{
+                "name": None,
+                "arguments": "{}",
+                "has_id": False,
+            }],
+            "finish_reason": None,
+        },
         {
             "object": "chat.completion.chunk",
             "delta_role": "assistant",
@@ -623,7 +664,7 @@ def _normalize_openai_nonstream(resp: Dict[str, Any]) -> Dict[str, Any]:
         "content": "工具结果已收到: 2024-01-01 12:00:00",
         "tool_calls": [{
             "name": "get_time",
-            "arguments": "",
+            "arguments": "{}",
             "has_id": True,
         }],
         "finish_reason": "tool_calls",
@@ -810,9 +851,7 @@ async def test_astream_events(
     async def test_convert_python_3_10(self):
         from langchain.messages import (
             AIMessage,
-            AIMessageChunk,
             HumanMessage,
-            SystemMessage,
         )
 
         events = [
diff --git a/tests/e2e/test_reasoning_protocol.py b/tests/e2e/test_reasoning_protocol.py
@@ -0,0 +1,153 @@
+"""E2E coverage for reasoning_content protocol output."""
+
+import json
+from types import SimpleNamespace
+from typing import Any, Dict, List
+
+import httpx
+import pytest
+
+from agentrun.server import AgentRequest, AgentRunServer
+
+
+def _parse_sse_events(content: str) -> List[Dict[str, Any]]:
+    events = []
+    for line in content.splitlines():
+        if not line.startswith("data: "):
+            continue
+        payload = line[6:]
+        if payload == "[DONE]":
+            continue
+        events.append(json.loads(payload))
+    return events
+
+
+@pytest.fixture
+def reasoning_app():
+    async def invoke_agent(request: AgentRequest):
+        yield SimpleNamespace(
+            content="",
+            additional_kwargs={"reasoning_content": "thinking"},
+        )
+        yield SimpleNamespace(content="answer", additional_kwargs={})
+
+    return AgentRunServer(invoke_agent=invoke_agent).as_fastapi_app()
+
+
+async def _post_json(app, path: str, payload: Dict[str, Any]) -> httpx.Response:
+    async with httpx.AsyncClient(
+        transport=httpx.ASGITransport(app=app),
+        base_url="http://test",
+    ) as client:
+        return await client.post(path, json=payload, timeout=60.0)
+
+
+def _set_thinking(monkeypatch, enabled: bool) -> None:
+    monkeypatch.setenv(
+        "MODEL_PARAMETER_RULES",
+        json.dumps({"thinking": enabled}),
+    )
+
+
+@pytest.mark.parametrize("thinking_enabled", [True, False])
+@pytest.mark.asyncio
+async def test_openai_stream_reasoning_content_gate(
+    reasoning_app,
+    monkeypatch,
+    thinking_enabled: bool,
+):
+    _set_thinking(monkeypatch, thinking_enabled)
+
+    response = await _post_json(
+        reasoning_app,
+        "/openai/v1/chat/completions",
+        {
+            "model": "mock-model",
+            "messages": [{"role": "user", "content": "Hi"}],
+            "stream": True,
+        },
+    )
+
+    assert response.status_code == 200
+    events = _parse_sse_events(response.text)
+    deltas = [
+        (event.get("choices") or [{}])[0].get("delta") or {}
+        for event in events
+    ]
+    reasoning = "".join(delta.get("reasoning_content", "") for delta in deltas)
+    content = "".join(delta.get("content", "") for delta in deltas)
+
+    assert content == "answer"
+    assert reasoning == ("thinking" if thinking_enabled else "")
+    assert all("additional_kwargs" not in delta for delta in deltas)
+
+
+@pytest.mark.parametrize("thinking_enabled", [True, False])
+@pytest.mark.asyncio
+async def test_openai_non_stream_reasoning_content_gate(
+    reasoning_app,
+    monkeypatch,
+    thinking_enabled: bool,
+):
+    _set_thinking(monkeypatch, thinking_enabled)
+
+    response = await _post_json(
+        reasoning_app,
+        "/openai/v1/chat/completions",
+        {
+            "model": "mock-model",
+            "messages": [{"role": "user", "content": "Hi"}],
+            "stream": False,
+        },
+    )
+
+    assert response.status_code == 200
+    message = response.json()["choices"][0]["message"]
+    assert message["content"] == "answer"
+    if thinking_enabled:
+        assert message["reasoning_content"] == "thinking"
+    else:
+        assert "reasoning_content" not in message
+
+
+@pytest.mark.parametrize("thinking_enabled", [True, False])
+@pytest.mark.asyncio
+async def test_agui_reasoning_events_gate(
+    reasoning_app,
+    monkeypatch,
+    thinking_enabled: bool,
+):
+    _set_thinking(monkeypatch, thinking_enabled)
+
+    response = await _post_json(
+        reasoning_app,
+        "/ag-ui/agent",
+        {"messages": [{"role": "user", "content": "Hi"}]},
+    )
+
+    assert response.status_code == 200
+    events = _parse_sse_events(response.text)
+    event_types = [event["type"] for event in events]
+    reasoning = "".join(
+        event.get("delta", "")
+        for event in events
+        if event["type"] == "REASONING_MESSAGE_CONTENT"
+    )
+    content = "".join(
+        event.get("delta", "")
+        for event in events
+        if event["type"] == "TEXT_MESSAGE_CONTENT"
+    )
+
+    assert content == "answer"
+    if thinking_enabled:
+        assert reasoning == "thinking"
+        assert event_types.index("REASONING_MESSAGE_CONTENT") < event_types.index(
+            "TEXT_MESSAGE_START"
+        )
+    else:
+        assert reasoning == ""
+        assert all(
+            not event_type.startswith("REASONING")
+            for event_type in event_types
+        )
diff --git a/tests/unittests/server/test_agui_protocol.py b/tests/unittests/server/test_agui_protocol.py
@@ -1281,3 +1281,63 @@ async def invoke_agent(request: AgentRequest):
         )
         assert reasoning_event["delta"] == "thinking"
         assert text_event["delta"] == "answer"
+
+    def test_text_addition_reasoning_is_emitted_before_text(
+        self, monkeypatch
+    ):
+        monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": true}')
+
+        async def invoke_agent(request: AgentRequest):
+            yield AgentEvent(
+                event=EventType.TEXT,
+                data={"delta": "answer"},
+                addition={
+                    "additional_kwargs": {"reasoning_content": "thinking"}
+                },
+            )
+
+        response = self.get_client(invoke_agent).post(
+            "/ag-ui/agent",
+            json={"messages": [{"role": "user", "content": "Hi"}]},
+        )
+
+        events = _agui_sse_events(response)
+        types = [event["type"] for event in events]
+        assert types.index("REASONING_MESSAGE_CONTENT") < types.index(
+            "TEXT_MESSAGE_START"
+        )
+        assert "REASONING_MESSAGE_END" in types
+        assert "REASONING_END" in types
+        text_event = next(
+            event for event in events if event["type"] == "TEXT_MESSAGE_CONTENT"
+        )
+        assert text_event["delta"] == "answer"
+        assert "additional_kwargs" not in text_event
+
+    def test_text_addition_reasoning_is_stripped_when_thinking_disabled(
+        self, monkeypatch
+    ):
+        monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
+
+        async def invoke_agent(request: AgentRequest):
+            yield AgentEvent(
+                event=EventType.TEXT,
+                data={"delta": "answer"},
+                addition={
+                    "additional_kwargs": {"reasoning_content": "thinking"}
+                },
+            )
+
+        response = self.get_client(invoke_agent).post(
+            "/ag-ui/agent",
+            json={"messages": [{"role": "user", "content": "Hi"}]},
+        )
+
+        events = _agui_sse_events(response)
+        types = [event["type"] for event in events]
+        assert all(not event_type.startswith("REASONING") for event_type in types)
+        text_event = next(
+            event for event in events if event["type"] == "TEXT_MESSAGE_CONTENT"
+        )
+        assert text_event["delta"] == "answer"
+        assert "additional_kwargs" not in text_event
diff --git a/tests/unittests/server/test_openai_protocol.py b/tests/unittests/server/test_openai_protocol.py
@@ -1098,6 +1098,32 @@ def invoke_agent(request: AgentRequest):
         assert message["content"] == "answer"
         assert message["reasoning_content"] == "thinking"
 
+    def test_non_stream_suppresses_reasoning_when_thinking_disabled(
+        self, monkeypatch
+    ):
+        monkeypatch.setenv("MODEL_PARAMETER_RULES", '{"thinking": false}')
+
+        def invoke_agent(request: AgentRequest):
+            return [
+                AgentEvent(
+                    event=EventType.REASONING,
+                    data={"delta": "thinking"},
+                ),
+                AgentEvent(event=EventType.TEXT, data={"delta": "answer"}),
+            ]
+
+        response = self.get_client(invoke_agent).post(
+            "/openai/v1/chat/completions",
+            json={
+                "messages": [{"role": "user", "content": "Hi"}],
+                "stream": False,
+            },
+        )
+
+        message = response.json()["choices"][0]["message"]
+        assert message["content"] == "answer"
+        assert "reasoning_content" not in message
+
     def test_stream_promotes_chunk_additional_kwargs_reasoning(
         self, monkeypatch
     ):
@@ -1120,3 +1146,25 @@ async def invoke_agent(request: AgentRequest):
         events = _openai_sse_events(response)
         assert events[0]["choices"][0]["delta"]["reasoning_content"] == "thinking"
         assert events[1]["choices"][0]["delta"]["content"] == "answer"
+
+    def test_parses_request_message_reasoning_content(self):
+        captured_request = {}
+
+        def invoke_agent(request: AgentRequest):
+            captured_request["messages"] = request.messages
+            return "Done"
+
+        response = self.get_client(invoke_agent).post(
+            "/openai/v1/chat/completions",
+            json={
+                "messages": [{
+                    "role": "assistant",
+                    "content": "answer",
+                    "reasoning_content": "thinking",
+                }],
+                "stream": False,
+            },
+        )
+
+        assert response.status_code == 200
+        assert captured_request["messages"][0].reasoning_content == "thinking"
diff --git a/tests/unittests/server/test_reasoning.py b/tests/unittests/server/test_reasoning.py
@@ -30,12 +30,29 @@ def test_model_parameter_rules_list_enables_thinking():
     assert get_thinking_value_from_env(env) is True
 
 
+def test_model_parameter_rules_nested_parameters_disables_thinking():
+    env = {
+        "MODEL_PARAMETER_RULES": (
+            '{"parameters": [{"name": "thinking", "default": "false"}]}'
+        )
+    }
+
+    assert is_thinking_enabled_from_env(env) is False
+    assert get_thinking_value_from_env(env) is False
+
+
 def test_model_parameter_rules_invalid_json_disables_thinking():
     env = {"MODEL_PARAMETER_RULES": "not json"}
 
     assert is_thinking_enabled_from_env(env) is False
 
 
+def test_get_reasoning_content_from_attribute():
+    chunk = SimpleNamespace(reasoning_content="thinking")
+
+    assert get_reasoning_content(chunk) == "thinking"
+
+
 def test_get_reasoning_content_from_additional_kwargs():
     chunk = {"additional_kwargs": {"reasoning_content": "thinking"}}