diff --git a/docs/mcp.md b/docs/mcp.md index 4990585dd5..3761aa82b5 100644 --- a/docs/mcp.md +++ b/docs/mcp.md @@ -277,6 +277,33 @@ server = MCPServerStreamableHttp( If your run context is a Pydantic model, dataclass, or custom class, read the tenant ID with attribute access instead. +### Reading response `_meta` from MCP tools + +The MCP specification allows a server's `CallToolResult` to include a `_meta` object alongside `content` and `structuredContent`. The SDK captures it on the resulting `ToolCallOutputItem` as `mcp_response_meta`, so applications can read auxiliary payloads (chart configs, trace IDs, frontend-only state) without forwarding them to the model. + +`mcp_response_meta` is a deep copy of the server-returned dict and is never injected into model context — only `content` and `structuredContent` are. Use it for application-side consumers such as streaming UIs. The attribute is `None` when the server omits `_meta` or returns an empty object. + +```python +from agents import Agent, Runner +from agents.items import ToolCallOutputItem +from agents.mcp import MCPServerStreamableHttp + +server = MCPServerStreamableHttp( + name="meta-emitting server", + params={"url": "http://localhost:8000/mcp"}, +) + +agent = Agent(name="assistant", mcp_servers=[server]) +result = await Runner.run(agent, input="show me a chart of Q1 sales") + +for item in result.new_items: + if isinstance(item, ToolCallOutputItem) and item.mcp_response_meta: + # e.g. {"tool_meta": {"type": "chart", "vis_config": {...}}} + frontend_payload = item.mcp_response_meta +``` + +When streaming, the same item is delivered as a `run_item_stream_event` with `name="tool_output"`; read `event.item.mcp_response_meta` on receipt. + ### MCP tool outputs: text and images When an MCP tool returns image content, the SDK maps it to image tool output entries automatically. Mixed text/image responses are forwarded as a list of output items, so agents can consume MCP image results the same way they consume image output from regular function tools. 
diff --git a/src/agents/items.py b/src/agents/items.py index c761cc221f..5116a1388d 100644 --- a/src/agents/items.py +++ b/src/agents/items.py @@ -403,6 +403,15 @@ class ToolCallOutputItem(RunItemBase[Any]): tool_origin: ToolOrigin | None = None """Optional metadata describing the source of a function-tool-backed item.""" + mcp_response_meta: dict[str, Any] | None = None + """Optional ``_meta`` returned by an MCP server in ``CallToolResult``. + + Populated when this item was produced by an MCP tool call whose server returned a + non-empty ``_meta`` field, per the MCP specification. The value is a deep copy and is + not forwarded to the model; it is intended for application-side consumers (e.g. + streaming UIs that need auxiliary payloads without polluting LLM context). + """ + @property def call_id(self) -> str | None: """Return the call identifier from the raw item, if available.""" diff --git a/src/agents/mcp/util.py b/src/agents/mcp/util.py index bf00cb2b79..25267e3bfb 100644 --- a/src/agents/mcp/util.py +++ b/src/agents/mcp/util.py @@ -663,6 +663,10 @@ async def invoke_mcp_tool( else: logger.debug(f"MCP tool {tool_name_for_display} returned {result}") + response_meta = getattr(result, "meta", None) + if isinstance(response_meta, dict) and response_meta and isinstance(context, ToolContext): + context._mcp_response_meta = copy.deepcopy(response_meta) + # If structured content is requested and available, use it exclusively tool_output: ToolOutput if server.use_structured_content and result.structuredContent: diff --git a/src/agents/run_internal/tool_execution.py b/src/agents/run_internal/tool_execution.py index 8f30e4a01f..bce8b5b601 100644 --- a/src/agents/run_internal/tool_execution.py +++ b/src/agents/run_internal/tool_execution.py @@ -1380,6 +1380,7 @@ def __init__( self.task_states: dict[asyncio.Task[Any], _FunctionToolTaskState] = {} self.teardown_cancelled_tasks: set[asyncio.Task[Any]] = set() self.results_by_tool_run: dict[int, Any] = {} + 
self.mcp_response_meta_by_tool_run: dict[int, dict[str, Any]] = {} self.pending_tasks: set[asyncio.Task[Any]] = set() self.propagating_failure: BaseException | None = None self.available_function_tools: list[FunctionTool] = [] @@ -1758,6 +1759,9 @@ async def _invoke_tool_and_run_post_invoke( context=tool_context, arguments=tool_call.arguments, ) + response_meta = getattr(tool_context, "_mcp_response_meta", None) + if response_meta: + self.mcp_response_meta_by_tool_run[id(task_state.tool_run)] = response_meta except asyncio.CancelledError as e: if outer_task in self.teardown_cancelled_tasks: raise @@ -1898,6 +1902,7 @@ def _build_function_tool_results(self) -> list[FunctionToolResult]: raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, result), agent=self.public_agent, tool_origin=get_function_tool_origin(tool_run.function_tool), + mcp_response_meta=self.mcp_response_meta_by_tool_run.get(id(tool_run)), ) else: # Skip tool output until nested interruptions are resolved. diff --git a/src/agents/run_state.py b/src/agents/run_state.py index c5bb8c9faf..4ad47d1e35 100644 --- a/src/agents/run_state.py +++ b/src/agents/run_state.py @@ -128,7 +128,7 @@ # 3. to_json() always emits CURRENT_SCHEMA_VERSION. # 4. Forward compatibility is intentionally fail-fast (older SDKs reject newer or unsupported # versions). -CURRENT_SCHEMA_VERSION = "1.10" +CURRENT_SCHEMA_VERSION = "1.11" # Keep this mapping in chronological order. Every schema bump must add a one-line summary here. 
SCHEMA_VERSION_SUMMARIES: dict[str, str] = { "1.0": "Initial RunState snapshot format for HITL pause/resume flows.", @@ -145,6 +145,7 @@ "1.8": "Persists SDK-generated prompt cache keys across resume flows.", "1.9": "Persists pending custom tool calls and tool origin metadata across resume flows.", "1.10": "Allows serialized RunState snapshots to disable max_turns with null.", + "1.11": "Persists MCP CallToolResult `_meta` on ToolCallOutputItem across resume flows.", } SUPPORTED_SCHEMA_VERSIONS = frozenset(SCHEMA_VERSION_SUMMARIES) @@ -908,6 +909,9 @@ def _serialize_item( tool_origin = getattr(item, "tool_origin", None) if isinstance(tool_origin, ToolOrigin): result["tool_origin"] = tool_origin.to_json_dict() + mcp_response_meta = getattr(item, "mcp_response_meta", None) + if isinstance(mcp_response_meta, dict) and mcp_response_meta: + result["mcp_response_meta"] = _ensure_json_compatible(mcp_response_meta) return result @@ -3192,12 +3196,19 @@ def _resolve_agent_info( raw_item_output = _deserialize_tool_call_output_raw_item(normalized_raw_item) if raw_item_output is None: continue + stored_mcp_response_meta = item_data.get("mcp_response_meta") + mcp_response_meta_value = ( + stored_mcp_response_meta + if isinstance(stored_mcp_response_meta, dict) and stored_mcp_response_meta + else None + ) result.append( ToolCallOutputItem( agent=agent, raw_item=raw_item_output, output=item_data.get("output", ""), tool_origin=_deserialize_tool_origin(item_data.get("tool_origin")), + mcp_response_meta=mcp_response_meta_value, ) ) diff --git a/src/agents/tool_context.py b/src/agents/tool_context.py index eaad0cc167..de61094a8d 100644 --- a/src/agents/tool_context.py +++ b/src/agents/tool_context.py @@ -103,6 +103,9 @@ def __init__( ) self.agent = agent self.run_config = run_config + # Populated by MCP tool invocation when the server returns ``CallToolResult._meta``. + # Surfaced on the resulting ToolCallOutputItem; not part of the public constructor. 
+ self._mcp_response_meta: dict[str, Any] | None = None @property def qualified_tool_name(self) -> str: diff --git a/tests/mcp/helpers.py b/tests/mcp/helpers.py index ef820fad99..6559d2c854 100644 --- a/tests/mcp/helpers.py +++ b/tests/mcp/helpers.py @@ -90,6 +90,7 @@ def __init__( self.tool_filter = tool_filter self._server_name = server_name self._custom_content: list[Content] | None = None + self._response_meta: dict[str, Any] | None = None def add_tool(self, name: str, input_schema: dict[str, Any]): self.tools.append(MCPTool(name=name, inputSchema=input_schema)) @@ -123,10 +124,14 @@ async def call_tool( # Allow testing custom content scenarios if self._custom_content is not None: - return CallToolResult(content=self._custom_content) + return CallToolResult( + content=self._custom_content, + _meta=self._response_meta, + ) return CallToolResult( content=[TextContent(text=self.tool_results[-1], type="text")], + _meta=self._response_meta, ) async def list_prompts(self, run_context=None, agent=None) -> ListPromptsResult: diff --git a/tests/mcp/test_mcp_util.py b/tests/mcp/test_mcp_util.py index e41884375d..a0e1fc36be 100644 --- a/tests/mcp/test_mcp_util.py +++ b/tests/mcp/test_mcp_util.py @@ -477,6 +477,69 @@ async def test_invoke_mcp_tool(): # Just making sure it doesn't crash +@pytest.mark.asyncio +async def test_invoke_mcp_tool_captures_response_meta_on_tool_context(): + """Server-returned ``_meta`` is captured on ToolContext for the executor to surface.""" + server = FakeMCPServer() + server.add_tool("test_tool_1", {}) + server._response_meta = {"chart": {"type": "line"}, "trace_id": "abc-123"} + + ctx = ToolContext( + context=None, + tool_name="test_tool_1", + tool_call_id="test_call_response_meta", + tool_arguments="{}", + ) + tool = MCPTool(name="test_tool_1", inputSchema={}) + + await MCPUtil.invoke_mcp_tool(server, tool, ctx, "{}") + + assert ctx._mcp_response_meta == {"chart": {"type": "line"}, "trace_id": "abc-123"} + # Ensure the captured payload is a 
deep copy: mutating the server-side dict must + # not affect what the SDK exposed to the executor. + server._response_meta["mutated"] = True + assert "mutated" not in ctx._mcp_response_meta + + +@pytest.mark.asyncio +async def test_invoke_mcp_tool_leaves_response_meta_unset_when_server_omits_it(): + """When the server returns no ``_meta``, the ToolContext stash stays None.""" + server = FakeMCPServer() + server.add_tool("test_tool_1", {}) + + ctx = ToolContext( + context=None, + tool_name="test_tool_1", + tool_call_id="test_call_no_meta", + tool_arguments="{}", + ) + tool = MCPTool(name="test_tool_1", inputSchema={}) + + await MCPUtil.invoke_mcp_tool(server, tool, ctx, "{}") + + assert ctx._mcp_response_meta is None + + +@pytest.mark.asyncio +async def test_invoke_mcp_tool_ignores_empty_response_meta(): + """An empty ``_meta`` dict is treated the same as no meta (no stash, no propagation).""" + server = FakeMCPServer() + server.add_tool("test_tool_1", {}) + server._response_meta = {} + + ctx = ToolContext( + context=None, + tool_name="test_tool_1", + tool_call_id="test_call_empty_meta", + tool_arguments="{}", + ) + tool = MCPTool(name="test_tool_1", inputSchema={}) + + await MCPUtil.invoke_mcp_tool(server, tool, ctx, "{}") + + assert ctx._mcp_response_meta is None + + @pytest.mark.asyncio async def test_mcp_meta_resolver_merges_and_passes(): captured: dict[str, Any] = {} diff --git a/tests/mcp/test_runner_calls_mcp.py b/tests/mcp/test_runner_calls_mcp.py index 9a97900d48..cbaa7cbcc1 100644 --- a/tests/mcp/test_runner_calls_mcp.py +++ b/tests/mcp/test_runner_calls_mcp.py @@ -400,3 +400,67 @@ async def test_runner_emits_mcp_error_tool_call_output_item(streaming: bool): wrapped_error, ) assert tool_output_items[0].output == expected_error_message + + +@pytest.mark.asyncio +@pytest.mark.parametrize("streaming", [False, True]) +async def test_runner_surfaces_mcp_response_meta_on_tool_output_item(streaming: bool): + """Server-returned ``_meta`` on CallToolResult should 
appear on ToolCallOutputItem.""" + server = FakeMCPServer() + server.add_tool("meta_tool", {}) + server._response_meta = {"tool_meta": {"type": "chart", "vis_config": {"chart_type": "line"}}} + + model = FakeModel() + agent = Agent( + name="test", + model=model, + mcp_servers=[server], + ) + + model.add_multiple_turn_outputs( + [ + [get_text_message("a_message"), get_function_tool_call("meta_tool", "{}")], + [get_text_message("done")], + ] + ) + + if streaming: + streamed_result = Runner.run_streamed(agent, input="user_message") + async for _ in streamed_result.stream_events(): + pass + new_items = streamed_result.new_items + else: + non_streamed_result = await Runner.run(agent, input="user_message") + new_items = non_streamed_result.new_items + + tool_output_items = [item for item in new_items if item.type == "tool_call_output_item"] + assert len(tool_output_items) == 1 + assert tool_output_items[0].mcp_response_meta == { + "tool_meta": {"type": "chart", "vis_config": {"chart_type": "line"}} + } + + +@pytest.mark.asyncio +async def test_runner_leaves_mcp_response_meta_none_when_server_omits_it(): + """When the server omits ``_meta``, the item's ``mcp_response_meta`` stays ``None``.""" + server = FakeMCPServer() + server.add_tool("plain_tool", {}) + + model = FakeModel() + agent = Agent( + name="test", + model=model, + mcp_servers=[server], + ) + + model.add_multiple_turn_outputs( + [ + [get_text_message("a_message"), get_function_tool_call("plain_tool", "{}")], + [get_text_message("done")], + ] + ) + + result = await Runner.run(agent, input="user_message") + tool_output_items = [item for item in result.new_items if item.type == "tool_call_output_item"] + assert len(tool_output_items) == 1 + assert tool_output_items[0].mcp_response_meta is None diff --git a/tests/test_run_state.py b/tests/test_run_state.py index 7b2de6b859..245838a2b6 100644 --- a/tests/test_run_state.py +++ b/tests/test_run_state.py @@ -3665,6 +3665,54 @@ async def 
test_deserialize_tool_call_output_item_different_types(self): result_shell = _deserialize_items([item_data_shell], {"TestAgent": agent}) assert len(result_shell) == 1 + async def test_deserialize_tool_call_output_item_preserves_mcp_response_meta(self): + """``mcp_response_meta`` round-trips through serialization for MCP-backed items.""" + agent = Agent(name="TestAgent") + + item_data = { + "type": "tool_call_output_item", + "agent": {"name": "TestAgent"}, + "raw_item": { + "type": "function_call_output", + "call_id": "call_meta", + "output": "ok", + }, + "output": "ok", + "mcp_response_meta": { + "tool_meta": {"type": "chart", "vis_config": {"chart_type": "line"}} + }, + } + + restored = _deserialize_items([item_data], {"TestAgent": agent}) + assert len(restored) == 1 + item = restored[0] + assert isinstance(item, ToolCallOutputItem) + assert item.mcp_response_meta == { + "tool_meta": {"type": "chart", "vis_config": {"chart_type": "line"}} + } + + async def test_deserialize_tool_call_output_item_treats_empty_mcp_response_meta_as_none(self): + """An empty ``mcp_response_meta`` payload should restore as ``None`` for consistency.""" + agent = Agent(name="TestAgent") + + item_data = { + "type": "tool_call_output_item", + "agent": {"name": "TestAgent"}, + "raw_item": { + "type": "function_call_output", + "call_id": "call_no_meta", + "output": "ok", + }, + "output": "ok", + "mcp_response_meta": {}, + } + + restored = _deserialize_items([item_data], {"TestAgent": agent}) + assert len(restored) == 1 + item = restored[0] + assert isinstance(item, ToolCallOutputItem) + assert item.mcp_response_meta is None + async def test_deserialize_reasoning_item(self): """Test deserialization of reasoning_item.""" agent = Agent(name="TestAgent") @@ -4636,6 +4684,7 @@ def test_supported_schema_versions_match_released_boundary(self): "1.7", "1.8", "1.9", + "1.10", CURRENT_SCHEMA_VERSION, } )