Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions docs/mcp.md
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,33 @@ server = MCPServerStreamableHttp(

If your run context is a Pydantic model, dataclass, or custom class, read the tenant ID with attribute access instead.

### Reading response `_meta` from MCP tools

The MCP specification allows a server's `CallToolResult` to include a `_meta` object alongside `content` and `structuredContent`. The SDK captures it on the resulting `ToolCallOutputItem` as `mcp_response_meta`, so applications can read auxiliary payloads (chart configs, trace IDs, frontend-only state) without forwarding them to the model.

`mcp_response_meta` is a deep copy of the server-returned dict and is never injected into model context — only `content` and `structuredContent` are. It is `None` when the server omits `_meta` or returns an empty object. Use it for application-side consumers such as streaming UIs.

```python
import asyncio

from agents import Agent, Runner
from agents.items import ToolCallOutputItem
from agents.mcp import MCPServerStreamableHttp


async def main() -> None:
    # Entering the context manager connects to the MCP server; leaving it cleans up.
    async with MCPServerStreamableHttp(
        name="meta-emitting server",
        params={"url": "http://localhost:8000/mcp"},
    ) as server:
        agent = Agent(name="assistant", mcp_servers=[server])
        result = await Runner.run(agent, input="show me a chart of Q1 sales")

    for item in result.new_items:
        # Only tool outputs whose server returned a non-empty `_meta` carry a payload.
        if isinstance(item, ToolCallOutputItem) and item.mcp_response_meta:
            # e.g. {"tool_meta": {"type": "chart", "vis_config": {...}}}
            frontend_payload = item.mcp_response_meta


asyncio.run(main())
```

When streaming, the same item is delivered as a `run_item_stream_event` with `name="tool_output"`; read `event.item.mcp_response_meta` on receipt.

### MCP tool outputs: text and images

When an MCP tool returns image content, the SDK maps it to image tool output entries automatically. Mixed text/image responses are forwarded as a list of output items, so agents can consume MCP image results the same way they consume image output from regular function tools.
Expand Down
9 changes: 9 additions & 0 deletions src/agents/items.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,15 @@ class ToolCallOutputItem(RunItemBase[Any]):
tool_origin: ToolOrigin | None = None
"""Optional metadata describing the source of a function-tool-backed item."""

mcp_response_meta: dict[str, Any] | None = None
"""Optional ``_meta`` returned by an MCP server in ``CallToolResult``.

Populated when this item was produced by an MCP tool call whose server returned a
non-empty ``_meta`` field, per the MCP specification. The value is a deep copy and is
not forwarded to the model; it is intended for application-side consumers (e.g.
streaming UIs that need auxiliary payloads without polluting LLM context).
"""

@property
def call_id(self) -> str | None:
"""Return the call identifier from the raw item, if available."""
Expand Down
4 changes: 4 additions & 0 deletions src/agents/mcp/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -663,6 +663,10 @@ async def invoke_mcp_tool(
else:
logger.debug(f"MCP tool {tool_name_for_display} returned {result}")

response_meta = getattr(result, "meta", None)
if isinstance(response_meta, dict) and response_meta and isinstance(context, ToolContext):
context._mcp_response_meta = copy.deepcopy(response_meta)

# If structured content is requested and available, use it exclusively
tool_output: ToolOutput
if server.use_structured_content and result.structuredContent:
Expand Down
5 changes: 5 additions & 0 deletions src/agents/run_internal/tool_execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -1380,6 +1380,7 @@ def __init__(
self.task_states: dict[asyncio.Task[Any], _FunctionToolTaskState] = {}
self.teardown_cancelled_tasks: set[asyncio.Task[Any]] = set()
self.results_by_tool_run: dict[int, Any] = {}
self.mcp_response_meta_by_tool_run: dict[int, dict[str, Any]] = {}
self.pending_tasks: set[asyncio.Task[Any]] = set()
self.propagating_failure: BaseException | None = None
self.available_function_tools: list[FunctionTool] = []
Expand Down Expand Up @@ -1758,6 +1759,9 @@ async def _invoke_tool_and_run_post_invoke(
context=tool_context,
arguments=tool_call.arguments,
)
response_meta = getattr(tool_context, "_mcp_response_meta", None)
if response_meta:
self.mcp_response_meta_by_tool_run[id(task_state.tool_run)] = response_meta
except asyncio.CancelledError as e:
if outer_task in self.teardown_cancelled_tasks:
raise
Expand Down Expand Up @@ -1898,6 +1902,7 @@ def _build_function_tool_results(self) -> list[FunctionToolResult]:
raw_item=ItemHelpers.tool_call_output_item(tool_run.tool_call, result),
agent=self.public_agent,
tool_origin=get_function_tool_origin(tool_run.function_tool),
mcp_response_meta=self.mcp_response_meta_by_tool_run.get(id(tool_run)),
)
else:
# Skip tool output until nested interruptions are resolved.
Expand Down
13 changes: 12 additions & 1 deletion src/agents/run_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@
# 3. to_json() always emits CURRENT_SCHEMA_VERSION.
# 4. Forward compatibility is intentionally fail-fast (older SDKs reject newer or unsupported
# versions).
CURRENT_SCHEMA_VERSION = "1.10"
CURRENT_SCHEMA_VERSION = "1.11"
# Keep this mapping in chronological order. Every schema bump must add a one-line summary here.
SCHEMA_VERSION_SUMMARIES: dict[str, str] = {
"1.0": "Initial RunState snapshot format for HITL pause/resume flows.",
Expand All @@ -145,6 +145,7 @@
"1.8": "Persists SDK-generated prompt cache keys across resume flows.",
"1.9": "Persists pending custom tool calls and tool origin metadata across resume flows.",
"1.10": "Allows serialized RunState snapshots to disable max_turns with null.",
"1.11": "Persists MCP CallToolResult `_meta` on ToolCallOutputItem across resume flows.",
}
SUPPORTED_SCHEMA_VERSIONS = frozenset(SCHEMA_VERSION_SUMMARIES)

Expand Down Expand Up @@ -908,6 +909,9 @@ def _serialize_item(
tool_origin = getattr(item, "tool_origin", None)
if isinstance(tool_origin, ToolOrigin):
result["tool_origin"] = tool_origin.to_json_dict()
mcp_response_meta = getattr(item, "mcp_response_meta", None)
if isinstance(mcp_response_meta, dict) and mcp_response_meta:
result["mcp_response_meta"] = _ensure_json_compatible(mcp_response_meta)

return result

Expand Down Expand Up @@ -3192,12 +3196,19 @@ def _resolve_agent_info(
raw_item_output = _deserialize_tool_call_output_raw_item(normalized_raw_item)
if raw_item_output is None:
continue
stored_mcp_response_meta = item_data.get("mcp_response_meta")
mcp_response_meta_value = (
stored_mcp_response_meta
if isinstance(stored_mcp_response_meta, dict) and stored_mcp_response_meta
else None
)
result.append(
ToolCallOutputItem(
agent=agent,
raw_item=raw_item_output,
output=item_data.get("output", ""),
tool_origin=_deserialize_tool_origin(item_data.get("tool_origin")),
mcp_response_meta=mcp_response_meta_value,
)
)

Expand Down
3 changes: 3 additions & 0 deletions src/agents/tool_context.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,9 @@ def __init__(
)
self.agent = agent
self.run_config = run_config
# Populated by MCP tool invocation when the server returns ``CallToolResult._meta``.
# Surfaced on the resulting ToolCallOutputItem; not part of the public constructor.
self._mcp_response_meta: dict[str, Any] | None = None

@property
def qualified_tool_name(self) -> str:
Expand Down
7 changes: 6 additions & 1 deletion tests/mcp/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ def __init__(
self.tool_filter = tool_filter
self._server_name = server_name
self._custom_content: list[Content] | None = None
self._response_meta: dict[str, Any] | None = None

def add_tool(self, name: str, input_schema: dict[str, Any]):
self.tools.append(MCPTool(name=name, inputSchema=input_schema))
Expand Down Expand Up @@ -123,10 +124,14 @@ async def call_tool(

# Allow testing custom content scenarios
if self._custom_content is not None:
return CallToolResult(content=self._custom_content)
return CallToolResult(
content=self._custom_content,
_meta=self._response_meta,
)

return CallToolResult(
content=[TextContent(text=self.tool_results[-1], type="text")],
_meta=self._response_meta,
)

async def list_prompts(self, run_context=None, agent=None) -> ListPromptsResult:
Expand Down
63 changes: 63 additions & 0 deletions tests/mcp/test_mcp_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,69 @@ async def test_invoke_mcp_tool():
# Just making sure it doesn't crash


@pytest.mark.asyncio
async def test_invoke_mcp_tool_captures_response_meta_on_tool_context():
    """Server-returned ``_meta`` is captured on ToolContext as an independent deep copy."""
    server = FakeMCPServer()
    server.add_tool("test_tool_1", {})
    server._response_meta = {"chart": {"type": "line"}, "trace_id": "abc-123"}

    ctx = ToolContext(
        context=None,
        tool_name="test_tool_1",
        tool_call_id="test_call_response_meta",
        tool_arguments="{}",
    )
    tool = MCPTool(name="test_tool_1", inputSchema={})

    await MCPUtil.invoke_mcp_tool(server, tool, ctx, "{}")

    captured = ctx._mcp_response_meta
    # Narrow the optional stash before running container checks against it.
    assert captured is not None
    assert captured == {"chart": {"type": "line"}, "trace_id": "abc-123"}

    # Top-level mutation of the server-side dict must not leak into the captured payload.
    server._response_meta["mutated"] = True
    assert "mutated" not in captured

    # Nested mutation is what separates a deep copy from a shallow one: a shallow copy
    # would share the inner ``chart`` dict and observe this change.
    server._response_meta["chart"]["type"] = "bar"
    assert captured["chart"] == {"type": "line"}


@pytest.mark.asyncio
async def test_invoke_mcp_tool_leaves_response_meta_unset_when_server_omits_it():
    """A server response without ``_meta`` must leave the ToolContext stash at ``None``."""
    # No response meta is configured on the fake server for this scenario.
    fake_server = FakeMCPServer()
    fake_server.add_tool("test_tool_1", {})

    mcp_tool = MCPTool(name="test_tool_1", inputSchema={})
    tool_context = ToolContext(
        context=None,
        tool_name="test_tool_1",
        tool_call_id="test_call_no_meta",
        tool_arguments="{}",
    )

    await MCPUtil.invoke_mcp_tool(fake_server, mcp_tool, tool_context, "{}")

    assert tool_context._mcp_response_meta is None


@pytest.mark.asyncio
async def test_invoke_mcp_tool_ignores_empty_response_meta():
    """An empty ``_meta`` dict behaves like a missing one: nothing is stashed on the context."""
    fake_server = FakeMCPServer()
    fake_server.add_tool("test_tool_1", {})
    # Configure the fake server to answer with an explicitly empty ``_meta``.
    fake_server._response_meta = {}

    mcp_tool = MCPTool(name="test_tool_1", inputSchema={})
    tool_context = ToolContext(
        context=None,
        tool_name="test_tool_1",
        tool_call_id="test_call_empty_meta",
        tool_arguments="{}",
    )

    await MCPUtil.invoke_mcp_tool(fake_server, mcp_tool, tool_context, "{}")

    assert tool_context._mcp_response_meta is None


@pytest.mark.asyncio
async def test_mcp_meta_resolver_merges_and_passes():
captured: dict[str, Any] = {}
Expand Down
64 changes: 64 additions & 0 deletions tests/mcp/test_runner_calls_mcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -400,3 +400,67 @@ async def test_runner_emits_mcp_error_tool_call_output_item(streaming: bool):
wrapped_error,
)
assert tool_output_items[0].output == expected_error_message


@pytest.mark.asyncio
@pytest.mark.parametrize("streaming", [False, True])
async def test_runner_surfaces_mcp_response_meta_on_tool_output_item(streaming: bool):
    """``_meta`` from the server's CallToolResult is exposed on the ToolCallOutputItem.

    Runs once through ``Runner.run`` and once through ``Runner.run_streamed``.
    """
    fake_server = FakeMCPServer()
    fake_server.add_tool("meta_tool", {})
    fake_server._response_meta = {
        "tool_meta": {"type": "chart", "vis_config": {"chart_type": "line"}}
    }

    fake_model = FakeModel()
    agent = Agent(name="test", model=fake_model, mcp_servers=[fake_server])

    # Turn 1 calls the MCP tool; turn 2 ends the run with a plain message.
    first_turn = [get_text_message("a_message"), get_function_tool_call("meta_tool", "{}")]
    second_turn = [get_text_message("done")]
    fake_model.add_multiple_turn_outputs([first_turn, second_turn])

    if not streaming:
        non_streamed_result = await Runner.run(agent, input="user_message")
        new_items = non_streamed_result.new_items
    else:
        streamed_result = Runner.run_streamed(agent, input="user_message")
        # Drain the stream so the run completes before inspecting its items.
        async for _ in streamed_result.stream_events():
            pass
        new_items = streamed_result.new_items

    tool_outputs = [entry for entry in new_items if entry.type == "tool_call_output_item"]
    assert len(tool_outputs) == 1
    only_output = tool_outputs[0]
    assert only_output.mcp_response_meta == {
        "tool_meta": {"type": "chart", "vis_config": {"chart_type": "line"}}
    }


@pytest.mark.asyncio
async def test_runner_leaves_mcp_response_meta_none_when_server_omits_it():
    """Without a server ``_meta``, the output item's ``mcp_response_meta`` is ``None``."""
    fake_server = FakeMCPServer()
    fake_server.add_tool("plain_tool", {})

    fake_model = FakeModel()
    agent = Agent(name="test", model=fake_model, mcp_servers=[fake_server])

    # Turn 1 calls the MCP tool; turn 2 ends the run with a plain message.
    first_turn = [get_text_message("a_message"), get_function_tool_call("plain_tool", "{}")]
    second_turn = [get_text_message("done")]
    fake_model.add_multiple_turn_outputs([first_turn, second_turn])

    run_result = await Runner.run(agent, input="user_message")

    tool_outputs = [
        entry for entry in run_result.new_items if entry.type == "tool_call_output_item"
    ]
    assert len(tool_outputs) == 1
    only_output = tool_outputs[0]
    assert only_output.mcp_response_meta is None
49 changes: 49 additions & 0 deletions tests/test_run_state.py
Original file line number Diff line number Diff line change
Expand Up @@ -3665,6 +3665,54 @@ async def test_deserialize_tool_call_output_item_different_types(self):
result_shell = _deserialize_items([item_data_shell], {"TestAgent": agent})
assert len(result_shell) == 1

async def test_deserialize_tool_call_output_item_preserves_mcp_response_meta(self):
    """A stored, non-empty ``mcp_response_meta`` is restored on the deserialized item."""
    test_agent = Agent(name="TestAgent")

    serialized_raw_item = {
        "type": "function_call_output",
        "call_id": "call_meta",
        "output": "ok",
    }
    serialized_item = {
        "type": "tool_call_output_item",
        "agent": {"name": "TestAgent"},
        "raw_item": serialized_raw_item,
        "output": "ok",
        "mcp_response_meta": {
            "tool_meta": {"type": "chart", "vis_config": {"chart_type": "line"}}
        },
    }

    restored_items = _deserialize_items([serialized_item], {"TestAgent": test_agent})

    assert len(restored_items) == 1
    restored_item = restored_items[0]
    assert isinstance(restored_item, ToolCallOutputItem)
    assert restored_item.mcp_response_meta == {
        "tool_meta": {"type": "chart", "vis_config": {"chart_type": "line"}}
    }

async def test_deserialize_tool_call_output_item_treats_empty_mcp_response_meta_as_none(self):
    """A stored but empty ``mcp_response_meta`` dict is restored as ``None``."""
    test_agent = Agent(name="TestAgent")

    serialized_raw_item = {
        "type": "function_call_output",
        "call_id": "call_no_meta",
        "output": "ok",
    }
    serialized_item = {
        "type": "tool_call_output_item",
        "agent": {"name": "TestAgent"},
        "raw_item": serialized_raw_item,
        "output": "ok",
        "mcp_response_meta": {},
    }

    restored_items = _deserialize_items([serialized_item], {"TestAgent": test_agent})

    assert len(restored_items) == 1
    restored_item = restored_items[0]
    assert isinstance(restored_item, ToolCallOutputItem)
    assert restored_item.mcp_response_meta is None

async def test_deserialize_reasoning_item(self):
"""Test deserialization of reasoning_item."""
agent = Agent(name="TestAgent")
Expand Down Expand Up @@ -4636,6 +4684,7 @@ def test_supported_schema_versions_match_released_boundary(self):
"1.7",
"1.8",
"1.9",
"1.10",
CURRENT_SCHEMA_VERSION,
}
)
Expand Down