From a81da85aa120982efb8da1018328b18e99f640b9 Mon Sep 17 00:00:00 2001
From: xiaosu <xiaosu@xiaosudeMacBook-Pro-2.local>
Date: Wed, 13 May 2026 15:22:40 +0800
Subject: [PATCH 1/3] fix: replace RuntimeError with cancel_tool to prevent
 corrupted Memory history on tool limit

When tool calls reach the 20-call limit, raising RuntimeError in
AfterToolCallEvent breaks the message history, leaving toolUse blocks
without matching toolResult blocks. When AgentCore Memory restores this
corrupted history in subsequent requests, Bedrock's ConverseStream API
rejects it with ValidationException.

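Fix: Use BeforeToolCallEvent with event.cancel_tool instead. This
cancels the tool gracefully by returning an error message to the model,
which then responds using already-gathered information. The conversation
history remains consistent and Memory can safely restore it.

Because the counter is now incremented before each tool runs, the check
changes from ">= 20" to "> 20": the first 20 calls still execute and the
21st and later calls are cancelled.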
---
 main.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/main.py b/main.py
index 8d09e8d..4a21045 100644
--- a/main.py
+++ b/main.py
@@ -176,15 +176,18 @@ async def invoke(payload):
         # Limit tool calls to prevent infinite loops
         tool_call_count = {"n": 0}
 
-        from strands.hooks import AfterToolCallEvent
+        from strands.hooks import BeforeToolCallEvent
 
-        def check_tool_limit(event: AfterToolCallEvent):
+        def check_tool_limit(event: BeforeToolCallEvent):
             tool_call_count["n"] += 1
-            if tool_call_count["n"] >= 20:
+            if tool_call_count["n"] > 20:
                 logger.warning(f"⚠️ Tool call limit reached (20)")
-                raise RuntimeError("工具调用次数超过上限（20次），已强制停止。请简化问题后重试。")
+                event.cancel_tool = (
+                    "工具调用次数已超过上限（20次）。"
+                    "DO NOT CALL ANY MORE TOOLS. 请直接根据已有信息回答用户。"
+                )
 
-        agent.hooks.add_callback(AfterToolCallEvent, check_tool_limit)
+        agent.hooks.add_callback(BeforeToolCallEvent, check_tool_limit)
 
         healthy_status.value = "HealthyBusy"
         logger.info(f"🚀 Agent job starts | actor={actor_id} session={session_id}")

From c085cc4e7103e3cc9f290a6235d1b4885946da0f Mon Sep 17 00:00:00 2001
From: xiaosu <xiaosu@xiaosudeMacBook-Pro-2.local>
Date: Fri, 15 May 2026 10:15:54 +0800
Subject: [PATCH 2/3] fix: add message history validation to handle orphaned
 toolUse from Memory

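When MCP tool calls are interrupted (timeout, network error), Memory
saves an incomplete history containing a toolUse with no toolResult. On
restoration, the Strands SDK's repair logic can add the wrong number of
toolResult blocks (strands-agents/sdk-python#2296), causing Bedrock to
reject the request.

Add fix_message_history(), which runs before each invocation and checks
every assistant message that is followed by a user message. When the
number of toolResult blocks differs from the number of toolUse blocks,
it rebuilds the results: one per toolUseId, reusing the existing result
when present and inserting an error placeholder otherwise.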
---
 main.py | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

diff --git a/main.py b/main.py
index 4a21045..f82ed1f 100644
--- a/main.py
+++ b/main.py
@@ -189,6 +189,41 @@ def check_tool_limit(event: BeforeToolCallEvent):
 
         agent.hooks.add_callback(BeforeToolCallEvent, check_tool_limit)
 
+        # Fix corrupted message history from Memory restoration
+        # Workaround for https://github.com/strands-agents/sdk-python/issues/2296
+        def fix_message_history(agent):
+            """Validate and fix toolUse/toolResult pairing in restored history."""
+            messages = getattr(agent, 'messages', None)
+            if not messages or len(messages) < 2:
+                return
+            for i, msg in enumerate(messages):
+                content = msg.get("content", [])
+                if msg.get("role") == "assistant" and i + 1 < len(messages):
+                    tool_use_ids = [b["toolUse"]["toolUseId"] for b in content if "toolUse" in b]
+                    if not tool_use_ids:
+                        continue
+                    next_msg = messages[i + 1]
+                    if next_msg.get("role") != "user":
+                        continue
+                    next_content = next_msg.get("content", [])
+                    tool_results = [b for b in next_content if "toolResult" in b]
+                    if len(tool_results) != len(tool_use_ids):
+                        logger.warning(f"⚠️ Fixing toolUse/toolResult mismatch at msg {i}: "
+                                       f"{len(tool_use_ids)} toolUse vs {len(tool_results)} toolResult")
+                        non_tool = [b for b in next_content if "toolResult" not in b]
+                        fixed_results = []
+                        for tid in tool_use_ids:
+                            existing = next((b for b in tool_results
+                                             if b.get("toolResult", {}).get("toolUseId") == tid), None)
+                            if existing:
+                                fixed_results.append(existing)
+                            else:
+                                fixed_results.append({"toolResult": {"toolUseId": tid,
+                                    "content": [{"text": "Tool execution was interrupted."}], "status": "error"}})
+                        messages[i + 1]["content"] = non_tool + fixed_results
+
+        fix_message_history(agent)
+
         healthy_status.value = "HealthyBusy"
         logger.info(f"🚀 Agent job starts | actor={actor_id} session={session_id}")
 

From 3d9cfe94575625ae6e184ba3eb03819fafef414b Mon Sep 17 00:00:00 2001
From: xiaosu <xiaosu@xiaosudeMacBook-Pro-2.local>
Date: Fri, 15 May 2026 10:22:31 +0800
Subject: [PATCH 3/3] fix: move message history fix to BeforeModelCallEvent
 hook

The previous fix_message_history() ran after Agent creation but before
invoke_async(). However, session_manager restores history inside
invoke_async(), so the fix ran too early.

Move the fix into a BeforeModelCallEvent hook, which fires right before
each model call, after history restoration and the SDK's own (buggy)
repair logic. This ensures toolUse/toolResult counts are reconciled
before the messages are sent to Bedrock.
---
 main.py | 67 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 33 insertions(+), 34 deletions(-)

diff --git a/main.py b/main.py
index f82ed1f..e3fb980 100644
--- a/main.py
+++ b/main.py
@@ -176,7 +176,7 @@ async def invoke(payload):
         # Limit tool calls to prevent infinite loops
         tool_call_count = {"n": 0}
 
-        from strands.hooks import BeforeToolCallEvent
+        from strands.hooks import BeforeToolCallEvent, BeforeModelCallEvent
 
         def check_tool_limit(event: BeforeToolCallEvent):
             tool_call_count["n"] += 1
@@ -187,42 +187,41 @@ def check_tool_limit(event: BeforeToolCallEvent):
                     "DO NOT CALL ANY MORE TOOLS. 请直接根据已有信息回答用户。"
                 )
 
-        agent.hooks.add_callback(BeforeToolCallEvent, check_tool_limit)
+        def fix_messages_before_model(event: BeforeModelCallEvent):
+            """Fix toolUse/toolResult mismatch right before model call.
 
-        # Fix corrupted message history from Memory restoration
-        # Workaround for https://github.com/strands-agents/sdk-python/issues/2296
-        def fix_message_history(agent):
-            """Validate and fix toolUse/toolResult pairing in restored history."""
-            messages = getattr(agent, 'messages', None)
+            Workaround for https://github.com/strands-agents/sdk-python/issues/2296
+            """
+            messages = agent.messages
             if not messages or len(messages) < 2:
                 return
-            for i, msg in enumerate(messages):
-                content = msg.get("content", [])
-                if msg.get("role") == "assistant" and i + 1 < len(messages):
-                    tool_use_ids = [b["toolUse"]["toolUseId"] for b in content if "toolUse" in b]
-                    if not tool_use_ids:
-                        continue
-                    next_msg = messages[i + 1]
-                    if next_msg.get("role") != "user":
-                        continue
-                    next_content = next_msg.get("content", [])
-                    tool_results = [b for b in next_content if "toolResult" in b]
-                    if len(tool_results) != len(tool_use_ids):
-                        logger.warning(f"⚠️ Fixing toolUse/toolResult mismatch at msg {i}: "
-                                       f"{len(tool_use_ids)} toolUse vs {len(tool_results)} toolResult")
-                        non_tool = [b for b in next_content if "toolResult" not in b]
-                        fixed_results = []
-                        for tid in tool_use_ids:
-                            existing = next((b for b in tool_results
-                                             if b.get("toolResult", {}).get("toolUseId") == tid), None)
-                            if existing:
-                                fixed_results.append(existing)
-                            else:
-                                fixed_results.append({"toolResult": {"toolUseId": tid,
-                                    "content": [{"text": "Tool execution was interrupted."}], "status": "error"}})
-                        messages[i + 1]["content"] = non_tool + fixed_results
-
-        fix_message_history(agent)
+            for i in range(len(messages) - 1):
+                msg = messages[i]
+                if msg.get("role") != "assistant":
+                    continue
+                tool_use_ids = [b["toolUse"]["toolUseId"] for b in msg.get("content", []) if "toolUse" in b]
+                if not tool_use_ids:
+                    continue
+                next_msg = messages[i + 1]
+                if next_msg.get("role") != "user":
+                    continue
+                next_content = next_msg.get("content", [])
+                tool_results = [b for b in next_content if "toolResult" in b]
+                if len(tool_results) == len(tool_use_ids):
+                    continue
+                logger.warning(f"⚠️ Fixing toolUse/toolResult mismatch at msg {i}: "
+                               f"{len(tool_use_ids)} toolUse vs {len(tool_results)} toolResult")
+                non_tool = [b for b in next_content if "toolResult" not in b]
+                fixed_results = []
+                for tid in tool_use_ids:
+                    existing = next((b for b in tool_results
+                                     if b.get("toolResult", {}).get("toolUseId") == tid), None)
+                    fixed_results.append(existing if existing else {"toolResult": {"toolUseId": tid,
+                        "content": [{"text": "Tool execution was interrupted."}], "status": "error"}})
+                messages[i + 1]["content"] = non_tool + fixed_results
+
+        agent.hooks.add_callback(BeforeToolCallEvent, check_tool_limit)
+        agent.hooks.add_callback(BeforeModelCallEvent, fix_messages_before_model)
 
         healthy_status.value = "HealthyBusy"
         logger.info(f"🚀 Agent job starts | actor={actor_id} session={session_id}")