Skip to content

Commit 397cd51

Browse files
committed
Preserve tool_result ordering before reminders
1 parent aa61ecd commit 397cd51

3 files changed

Lines changed: 112 additions & 2 deletions

File tree

agents/s03_todo_write.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ def agent_loop(messages: list):
186186
used_todo = True
187187
rounds_since_todo = 0 if used_todo else rounds_since_todo + 1
188188
if rounds_since_todo >= 3:
189-
results.insert(0, {"type": "text", "text": "<reminder>Update your todos.</reminder>"})
189+
results.append({"type": "text", "text": "<reminder>Update your todos.</reminder>"})
190190
messages.append({"role": "user", "content": results})
191191

192192

agents/s_full.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -698,7 +698,7 @@ def agent_loop(messages: list):
698698
# s03: nag reminder (only when todo workflow is active)
699699
rounds_without_todo = 0 if used_todo else rounds_without_todo + 1
700700
if TODO.has_open_items() and rounds_without_todo >= 3:
701-
results.insert(0, {"type": "text", "text": "<reminder>Update your todos.</reminder>"})
701+
results.append({"type": "text", "text": "<reminder>Update your todos.</reminder>"})
702702
messages.append({"role": "user", "content": results})
703703
# s06: manual compress
704704
if manual_compress:

tests/test_tool_result_ordering.py

Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
import os
2+
import sys
3+
import types
4+
import unittest
5+
from pathlib import Path
6+
from types import SimpleNamespace
7+
8+
9+
# Repository root: two levels up from this file (the parent of its directory).
REPO_ROOT = Path(__file__).resolve().parents[1]
# Put the repository root at the front of sys.path (once) so that the
# `agents.*` imports further down resolve when this file is run directly.
if str(REPO_ROOT) not in sys.path:
    sys.path.insert(0, str(REPO_ROOT))

# Supply a placeholder MODEL_ID without overriding a value that is already set.
# NOTE(review): presumably the agent modules read MODEL_ID at import time — confirm.
os.environ.setdefault("MODEL_ID", "test-model")

# Empty module object that will stand in for the `anthropic` package.
fake_anthropic = types.ModuleType("anthropic")
16+
17+
18+
class FakeAnthropic:
    """Stand-in for ``anthropic.Anthropic`` that accepts any constructor arguments.

    The instance exposes a ``messages`` namespace whose ``create`` attribute is
    ``None``; it exists only so that code constructing a client can run.
    """

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted and ignored.
        placeholder_api = SimpleNamespace()
        placeholder_api.create = None
        self.messages = placeholder_api
21+
22+
23+
# Expose the stub client class under the attribute name `Anthropic`.
setattr(fake_anthropic, "Anthropic", FakeAnthropic)
# setdefault: an `anthropic` entry already present in sys.modules is left in
# place; the stub is registered only when no such module has been imported.
sys.modules.setdefault("anthropic", fake_anthropic)

# Stub `dotenv` the same way; its load_dotenv accepts any arguments and
# returns None without doing anything.
fake_dotenv = types.ModuleType("dotenv")
setattr(fake_dotenv, "load_dotenv", lambda *args, **kwargs: None)
sys.modules.setdefault("dotenv", fake_dotenv)
29+
30+
import agents.s03_todo_write as s03_todo_write
31+
import agents.s_full as s_full
32+
33+
34+
class FakeMessagesAPI:
    """Scripted replacement for a client's ``messages`` API.

    Each call to :meth:`create` returns the next canned response, in the order
    given to the constructor.

    Attributes:
        calls: One entry per ``create`` call, holding the keyword arguments it
            received. The dicts reference the caller's objects (no copy), so
            mutable values such as a message list reflect later mutations.
    """

    def __init__(self, responses):
        """Store *responses* (any iterable) as the script to replay."""
        self._responses = iter(responses)
        self.calls = []

    def create(self, **kwargs):
        """Return the next scripted response.

        Raises:
            AssertionError: if called more times than there are scripted
                responses. A bare ``StopIteration`` escaping from here would
                carry no hint about what went wrong.
        """
        self.calls.append(kwargs)
        try:
            return next(self._responses)
        except StopIteration:
            raise AssertionError(
                f"FakeMessagesAPI.create called {len(self.calls)} time(s), "
                "but the scripted responses are exhausted"
            ) from None
40+
41+
42+
def make_tool_use_response(tool_id: str, tool_name: str, tool_input: dict):
    """Build a fake model response that requests exactly one tool call.

    Args:
        tool_id: Identifier stored on the tool-use block as ``id``.
        tool_name: Tool name stored on the block as ``name``.
        tool_input: Arguments stored on the block as ``input`` (not copied).

    Returns:
        A ``SimpleNamespace`` with ``stop_reason == "tool_use"`` and a
        one-element ``content`` list holding the tool-use block.
    """
    tool_call = SimpleNamespace(
        type="tool_use", id=tool_id, name=tool_name, input=tool_input
    )
    return SimpleNamespace(stop_reason="tool_use", content=[tool_call])
51+
52+
53+
class ToolResultOrderingTests(unittest.TestCase):
    """Regression tests: the todo reminder must follow the tool_result blocks.

    Both agent loops are driven through three consecutive ``bash`` tool calls.
    The tests expect the user message built after the third round to carry
    the reminder, and check that it comes after the tool results.
    """

    # Text of the reminder block the agent loops add to the tool results.
    REMINDER_TEXT = "<reminder>Update your todos.</reminder>"

    def _run_three_bash_rounds(self, module):
        """Run ``module.agent_loop`` against a scripted three-tool-call session.

        ``module.client`` and ``module.TOOL_HANDLERS`` are replaced for the
        duration of the test and restored through ``addCleanup``.

        Returns:
            The ``content`` list of the second-to-last message, i.e. the user
            message that follows the third tool call.
        """
        messages = [{"role": "user", "content": "do work"}]
        fake_api = FakeMessagesAPI(
            [
                make_tool_use_response("tool-1", "bash", {"command": "pwd"}),
                make_tool_use_response("tool-2", "bash", {"command": "pwd"}),
                make_tool_use_response("tool-3", "bash", {"command": "pwd"}),
                SimpleNamespace(stop_reason="end_turn", content="done"),
            ]
        )
        original_handlers = module.TOOL_HANDLERS
        # Register the restores before patching so they run even if the
        # agent loop raises.
        self.addCleanup(setattr, module, "client", module.client)
        self.addCleanup(setattr, module, "TOOL_HANDLERS", original_handlers)
        module.client = SimpleNamespace(messages=fake_api)
        # Replace only the bash handler so no real command is executed.
        module.TOOL_HANDLERS = {
            **original_handlers,
            "bash": lambda **kwargs: "ok",
        }
        module.agent_loop(messages)
        return messages[-2]["content"]

    def _assert_results_precede_reminder(self, content):
        """Check that tool_result blocks lead and the reminder text trails."""
        kinds = [item["type"] for item in content]
        self.assertTrue(kinds, "user message content is empty")
        self.assertEqual(kinds[0], "tool_result")
        self.assertEqual(kinds[-1], "text")
        # Once a non-tool_result block appears, no tool_result may follow it.
        first_other = kinds.index("text")
        for position, kind in enumerate(kinds):
            if kind != "tool_result":
                first_other = position
                break
        self.assertNotIn("tool_result", kinds[first_other:])
        # The trailing text block must be the reminder, not some other text.
        self.assertEqual(content[-1]["text"], self.REMINDER_TEXT)

    def test_s03_places_tool_results_before_reminders(self):
        third_user_message = self._run_three_bash_rounds(s03_todo_write)
        self._assert_results_precede_reminder(third_user_message)

    def test_s_full_places_tool_results_before_reminders(self):
        # s_full only nags while the todo list reports open items, so force it.
        self.addCleanup(
            setattr, s_full.TODO, "has_open_items", s_full.TODO.has_open_items
        )
        s_full.TODO.has_open_items = lambda: True
        third_user_message = self._run_three_bash_rounds(s_full)
        self._assert_results_precede_reminder(third_user_message)
107+
108+
109+
# Run the tests with unittest's command-line runner when this file is
# executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()

0 commit comments

Comments
 (0)