Skip to content

Commit 4e638d4

Browse files
committed
style: apply ruff formatting
1 parent e0bd1fa commit 4e638d4

7 files changed

Lines changed: 262 additions & 121 deletions

File tree

src/hawk/tools.py

Lines changed: 22 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -138,12 +138,21 @@ def chat_with_tools(
138138
tool_results = []
139139
for tc in response.tool_calls:
140140
tool_name = tc.get("name") if isinstance(tc, dict) else getattr(tc, "name", None)
141-
arguments = tc.get("arguments", {}) if isinstance(tc, dict) else getattr(tc, "arguments", {})
141+
arguments = (
142+
tc.get("arguments", {}) if isinstance(tc, dict) else getattr(tc, "arguments", {})
143+
)
142144
if tool_name and tool_name in tool_map:
143145
result = _execute_tool(tool_map[tool_name], arguments)
144146
else:
145147
result = json.dumps({"error": f"Unknown tool: {tool_name}"})
146-
tool_results.append({"tool_use_id": tc.get("id", "") if isinstance(tc, dict) else getattr(tc, "id", ""), "content": result})
148+
tool_results.append(
149+
{
150+
"tool_use_id": tc.get("id", "")
151+
if isinstance(tc, dict)
152+
else getattr(tc, "id", ""),
153+
"content": result,
154+
}
155+
)
147156

148157
# Send tool results back to continue the conversation.
149158
response = client.chat(
@@ -201,12 +210,21 @@ async def chat_with_tools_async(
201210
tool_results = []
202211
for tc in response.tool_calls:
203212
tool_name = tc.get("name") if isinstance(tc, dict) else getattr(tc, "name", None)
204-
arguments = tc.get("arguments", {}) if isinstance(tc, dict) else getattr(tc, "arguments", {})
213+
arguments = (
214+
tc.get("arguments", {}) if isinstance(tc, dict) else getattr(tc, "arguments", {})
215+
)
205216
if tool_name and tool_name in tool_map:
206217
result = await _execute_tool_async(tool_map[tool_name], arguments)
207218
else:
208219
result = json.dumps({"error": f"Unknown tool: {tool_name}"})
209-
tool_results.append({"tool_use_id": tc.get("id", "") if isinstance(tc, dict) else getattr(tc, "id", ""), "content": result})
220+
tool_results.append(
221+
{
222+
"tool_use_id": tc.get("id", "")
223+
if isinstance(tc, dict)
224+
else getattr(tc, "id", ""),
225+
"content": result,
226+
}
227+
)
210228

211229
response = await client.chat(
212230
prompt,

src/hawk/workflow.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -114,9 +114,7 @@ def _run_step_with_timeout(self, s: Step, input_val: Any) -> Any:
114114
try:
115115
return future.result(timeout=s.timeout)
116116
except concurrent.futures.TimeoutError:
117-
raise TimeoutError(
118-
f"Step '{s.name}' timed out after {s.timeout}s"
119-
) from None
117+
raise TimeoutError(f"Step '{s.name}' timed out after {s.timeout}s") from None
120118

121119
def _run_step_sync(self, s: Step, input_val: Any) -> Any:
122120
"""Run a single step with optional retry."""

tests/test_agent.py

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -120,9 +120,7 @@ def test_chat_with_tools(self, mock_cwt: MagicMock) -> None:
120120
agent = Agent(client, cfg)
121121
resp = agent.chat("find something")
122122

123-
mock_cwt.assert_called_once_with(
124-
client, "find something", [t], max_rounds=3
125-
)
123+
mock_cwt.assert_called_once_with(client, "find something", [t], max_rounds=3)
126124
assert resp.response == "search result"
127125

128126
def test_chat_passes_session_id_after_first_turn(self) -> None:
@@ -195,9 +193,7 @@ async def test_chat_with_tools(self, mock_cwt: MagicMock) -> None:
195193
agent = AsyncAgent(client, cfg)
196194
resp = await agent.chat("find something")
197195

198-
mock_cwt.assert_called_once_with(
199-
client, "find something", [t], max_rounds=5
200-
)
196+
mock_cwt.assert_called_once_with(client, "find something", [t], max_rounds=5)
201197
assert resp.response == "async result"
202198

203199
async def test_chat_session_continuity(self) -> None:

tests/test_evaluate.py

Lines changed: 58 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -46,9 +46,7 @@ def test_defaults(self) -> None:
4646
assert result.error is None
4747

4848
def test_with_error(self) -> None:
49-
result = EvalResult(
50-
task_name="t1", success=False, duration_ms=50.0, error="timeout"
51-
)
49+
result = EvalResult(task_name="t1", success=False, duration_ms=50.0, error="timeout")
5250
assert result.success is False
5351
assert result.error == "timeout"
5452

@@ -66,10 +64,16 @@ def test_empty(self) -> None:
6664
assert br.total_tokens == 0
6765

6866
def test_all_passed(self) -> None:
69-
br = BenchmarkResults(results=[
70-
EvalResult(task_name="t1", success=True, duration_ms=100.0, tokens_in=10, tokens_out=5),
71-
EvalResult(task_name="t2", success=True, duration_ms=200.0, tokens_in=20, tokens_out=10),
72-
])
67+
br = BenchmarkResults(
68+
results=[
69+
EvalResult(
70+
task_name="t1", success=True, duration_ms=100.0, tokens_in=10, tokens_out=5
71+
),
72+
EvalResult(
73+
task_name="t2", success=True, duration_ms=200.0, tokens_in=20, tokens_out=10
74+
),
75+
]
76+
)
7377
assert br.total_tasks == 2
7478
assert br.passed == 2
7579
assert br.failed == 0
@@ -78,39 +82,49 @@ def test_all_passed(self) -> None:
7882
assert br.total_tokens == 45
7983

8084
def test_mixed_results(self) -> None:
81-
br = BenchmarkResults(results=[
82-
EvalResult(task_name="t1", success=True, duration_ms=100.0),
83-
EvalResult(task_name="t2", success=False, duration_ms=200.0, error="fail"),
84-
])
85+
br = BenchmarkResults(
86+
results=[
87+
EvalResult(task_name="t1", success=True, duration_ms=100.0),
88+
EvalResult(task_name="t2", success=False, duration_ms=200.0, error="fail"),
89+
]
90+
)
8591
assert br.passed == 1
8692
assert br.failed == 1
8793
assert br.pass_rate == 0.5
8894

8995
def test_by_category(self) -> None:
9096
# by_category splits on "/" — names without "/" get "general"
91-
br = BenchmarkResults(results=[
92-
EvalResult(task_name="math/add", success=True, duration_ms=100.0),
93-
EvalResult(task_name="math/mul", success=True, duration_ms=100.0),
94-
EvalResult(task_name="general/weather", success=False, duration_ms=100.0),
95-
])
97+
br = BenchmarkResults(
98+
results=[
99+
EvalResult(task_name="math/add", success=True, duration_ms=100.0),
100+
EvalResult(task_name="math/mul", success=True, duration_ms=100.0),
101+
EvalResult(task_name="general/weather", success=False, duration_ms=100.0),
102+
]
103+
)
96104
cats = br.by_category()
97105
assert len(cats["math"]) == 2
98106
assert len(cats["general"]) == 1
99107

100108
def test_summary(self) -> None:
101-
br = BenchmarkResults(results=[
102-
EvalResult(task_name="t1", success=True, duration_ms=100.0, tokens_in=10, tokens_out=5),
103-
])
109+
br = BenchmarkResults(
110+
results=[
111+
EvalResult(
112+
task_name="t1", success=True, duration_ms=100.0, tokens_in=10, tokens_out=5
113+
),
114+
]
115+
)
104116
summary = br.summary()
105117
assert "1/1 passed" in summary
106118
assert "100ms" in summary
107119
assert "15" in summary # total tokens
108120

109121
def test_summary_with_failures(self) -> None:
110-
br = BenchmarkResults(results=[
111-
EvalResult(task_name="t1", success=True, duration_ms=100.0),
112-
EvalResult(task_name="t2", success=False, duration_ms=50.0, error="bad output"),
113-
])
122+
br = BenchmarkResults(
123+
results=[
124+
EvalResult(task_name="t1", success=True, duration_ms=100.0),
125+
EvalResult(task_name="t2", success=False, duration_ms=50.0, error="bad output"),
126+
]
127+
)
114128
summary = br.summary()
115129
assert "1/2 passed" in summary
116130
assert "Failures:" in summary
@@ -178,21 +192,25 @@ def test_no_reset(self) -> None:
178192

179193
def test_validation_pass(self) -> None:
180194
agent = _make_mock_agent(response="The temperature is 72F")
181-
tasks = [EvalTask(
182-
name="t1",
183-
prompt="weather?",
184-
validate=lambda r: "temperature" in r.response,
185-
)]
195+
tasks = [
196+
EvalTask(
197+
name="t1",
198+
prompt="weather?",
199+
validate=lambda r: "temperature" in r.response,
200+
)
201+
]
186202
results = run_benchmark(agent, tasks)
187203
assert results.passed == 1
188204

189205
def test_validation_fail(self) -> None:
190206
agent = _make_mock_agent(response="ok")
191-
tasks = [EvalTask(
192-
name="t1",
193-
prompt="weather?",
194-
validate=lambda r: "temperature" in r.response,
195-
)]
207+
tasks = [
208+
EvalTask(
209+
name="t1",
210+
prompt="weather?",
211+
validate=lambda r: "temperature" in r.response,
212+
)
213+
]
196214
results = run_benchmark(agent, tasks)
197215
assert results.passed == 0
198216
assert results.failed == 1
@@ -253,11 +271,13 @@ async def test_validation_fail(self) -> None:
253271
resp.turns_taken = 1
254272
resp.duration = "1.0s"
255273
agent.chat.return_value = resp
256-
tasks = [EvalTask(
257-
name="t1",
258-
prompt="test",
259-
validate=lambda r: "target" in r.response,
260-
)]
274+
tasks = [
275+
EvalTask(
276+
name="t1",
277+
prompt="test",
278+
validate=lambda r: "target" in r.response,
279+
)
280+
]
261281
results = await run_benchmark_async(agent, tasks)
262282
assert results.failed == 1
263283

0 commit comments

Comments
 (0)