-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathreasoning_agent.py
More file actions
122 lines (101 loc) · 5.41 KB
/
reasoning_agent.py
File metadata and controls
122 lines (101 loc) · 5.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import random
import re
from typing import Dict, Any
class ReasoningAgent:
    """Low-level agent responsible for detailed execution.

    The agent solves problems by following a strategic plan and randomly
    injects arithmetic mistakes according to its current skill.

    Attributes:
        skill_level: Competence used to derive ``error_probability``.
        improvement_rate: Factor applied to a reward in :meth:`update`.
        error_probability: ``1.0 - skill_level``; chance that a solve
            attempt is flagged to contain an error.
        step_verbosity: Probability of showing intermediate steps.
    """

    # Exact form "<a>x + <b> = <c>" with (optionally negative) integer terms.
    _LINEAR_EQUATION = re.compile(r"(-?\d+)x \+ (-?\d+) = (-?\d+)")

    def __init__(self, skill_level: float = 0.6, improvement_rate: float = 0.1):
        """Create an agent with the given skill and learning speed."""
        self.skill_level = skill_level
        self.improvement_rate = improvement_rate
        self.error_probability = 1.0 - self.skill_level
        self.step_verbosity = 0.7  # Probability of showing intermediate steps

    def solve_problem(self, problem: Dict[str, Any], strategic_plan: Dict[str, Any]) -> Dict[str, Any]:
        """Solve the problem following the strategic plan.

        Args:
            problem: Mapping with at least a ``"problem_type"`` key; only
                ``"linear_equation"`` is supported.
            strategic_plan: Plan forwarded to the type-specific solver.

        Returns:
            The solver's result dict, or ``{"error": ...}`` when the problem
            type is not supported.
        """
        if problem["problem_type"] == "linear_equation":
            return self._solve_linear_equation(problem, strategic_plan)
        return {"error": "Unsupported problem type"}

    def _solve_linear_equation(self, problem: Dict[str, Any], strategic_plan: Dict[str, Any]) -> Dict[str, Any]:
        """Solve a linear equation following the strategic plan.

        Args:
            problem: Mapping that provides ``"equation"`` (text of the form
                ``"<a>x + <b> = <c>"`` with integer terms),
                ``"problem_text"`` and ``"correct_solution"``.
            strategic_plan: Mapping that provides ``"approach"`` and,
                optionally, ``"strategies"`` (an ordered list of strategy
                descriptions; defaults to an empty list).

        Returns:
            A dict with the keys ``"problem"``, ``"strategic_plan"``,
            ``"steps"``, ``"solution"`` and ``"correct_solution"``; or
            ``{"error": ...}`` when the equation cannot be parsed or the
            coefficient of ``x`` is zero.

        Raises:
            KeyError: If a required key is missing from ``problem`` or
                ``strategic_plan``.
        """
        equation = problem["equation"]

        # Parse the equation. The whole string must match so that trailing
        # text (e.g. a decimal part) is rejected instead of silently dropped.
        match = self._LINEAR_EQUATION.fullmatch(equation.strip())
        if not match:
            return {"error": "Failed to parse equation"}

        a, b, c = map(int, match.groups())
        if a == 0:
            # Every path below divides by ``a``; report instead of crashing.
            return {"error": "Coefficient of x must be non-zero"}

        # Determine if we'll make an error based on skill level
        make_error = random.random() < self.error_probability

        # Determine if we'll show steps based on verbosity
        show_steps = random.random() < self.step_verbosity

        steps = []

        # Follow the strategic plan
        strategies = strategic_plan.get("strategies", [])

        if show_steps:
            steps.append(f"Starting with the equation: {a}x + {b} = {c}")

            # Strategy 1: Isolate variable term
            if strategies and "Isolate variable" in strategies[0]:
                steps.append(f"Following strategy: {strategies[0]}")
                steps.append(f"Subtracting {b} from both sides: {a}x = {c} - {b}")

                # Possible error in subtraction (off by one in either direction)
                if make_error and random.random() < 0.5:
                    error_c_minus_b = c - b + random.choice([-1, 1])
                    steps.append(f"This gives us: {a}x = {error_c_minus_b}")
                    c_minus_b = error_c_minus_b
                else:
                    steps.append(f"This gives us: {a}x = {c - b}")
                    c_minus_b = c - b
            else:
                # No strategy for isolation, just do it
                steps.append(f"Subtracting {b} from both sides: {a}x = {c - b}")
                c_minus_b = c - b

            # Strategy 2: Divide by coefficient
            if len(strategies) > 1 and "Divide both sides" in strategies[1]:
                steps.append(f"Following strategy: {strategies[1]}")
                steps.append(f"Dividing both sides by {a}: x = {c_minus_b} / {a}")

                # Possible error in division (off by one in either direction)
                if make_error and random.random() < 0.5:
                    error_x = c_minus_b / a + random.choice([-1, 1])
                    steps.append(f"Therefore, x = {error_x}")
                    solution = error_x
                else:
                    steps.append(f"Therefore, x = {c_minus_b / a}")
                    solution = c_minus_b / a
            else:
                # No strategy for division, just do it
                steps.append(f"Dividing both sides by {a}: x = {c_minus_b / a}")
                solution = c_minus_b / a

            # Strategy 3: Check solution (catches and repairs injected errors)
            if len(strategies) > 2 and "Check the solution" in strategies[2]:
                steps.append(f"Following strategy: {strategies[2]}")
                check_result = a * solution + b
                steps.append(f"Checking: {a} × {solution} + {b} = {check_result}")

                if abs(check_result - c) < 0.001:
                    steps.append(f"The solution checks out: {check_result} ≈ {c}")
                else:
                    steps.append(f"The solution doesn't check out: {check_result} ≠ {c}")
                    steps.append("Let me recalculate...")
                    solution = (c - b) / a  # Correct the solution
                    steps.append(f"The correct solution is x = {solution}")
        else:
            # Just give the answer with possible error
            if make_error:
                solution = (c - b) / a + random.choice([-1, 1])
            else:
                solution = (c - b) / a

        # Ensure solution is an integer if the correct answer is an integer
        if isinstance(problem["correct_solution"], int):
            solution = round(solution)

        return {
            "problem": problem["problem_text"],
            "strategic_plan": strategic_plan["approach"],
            "steps": steps,
            "solution": solution,
            "correct_solution": problem["correct_solution"],
        }

    def update(self, reward: float) -> None:
        """Update the agent's skill based on reward.

        The skill changes by ``improvement_rate * reward`` and the step
        verbosity by half of that amount. Nothing changes once the skill has
        reached ``1.0``.

        Args:
            reward: Feedback signal; a negative value lowers the skill.
        """
        if self.skill_level < 1.0:
            improvement = self.improvement_rate * reward
            # Clamp to [0, 1] so the derived error probability stays valid
            # even after negative rewards.
            self.skill_level = max(0.0, min(1.0, self.skill_level + improvement))
            self.error_probability = 1.0 - self.skill_level

            # Also increase step verbosity as skill improves
            self.step_verbosity = max(0.0, min(1.0, self.step_verbosity + (improvement * 0.5)))