MetaMathAgent/reasoning_agent.py at main · Praagnya/MetaMathAgent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import random
import re
from typing import Dict, Any


class ReasoningAgent:
    """Low-level agent responsible for detailed execution.

    The agent simulates an imperfect solver: ``skill_level`` controls how
    often it injects an arithmetic mistake (``error_probability`` is always
    kept equal to ``1.0 - skill_level``) and ``step_verbosity`` is the
    probability that it records its intermediate steps.
    """

    # Equations must look exactly like "<a>x + <b> = <c>" at the start of the
    # string, with non-negative integer literals and single spaces.
    _LINEAR_EQUATION_RE = re.compile(r"(\d+)x \+ (\d+) = (\d+)")

    def __init__(self, skill_level: float = 0.6, improvement_rate: float = 0.1):
        """Create an agent.

        Args:
            skill_level: Starting skill; the chance of making an error on a
                problem is ``1.0 - skill_level``.
            improvement_rate: Factor applied to the reward in :meth:`update`.
        """
        self.skill_level = skill_level
        self.improvement_rate = improvement_rate
        self.error_probability = 1.0 - self.skill_level
        self.step_verbosity = 0.7  # Probability of showing intermediate steps

    def solve_problem(self, problem: Dict[str, Any], strategic_plan: Dict[str, Any]) -> Dict[str, Any]:
        """Solve the problem following the strategic plan.

        Args:
            problem: Problem description; ``problem["problem_type"]`` selects
                the solver.
            strategic_plan: Plan forwarded unchanged to the solver.

        Returns:
            The solver's result dict, or ``{"error": "Unsupported problem type"}``
            for any type other than ``"linear_equation"``.

        Raises:
            KeyError: If ``problem`` has no ``"problem_type"`` key.
        """
        if problem["problem_type"] == "linear_equation":
            return self._solve_linear_equation(problem, strategic_plan)
        return {"error": "Unsupported problem type"}

    def _solve_linear_equation(self, problem: Dict[str, Any], strategic_plan: Dict[str, Any]) -> Dict[str, Any]:
        """Solve a linear equation ``ax + b = c`` following the strategic plan.

        Args:
            problem: Must contain ``"equation"``, ``"problem_text"`` and
                ``"correct_solution"``.
            strategic_plan: Must contain ``"approach"``; the optional
                ``"strategies"`` list is matched positionally (isolate,
                divide, check) by substring.

        Returns:
            A dict with keys ``problem``, ``strategic_plan``, ``steps``,
            ``solution`` and ``correct_solution``; or ``{"error": ...}`` when
            the equation cannot be parsed or the coefficient of ``x`` is zero.

        Raises:
            KeyError: If one of the required keys listed above is missing.
        """
        equation = problem["equation"]
        # Parse the equation
        match = self._LINEAR_EQUATION_RE.match(equation)
        if not match:
            return {"error": "Failed to parse equation"}

        a, b, c = map(int, match.groups())

        # "0x + b = c" parses but has no unique solution; report it in the
        # same style as the other failures instead of dividing by zero below.
        if a == 0:
            return {"error": "Coefficient of x must be non-zero"}

        # Determine if we'll make an error based on skill level
        make_error = random.random() < self.error_probability

        # Determine if we'll show steps based on verbosity
        show_steps = random.random() < self.step_verbosity

        steps = []
        exact_rhs = c - b

        # Follow the strategic plan
        strategies = strategic_plan.get("strategies", [])

        if show_steps:
            steps.append(f"Starting with the equation: {a}x + {b} = {c}")

            # Strategy 1: Isolate variable term
            if strategies and "Isolate variable" in strategies[0]:
                steps.append(f"Following strategy: {strategies[0]}")
                steps.append(f"Subtracting {b} from both sides: {a}x = {c} - {b}")

                # Possible error in subtraction: when an error is due, each
                # arithmetic step has an independent 50% chance to be off by one.
                if make_error and random.random() < 0.5:
                    c_minus_b = exact_rhs + random.choice([-1, 1])
                else:
                    c_minus_b = exact_rhs
                steps.append(f"This gives us: {a}x = {c_minus_b}")
            else:
                # No strategy for isolation, just do it
                steps.append(f"Subtracting {b} from both sides: {a}x = {exact_rhs}")
                c_minus_b = exact_rhs

            quotient = c_minus_b / a

            # Strategy 2: Divide by coefficient
            if len(strategies) > 1 and "Divide both sides" in strategies[1]:
                steps.append(f"Following strategy: {strategies[1]}")
                steps.append(f"Dividing both sides by {a}: x = {c_minus_b} / {a}")

                # Possible error in division
                if make_error and random.random() < 0.5:
                    solution = quotient + random.choice([-1, 1])
                else:
                    solution = quotient
                steps.append(f"Therefore, x = {solution}")
            else:
                # No strategy for division, just do it
                steps.append(f"Dividing both sides by {a}: x = {quotient}")
                solution = quotient

            # Strategy 3: Check solution
            if len(strategies) > 2 and "Check the solution" in strategies[2]:
                steps.append(f"Following strategy: {strategies[2]}")
                check_result = a * solution + b
                steps.append(f"Checking: {a} × {solution} + {b} = {check_result}")

                if abs(check_result - c) < 0.001:
                    steps.append(f"The solution checks out: {check_result} ≈ {c}")
                else:
                    steps.append(f"The solution doesn't check out: {check_result} ≠ {c}")
                    steps.append("Let me recalculate...")
                    solution = exact_rhs / a  # Correct the solution
                    steps.append(f"The correct solution is x = {solution}")
        else:
            # Just give the answer with possible error; without shown steps an
            # error that is due is always applied.
            solution = exact_rhs / a
            if make_error:
                solution += random.choice([-1, 1])

        # Ensure solution is an integer if the correct answer is an integer
        if isinstance(problem["correct_solution"], int):
            solution = round(solution)

        return {
            "problem": problem["problem_text"],
            "strategic_plan": strategic_plan["approach"],
            "steps": steps,
            "solution": solution,
            "correct_solution": problem["correct_solution"],
        }

    def update(self, reward: float) -> None:
        """Update the agent's skill based on reward.

        The skill changes by ``improvement_rate * reward`` and the step
        verbosity by half of that. Both are clamped to ``[0.0, 1.0]`` because
        they are used as probabilities, and ``error_probability`` is kept in
        sync. Once ``skill_level`` has reached 1.0 further calls are no-ops.

        Args:
            reward: Feedback signal; negative values lower the skill.
        """
        if self.skill_level < 1.0:
            improvement = self.improvement_rate * reward
            self.skill_level = max(0.0, min(1.0, self.skill_level + improvement))
            self.error_probability = 1.0 - self.skill_level

            # Also increase step verbosity as skill improves
            self.step_verbosity = max(0.0, min(1.0, self.step_verbosity + (improvement * 0.5)))