-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathbehavior_profile.py
More file actions
174 lines (145 loc) · 7.52 KB
/
behavior_profile.py
File metadata and controls
174 lines (145 loc) · 7.52 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
"""
behavior_profile.py
===================
Configurable behavioral models for every agent type.
Each agent receives a BehaviorProfile at construction time that controls:
- risk_appetite (0.0=ultra-conservative … 1.0=reckless)
- reaction_speed (0.0=slow/misses opportunities … 1.0=instant reaction)
- capital_allocation (max fraction of wallet deployed per trade)
- strategy (agent-type specific string tag)
Agents are also ADAPTIVE: they track their rolling P&L over a short window
and use a simple reinforcement rule to adjust capital_allocation each step:
- Recent positive P&L → become more aggressive (raise allocation)
- Recent negative P&L → become more conservative (lower allocation)
This satisfies the PS requirement for "AI-driven agents with configurable
behavioral models including risk appetite, reaction speed, arbitrage strategy,
and capital allocation logic."
"""
from dataclasses import dataclass, field
from typing import Deque, Optional
from collections import deque
@dataclass
class BehaviorProfile:
    """
    Configurable behavioral model that every agent carries.
    Parameters
    ----------
    risk_appetite : float [0.0, 1.0]
        How aggressively the agent bets. Controls trigger thresholds and
        whether the agent takes trades near the boundary condition.
        0.0 = never takes borderline risks
        1.0 = takes any opportunity regardless of risk
    reaction_speed : float [0.0, 1.0]
        Probability that the agent actually acts when an opportunity arises.
        Models latency / attention / infrastructure quality.
        0.0 = always misses the trade window
        1.0 = always catches the trade window
    capital_allocation : float [0.0, 1.0]
        Maximum fraction of available wallet balance deployed per trade.
        0.1 = uses at most 10% of funds per trade
        1.0 = all-in every time
    strategy : str
        Agent-type tag that selects sub-variant behaviour:
        Arbitrageur  → "aggressive" | "conservative" | "balanced"
        Whale        → "dump" | "pump" | "random"
        Liquidator   → "greedy" | "fair"
        RetailTrader → "bull" | "bear" | "neutral"
        MEVActor     → "sandwich" | "frontrun" | "backrun"
    pnl_window : int
        Number of recent P&L observations to track for the adaptive logic.
    """
    risk_appetite: float = 0.5
    reaction_speed: float = 0.8
    capital_allocation: float = 0.2
    strategy: str = "balanced"
    pnl_window: int = 8
    # ---- Internal adaptive state (not set by caller) ---- #
    # Placeholder deque; re-created in __post_init__ with the configured maxlen.
    _pnl_history: Deque[float] = field(default_factory=deque,
                                       init=False, repr=False)

    def __post_init__(self) -> None:
        # BUG FIX: the history deque's maxlen was previously hard-coded to 8
        # in the default_factory, silently ignoring a caller-supplied
        # pnl_window. A default_factory cannot see other fields, so the
        # correctly-sized deque must be built here instead.
        self._pnl_history = deque(maxlen=self.pnl_window)

    def record_pnl(self, pnl: float) -> None:
        """Record a single trade's realised P&L for the adaptive loop."""
        # Oldest observations fall off automatically once maxlen is reached.
        self._pnl_history.append(pnl)

    def adapt(self) -> None:
        """
        Reinforcement-style adaptation of capital_allocation.
        If average recent P&L is positive → raise capital_allocation by 5%
        If average recent P&L is negative → lower capital_allocation by 8%
        Clamped to [0.05, 0.95]. No-op until at least 3 observations exist.
        """
        if len(self._pnl_history) < 3:
            return  # not enough history yet
        avg = sum(self._pnl_history) / len(self._pnl_history)
        if avg > 0:
            # Winning streak: scale up 5%, capped at 95% of the wallet.
            self.capital_allocation = min(0.95, self.capital_allocation * 1.05)
        else:
            # Losing (or flat) streak: scale down 8%, floored at 5%.
            self.capital_allocation = max(0.05, self.capital_allocation * 0.92)

    def effective_threshold(self, base_threshold: float) -> float:
        """
        Adjust a trigger threshold by risk appetite.
        High risk_appetite → tighter threshold (trades on smaller signals).
        Low risk_appetite → looser threshold (only trades on strong signals).
        effective = base × (2 - risk_appetite)
        e.g. base=0.01, risk=0.0 → 0.02  (needs 2% to act)
             base=0.01, risk=1.0 → 0.01  (acts on 1%)
             base=0.01, risk=0.5 → 0.015
        """
        return base_threshold * (2.0 - self.risk_appetite)

    def will_act(self, rng) -> bool:
        """
        Return True if the agent responds to an opportunity this step.
        Drawn from a Bernoulli(reaction_speed) distribution; `rng` is any
        object exposing random() → float in [0, 1).
        """
        return rng.random() < self.reaction_speed
# ------------------------------------------------------------------ #
# DEFAULT PROFILES PER AGENT TYPE #
# These are the starting points — they evolve during the simulation. #
# ------------------------------------------------------------------ #
def default_arbitrageur_profile(variant: str = "balanced") -> BehaviorProfile:
    """Return the starting profile for one of three arbitrageur personalities.

    Unrecognised variants fall back to "balanced".
    """
    if variant == "aggressive":
        return BehaviorProfile(risk_appetite=0.9, reaction_speed=1.0,
                               capital_allocation=0.35, strategy="aggressive")
    if variant == "conservative":
        return BehaviorProfile(risk_appetite=0.25, reaction_speed=0.7,
                               capital_allocation=0.08, strategy="conservative")
    # Default / fallback personality.
    return BehaviorProfile(risk_appetite=0.55, reaction_speed=0.85,
                           capital_allocation=0.15, strategy="balanced")
def default_whale_profile(variant: str = "random") -> BehaviorProfile:
    """Return the starting profile for a whale agent.

    Unrecognised variants fall back to "random".
    """
    # (risk_appetite, reaction_speed, capital_allocation) per variant.
    presets = {
        "dump":   (0.95, 1.0, 0.5),
        "pump":   (0.95, 1.0, 0.5),
        "random": (0.8, 0.9, 0.35),
    }
    chosen = variant if variant in presets else "random"
    risk, speed, alloc = presets[chosen]
    return BehaviorProfile(risk_appetite=risk, reaction_speed=speed,
                           capital_allocation=alloc, strategy=chosen)
def default_liquidator_profile(variant: str = "greedy") -> BehaviorProfile:
    """Return the starting profile for a liquidator agent.

    Unrecognised variants fall back to "greedy".
    """
    if variant == "fair":
        return BehaviorProfile(risk_appetite=0.4, reaction_speed=0.7,
                               capital_allocation=0.5, strategy="fair")
    # "greedy" is both a named variant and the fallback.
    return BehaviorProfile(risk_appetite=0.8, reaction_speed=1.0,
                           capital_allocation=0.9, strategy="greedy")
def default_retail_profile(variant: str = "neutral") -> BehaviorProfile:
    """Return the starting profile for a retail trader.

    Unrecognised variants fall back to "neutral".
    """
    # Constructor keyword arguments per variant; strategy is the key itself.
    kwargs_by_variant = {
        "bull":    dict(risk_appetite=0.7, reaction_speed=0.6,
                        capital_allocation=0.15),
        "bear":    dict(risk_appetite=0.6, reaction_speed=0.6,
                        capital_allocation=0.12),
        "neutral": dict(risk_appetite=0.4, reaction_speed=0.5,
                        capital_allocation=0.08),
    }
    chosen = variant if variant in kwargs_by_variant else "neutral"
    return BehaviorProfile(strategy=chosen, **kwargs_by_variant[chosen])
def default_mev_profile(variant: str = "sandwich") -> BehaviorProfile:
    """Return the starting profile for an MEV actor.

    Unrecognised variants fall back to "sandwich".
    """
    if variant == "frontrun":
        return BehaviorProfile(risk_appetite=0.9, reaction_speed=1.0,
                               capital_allocation=0.20, strategy="frontrun")
    if variant == "backrun":
        return BehaviorProfile(risk_appetite=0.7, reaction_speed=0.95,
                               capital_allocation=0.15, strategy="backrun")
    # "sandwich" is both a named variant and the fallback.
    return BehaviorProfile(risk_appetite=0.85, reaction_speed=1.0,
                           capital_allocation=0.25, strategy="sandwich")