diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 00000000000..817e8729b4f --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,4 @@ + +## 2025-02-23 - Avoid dataclasses.asdict in Hot Paths +**Learning:** `dataclasses.asdict` recursively rebuilds every nested dataclass, list, tuple, and dict and deep-copies the remaining values, which makes it very slow for large dataclasses or for objects that are serialized frequently. In FastDeploy, it was used in `RequestMetrics.to_dict()`, creating significant overhead. +**Action:** When defining `to_dict()` or other custom serialization methods for dataclasses that are serialized on hot paths, avoid `asdict`. Instead, iterate over `self.__dataclass_fields__` with `getattr`, pass immutable basic types (`int`, `float`, `str`, `bool`, `type(None)`) through unchanged, and shallow-copy `list` and `dict` values. For nested dataclasses, ensure they also implement their own `to_dict()` method to avoid the recursive cost of `asdict`. Note that, unlike `asdict`, a shallow copy does not convert dataclass instances stored inside lists or dicts. diff --git a/fastdeploy/engine/request.py b/fastdeploy/engine/request.py index 0e95cd5e1fb..214cb401e24 100644 --- a/fastdeploy/engine/request.py +++ b/fastdeploy/engine/request.py @@ -897,7 +897,26 @@ def to_dict(self): """ Convert the RequestMetrics object to a dictionary. 
""" - return {k: v for k, v in asdict(self).items()} + import dataclasses + + res = {} + for k in self.__dataclass_fields__: + v = getattr(self, k) + if type(v) in (int, float, str, bool, type(None)): + res[k] = v + elif isinstance(v, list): + res[k] = list(v) + elif isinstance(v, dict): + res[k] = dict(v) + else: + if dataclasses.is_dataclass(v): + if hasattr(v, "to_dict"): + res[k] = v.to_dict() + else: + res[k] = dataclasses.asdict(v) + else: + res[k] = v + return res def record_recv_first_token(self): cur_time = time.time() diff --git a/fastdeploy/worker/output.py b/fastdeploy/worker/output.py index 365fec12475..902ee301603 100644 --- a/fastdeploy/worker/output.py +++ b/fastdeploy/worker/output.py @@ -164,6 +164,20 @@ class SpeculateMetrics: """ accept_ratio_per_head: list[float] + def to_dict(self): + return { + "accepted_tokens": self.accepted_tokens, + "rejected_tokens": self.rejected_tokens, + "accept_ratio": self.accept_ratio, + "average_accept_length": self.average_accept_length, + "accepted_tokens_per_head": ( + list(self.accepted_tokens_per_head) if self.accepted_tokens_per_head is not None else None + ), + "accept_ratio_per_head": ( + list(self.accept_ratio_per_head) if self.accept_ratio_per_head is not None else None + ), + } + @dataclass class SamplerOutput: