From a77c9d42987dc5bc5970407e648296df0254cb49 Mon Sep 17 00:00:00 2001 From: Sunny-bot1 <592045536@qq.com> Date: Wed, 1 Apr 2026 17:21:25 +0800 Subject: [PATCH 1/2] rl gate fp32 --- fastdeploy/engine/args_utils.py | 2 ++ fastdeploy/envs.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/fastdeploy/engine/args_utils.py b/fastdeploy/engine/args_utils.py index ff0965c56bb..26157d7d23d 100644 --- a/fastdeploy/engine/args_utils.py +++ b/fastdeploy/engine/args_utils.py @@ -624,6 +624,8 @@ def __post_init__(self): raise NotImplementedError( f"not support model_impl: '{self.model_impl}'. " f"Must be one of: {', '.join(valid_model_impls)}" ) + if envs.FD_ENABLE_RL == 1: + self.moe_gate_fp32 = True self.post_init_all_ports() diff --git a/fastdeploy/envs.py b/fastdeploy/envs.py index 0c7ac3e22b1..5534871fcb3 100644 --- a/fastdeploy/envs.py +++ b/fastdeploy/envs.py @@ -266,6 +266,8 @@ def _validate_split_kv_size(value: int) -> int: "FD_SAVE_OUTPUT_CACHE_FOR_PREEMPTED_REQUEST": lambda: bool( int(os.getenv("FD_SAVE_OUTPUT_CACHE_FOR_PREEMPTED_REQUEST", "1")) ), + # Whether to align RoPE and moe gate precision with training + "FD_ENABLE_RL": lambda: int(os.getenv("FD_ENABLE_RL", "0")), } From c3891cc85e30c9d09b412e9522439ca6a69abee5 Mon Sep 17 00:00:00 2001 From: Sunny-bot1 <592045536@qq.com> Date: Wed, 1 Apr 2026 19:10:16 +0800 Subject: [PATCH 2/2] clean --- fastdeploy/model_executor/models/glm4_moe.py | 4 +--- fastdeploy/model_executor/models/qwen3moe.py | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/fastdeploy/model_executor/models/glm4_moe.py b/fastdeploy/model_executor/models/glm4_moe.py index 3f45e9df614..64b82229d37 100644 --- a/fastdeploy/model_executor/models/glm4_moe.py +++ b/fastdeploy/model_executor/models/glm4_moe.py @@ -150,9 +150,7 @@ def __init__( output_size=fd_config.model_config.n_routed_experts, with_bias=False, skip_quant=True, - weight_dtype=( - "float32" if fd_config.load_config.dynamic_load_weight or fd_config.model_config.moe_gate_fp32 else "" - ), + weight_dtype=("float32" if fd_config.model_config.moe_gate_fp32 else ""), ) self.gate.e_score_correction_bias = self.create_parameter( shape=[1, fd_config.model_config.n_routed_experts], diff --git a/fastdeploy/model_executor/models/qwen3moe.py b/fastdeploy/model_executor/models/qwen3moe.py index 74ca37ab695..6c443d68bcc 100644 --- a/fastdeploy/model_executor/models/qwen3moe.py +++ b/fastdeploy/model_executor/models/qwen3moe.py @@ -77,9 +77,7 @@ def __init__( output_size=fd_config.model_config.num_experts, with_bias=False, skip_quant=True, - weight_dtype=( - "float32" if fd_config.load_config.dynamic_load_weight or fd_config.model_config.moe_gate_fp32 else "" - ), + weight_dtype=("float32" if fd_config.model_config.moe_gate_fp32 else ""), ) def forward(self, x, forward_meta):