Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions fastdeploy/engine/args_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -624,6 +624,8 @@ def __post_init__(self):
raise NotImplementedError(
f"not support model_impl: '{self.model_impl}'. " f"Must be one of: {', '.join(valid_model_impls)}"
)
if envs.FD_ENABLE_RL == 1:
self.moe_gate_fp32 = True

self.post_init_all_ports()

Expand Down
2 changes: 2 additions & 0 deletions fastdeploy/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,8 @@ def _validate_split_kv_size(value: int) -> int:
"FD_SAVE_OUTPUT_CACHE_FOR_PREEMPTED_REQUEST": lambda: bool(
int(os.getenv("FD_SAVE_OUTPUT_CACHE_FOR_PREEMPTED_REQUEST", "1"))
),
# Whether to align RoPE and moe gate precision with training
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟡 建议 注释中提到 "Whether to align RoPE and moe gate precision with training",但当前代码只处理了 moe gate 精度,RoPE 部分未体现。

如果后续会添加 RoPE 相关逻辑,建议保留此注释;否则建议修改为更准确的描述:

# Whether to enable RL mode (moe gate uses fp32 precision)

"FD_ENABLE_RL": lambda: int(os.getenv("FD_ENABLE_RL", "0")),
}


Expand Down
4 changes: 1 addition & 3 deletions fastdeploy/model_executor/models/glm4_moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,9 +150,7 @@ def __init__(
output_size=fd_config.model_config.n_routed_experts,
with_bias=False,
skip_quant=True,
weight_dtype=(
"float32" if fd_config.load_config.dynamic_load_weight or fd_config.model_config.moe_gate_fp32 else ""
),
weight_dtype=("float32" if fd_config.model_config.moe_gate_fp32 else ""),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 兼容性 移除 dynamic_load_weight 条件是 breaking change。

原有逻辑中,当 load_config.dynamic_load_weight=True 时会自动启用 fp32。本次变更后,这些用户必须显式设置 FD_ENABLE_RL=1 才能保持原有行为,可能导致现有 RL 训练流程出现精度不一致问题。

建议:在 `args_utils.py` 的 `__post_init__` 中同时检查两个条件:

if envs.FD_ENABLE_RL == 1 or self.dynamic_load_weight:
    self.moe_gate_fp32 = True

或者在 PR 描述中明确说明此为 intentional breaking change,并在文档中注明迁移方式。

)
self.gate.e_score_correction_bias = self.create_parameter(
shape=[1, fd_config.model_config.n_routed_experts],
Expand Down
4 changes: 1 addition & 3 deletions fastdeploy/model_executor/models/qwen3moe.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,9 +77,7 @@ def __init__(
output_size=fd_config.model_config.num_experts,
with_bias=False,
skip_quant=True,
weight_dtype=(
"float32" if fd_config.load_config.dynamic_load_weight or fd_config.model_config.moe_gate_fp32 else ""
),
weight_dtype=("float32" if fd_config.model_config.moe_gate_fp32 else ""),
)

def forward(self, x, forward_meta):
Expand Down
Loading