From 407a47df22546080331b15f381e194da2b9cf3cd Mon Sep 17 00:00:00 2001
From: Yiwen Xie
Date: Mon, 12 Jan 2026 15:25:43 -0800
Subject: [PATCH] Support case when cache_len == 0 (#16546)

Summary:
There is a discrepancy in the SDPA implementation when computing with vs. without padding treated as invalid cache, even when the padding is masked out. For verification purposes, we need to test the eager model without padding so its results match the QAT model. This change handles the case where `cache_len == 0`, preventing crashes during cache updates.

Differential Revision: D90526985
---
 examples/models/llama/static_attention.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index f97873ce646..9eef4413a63 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -632,6 +632,8 @@ def _run_once(
         return y, attn_updates
 
     def _update_states(self, attn_updates, update_pos, update_len):
+        if attn_updates["out_cache_state"] is None:
+            return
         for mask in self._masks.values():
             mask.unmask(update_len)
         k_cache_updates, v_cache_updates = attn_updates["out_cache_state"]
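
Note: the following is a minimal standalone sketch of the guard this patch adds, not the actual ExecuTorch StaticAttention module; the _SimpleCacheHolder class and the exact layout of attn_updates here are assumptions made for illustration only.

    from typing import Dict, Optional

    import torch


    class _SimpleCacheHolder:
        """Illustrative stand-in for an attention module that keeps K/V
        caches and writes back updates produced by a forward pass."""

        def __init__(self, cache_len: int, dim: int):
            self.cache_len = cache_len
            # With cache_len == 0 there is no cache tensor to update at all.
            self.k_cache: Optional[torch.Tensor] = (
                torch.zeros(1, cache_len, dim) if cache_len > 0 else None
            )
            self.v_cache: Optional[torch.Tensor] = (
                torch.zeros(1, cache_len, dim) if cache_len > 0 else None
            )

        def _update_states(self, attn_updates: Dict, update_pos: int, update_len: int) -> None:
            # Mirrors the guard added by the patch: when the forward pass
            # produced no cache state (e.g. cache_len == 0), skip the update.
            if attn_updates["out_cache_state"] is None:
                return
            k_update, v_update = attn_updates["out_cache_state"]
            self.k_cache[:, update_pos : update_pos + update_len] = k_update
            self.v_cache[:, update_pos : update_pos + update_len] = v_update


    if __name__ == "__main__":
        holder = _SimpleCacheHolder(cache_len=0, dim=4)
        # Without the early return, unpacking None below would raise a TypeError.
        holder._update_states({"out_cache_state": None}, update_pos=0, update_len=1)
        print("no crash when cache_len == 0")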