From 407a47df22546080331b15f381e194da2b9cf3cd Mon Sep 17 00:00:00 2001
From: Yiwen Xie
Date: Mon, 12 Jan 2026 15:25:43 -0800
Subject: [PATCH] Support case when cache_len == 0 (#16546)

Summary:
There is a discrepancy in the SDPA implementation when computing with vs. without padding treated as invalid cache, even when the padding is masked out. For verification purposes, we need to test the eager model without padding so its results match the QAT model. This change handles the case where `cache_len == 0`, preventing crashes during cache updates.

Differential Revision: D90526985
---
 examples/models/llama/static_attention.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/models/llama/static_attention.py b/examples/models/llama/static_attention.py
index f97873ce646..9eef4413a63 100644
--- a/examples/models/llama/static_attention.py
+++ b/examples/models/llama/static_attention.py
@@ -632,6 +632,8 @@ def _run_once(
         return y, attn_updates
 
     def _update_states(self, attn_updates, update_pos, update_len):
+        if attn_updates["out_cache_state"] is None:
+            return
         for mask in self._masks.values():
             mask.unmask(update_len)
         k_cache_updates, v_cache_updates = attn_updates["out_cache_state"]
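
Note: the following is a minimal standalone sketch of the guard this patch adds, not the actual ExecuTorch StaticAttention module; the _SimpleCacheHolder class and the exact layout of attn_updates here are assumptions made for illustration only.

    from typing import Dict, Optional

    import torch


    class _SimpleCacheHolder:
        """Illustrative stand-in for an attention module that keeps K/V
        caches and writes back updates produced by a forward pass."""

        def __init__(self, cache_len: int, dim: int):
            self.cache_len = cache_len
            # With cache_len == 0 there is no cache tensor to update at all.
            self.k_cache: Optional[torch.Tensor] = (
                torch.zeros(1, cache_len, dim) if cache_len > 0 else None
            )
            self.v_cache: Optional[torch.Tensor] = (
                torch.zeros(1, cache_len, dim) if cache_len > 0 else None
            )

        def _update_states(self, attn_updates: Dict, update_pos: int, update_len: int) -> None:
            # Mirrors the guard added by the patch: when the forward pass
            # produced no cache state (e.g. cache_len == 0), skip the update.
            if attn_updates["out_cache_state"] is None:
                return
            k_update, v_update = attn_updates["out_cache_state"]
            self.k_cache[:, update_pos : update_pos + update_len] = k_update
            self.v_cache[:, update_pos : update_pos + update_len] = v_update


    if __name__ == "__main__":
        holder = _SimpleCacheHolder(cache_len=0, dim=4)
        # Without the early return, unpacking None below would raise a TypeError.
        holder._update_states({"out_cache_state": None}, update_pos=0, update_len=1)
        print("no crash when cache_len == 0")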