diff --git a/src/strands/models/anthropic.py b/src/strands/models/anthropic.py index 812171a0c..ae028064d 100644 --- a/src/strands/models/anthropic.py +++ b/src/strands/models/anthropic.py @@ -359,14 +359,27 @@ def format_chunk(self, event: dict[str, Any]) -> StreamEvent: case "metadata": usage = event["usage"] + input_tokens = usage["input_tokens"] + output_tokens = usage["output_tokens"] + cache_read = usage.get("cache_read_input_tokens") or 0 + cache_write = usage.get("cache_creation_input_tokens") or 0 + # Anthropic reports `input_tokens` as the NON-CACHED portion only. + # `totalTokens` should reflect everything billed on the input side: + # uncached + cache reads + cache writes. + total_input = input_tokens + cache_read + cache_write + usage_chunk: dict[str, int] = { + "inputTokens": input_tokens, + "outputTokens": output_tokens, + "totalTokens": total_input + output_tokens, + } + if cache_read: + usage_chunk["cacheReadInputTokens"] = cache_read + if cache_write: + usage_chunk["cacheWriteInputTokens"] = cache_write return { "metadata": { - "usage": { - "inputTokens": usage["input_tokens"], - "outputTokens": usage["output_tokens"], - "totalTokens": usage["input_tokens"] + usage["output_tokens"], - }, + "usage": usage_chunk, "metrics": { "latencyMs": 0, # TODO }, diff --git a/tests/strands/models/test_anthropic.py b/tests/strands/models/test_anthropic.py index 0ebdb161c..357d58266 100644 --- a/tests/strands/models/test_anthropic.py +++ b/tests/strands/models/test_anthropic.py @@ -727,6 +727,60 @@ def test_format_chunk_metadata(model): assert tru_chunk == exp_chunk +def test_format_chunk_metadata_with_cache_tokens(model): + """When prompt caching is active, Anthropic returns cache_read_input_tokens + and cache_creation_input_tokens alongside input_tokens; surface them so + downstream cost accounting reflects what the user is billed for.""" + event = { + "type": "metadata", + "usage": { + "input_tokens": 5, + "output_tokens": 7, + "cache_read_input_tokens": 100, + "cache_creation_input_tokens": 50, + }, + } + + tru_chunk = model.format_chunk(event) + exp_chunk = { + "metadata": { + "usage": { + "inputTokens": 5, + "outputTokens": 7, + # 5 (uncached) + 100 (cache read) + 50 (cache write) + 7 (output) + "totalTokens": 162, + "cacheReadInputTokens": 100, + "cacheWriteInputTokens": 50, + }, + "metrics": { + "latencyMs": 0, + }, + }, + } + + assert tru_chunk == exp_chunk + + +def test_format_chunk_metadata_omits_zero_cache_tokens(model): + """When cache fields are absent or zero, keep the legacy chunk shape so + consumers expecting only inputTokens/outputTokens keep working.""" + event = { + "type": "metadata", + "usage": { + "input_tokens": 5, + "output_tokens": 7, + "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 0, + }, + } + + tru_chunk = model.format_chunk(event) + + assert "cacheReadInputTokens" not in tru_chunk["metadata"]["usage"] + assert "cacheWriteInputTokens" not in tru_chunk["metadata"]["usage"] + assert tru_chunk["metadata"]["usage"]["totalTokens"] == 12 + + def test_format_chunk_unknown(model): event = {"type": "unknown"}