skip token-count request for deepseek flash freebuff

jahooma · jahooma · commit 4ef7e801025a · 2026-05-12T12:05:26.000-07:00
diff --git a/common/src/__tests__/free-agents.test.ts b/common/src/__tests__/free-agents.test.ts
@@ -12,6 +12,7 @@ import {
   getFreebuffRootAgentIdForModel,
   isFreebuffGeminiThinkerAgent,
   isFreeModeAllowedAgentModel,
+  shouldUseLocalTokenCountForFreebuffDeepseekFlash,
 } from '../constants/free-agents'
 
 describe('free mode agent model allowlist', () => {
@@ -168,4 +169,37 @@ describe('free mode agent model allowlist', () => {
       ),
     ).toBe(false)
   })
+
+  test('uses local token count only for the DeepSeek Flash freebuff root', () => {
+    // Each case pairs an agent id / model combination with the expected
+    // answer; only the bare or codebuff-published Flash root on the Flash
+    // model should opt into the local estimate.
+    const cases = [
+      {
+        agentId: 'base2-free-deepseek-flash',
+        model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
+        expected: true,
+      },
+      {
+        agentId: 'codebuff/base2-free-deepseek-flash@0.0.1',
+        model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
+        expected: true,
+      },
+      {
+        agentId: 'base2-free-deepseek',
+        model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
+        expected: false,
+      },
+      {
+        agentId: 'base2-free-deepseek-flash',
+        model: FREEBUFF_DEEPSEEK_V4_PRO_MODEL_ID,
+        expected: false,
+      },
+      {
+        agentId: 'other/base2-free-deepseek-flash@0.0.1',
+        model: FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID,
+        expected: false,
+      },
+    ]
+
+    for (const { agentId, model, expected } of cases) {
+      expect(
+        shouldUseLocalTokenCountForFreebuffDeepseekFlash({ agentId, model }),
+      ).toBe(expected)
+    }
+  })
 })
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
@@ -161,6 +161,20 @@ export function isFreebuffGeminiThinkerAgent(fullAgentId: string): boolean {
   return agentId === FREEBUFF_GEMINI_THINKER_AGENT_ID
 }
 
+/**
+ * Decide whether the context token count for an agent step should be
+ * estimated locally rather than requested from the token-count API.
+ *
+ * Returns true only when the model is the DeepSeek V4 Flash freebuff model
+ * and the agent id resolves to `base2-free-deepseek-flash`, either with no
+ * publisher prefix or published under `codebuff`.
+ *
+ * @param params.agentId - Full agent id, optionally `publisher/name@version`.
+ * @param params.model - Model id configured for the agent.
+ * @returns Whether the local token estimate should be used.
+ */
+export function shouldUseLocalTokenCountForFreebuffDeepseekFlash(params: {
+  agentId: string | undefined
+  model: string | undefined
+}): boolean {
+  if (params.model !== FREEBUFF_DEEPSEEK_V4_FLASH_MODEL_ID) return false
+  if (!params.agentId) return false
+
+  const { publisherId, agentId: baseAgentId } = parseAgentId(params.agentId)
+  // A missing publisher is accepted; any publisher other than codebuff is not.
+  const isTrustedPublisher = !publisherId || publisherId === 'codebuff'
+  return isTrustedPublisher && baseAgentId === 'base2-free-deepseek-flash'
+}
+
 /**
  * Check if a specific agent is allowed to use a specific model in FREE mode.
  * This is the strictest check - validates both the agent AND model combination.
diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts
@@ -1,4 +1,5 @@
 import { AnalyticsEvent } from '@codebuff/common/constants/analytics-events'
+import { shouldUseLocalTokenCountForFreebuffDeepseekFlash } from '@codebuff/common/constants/free-agents'
 import { supportsCacheControl } from '@codebuff/common/old-constants'
 import { TOOLS_WHICH_WONT_FORCE_NEXT_STEP } from '@codebuff/common/tools/constants'
 import { buildArray } from '@codebuff/common/util/array'
@@ -864,29 +865,42 @@ export async function loopAgentSteps(
         }),
       )
 
-      // Check context token count via Anthropic API
-      const tokenCountResult = await callTokenCountAPI({
-        messages: messagesWithStepPrompt,
-        system,
-        model: agentTemplate.model,
-        tools: toolsForTokenCount,
-        fetch,
-        logger,
-        env: { clientEnv, ciEnv },
-      })
-      if (tokenCountResult.inputTokens !== undefined) {
-        currentAgentState.contextTokenCount = tokenCountResult.inputTokens
-      } else if (tokenCountResult.error) {
-        logger.warn(
-          { error: tokenCountResult.error },
-          'Failed to get token count from Anthropic API',
-        )
-        // Fall back to local estimate
-        const estimatedTokens =
-          countTokensJson(currentAgentState.messageHistory) +
-          countTokensJson(system) +
-          countTokensJson(toolDefinitions)
-        currentAgentState.contextTokenCount = estimatedTokens
+      const estimateContextTokensLocally = () =>
+        countTokensJson(messagesWithStepPrompt) +
+        countTokensJson(system) +
+        countTokensJson(toolsForTokenCount)
+
+      if (
+        shouldUseLocalTokenCountForFreebuffDeepseekFlash({
+          agentId: agentTemplate.id,
+          model: agentTemplate.model,
+        })
+      ) {
+        currentAgentState.contextTokenCount = estimateContextTokensLocally()
+      } else {
+        // Check context token count via the web API.
+        const tokenCountResult = await callTokenCountAPI({
+          messages: messagesWithStepPrompt,
+          system,
+          model: agentTemplate.model,
+          tools: toolsForTokenCount,
+          fetch,
+          logger,
+          env: { clientEnv, ciEnv },
+        })
+        if (tokenCountResult.inputTokens !== undefined) {
+          currentAgentState.contextTokenCount = tokenCountResult.inputTokens
+        } else if (tokenCountResult.error) {
+          logger.warn(
+            { error: tokenCountResult.error },
+            'Failed to get token count from web API',
+          )
+          const estimatedTokens =
+            countTokensJson(currentAgentState.messageHistory) +
+            countTokensJson(system) +
+            countTokensJson(toolDefinitions)
+          currentAgentState.contextTokenCount = estimatedTokens
+        }
       }
 
       // 1. Run programmatic step first if it exists