code · pull · Apr 23, 2026 · Apr 23, 2026
diff --git a/apps/docs/integrations/ai-sdk.mdx b/apps/docs/integrations/ai-sdk.mdx
@@ -106,15 +106,15 @@ const model = withSupermemory(openai("gpt-4"), "user-123", {
 // Console output shows memory retrieval details
 ```
 
-### When Supermemory errors (optional: continue without memories)
+### When Supermemory errors (default: continue without memories)
 
-If the Supermemory API returns an error (or is unreachable), memory retrieval fails before the LLM runs. By default that error **propagates** (fails the call).
+If the Supermemory API returns an error, is unreachable, or retrieval exceeds the internal time limit (5 seconds), memory retrieval fails. Because **`skipMemoryOnError` defaults to `true`**, memory injection is then skipped and the LLM call still runs with the **original** prompt (no injected memories). Use `verbose: true` if you want console output when that happens.
 
-To continue the LLM request **without** injected memories instead, opt in with `skipMemoryOnError: true`. Use `verbose: true` if you want console output when that happens.
+To **fail the call** instead when memory retrieval fails or times out, set `skipMemoryOnError: false`:
 
 ```typescript
 const model = withSupermemory(openai("gpt-5"), "user-123", {
-  skipMemoryOnError: true
+  skipMemoryOnError: false
 })
 ```
 

diff --git a/packages/tools/README.md b/packages/tools/README.md
@@ -656,13 +656,17 @@ interface WithSupermemoryOptions {
   addMemory?: "always" | "never"
   /** Optional Supermemory API key. Use this in browser environments. */
   apiKey?: string
+  baseUrl?: string
+  promptTemplate?: (data: MemoryPromptData) => string
+  skipMemoryOnError?: boolean
 }
 ```
 
 - **conversationId**: Optional conversation ID to group messages into a single document for contextual memory generation
 - **verbose**: Enable detailed logging of memory search and injection process (default: false)
 - **mode**: Memory search mode - "profile" (default), "query", or "full"
 - **addMemory**: Automatic memory storage mode - "always" or "never" (default: "never")
+- **skipMemoryOnError**: If memory retrieval fails or exceeds the internal timeout, continue with the original prompt instead of throwing (default: true; set to false to propagate the error)
 
 ## Available Tools
 

diff --git a/packages/tools/package.json b/packages/tools/package.json
@@ -1,7 +1,7 @@
 {
   "name": "@supermemory/tools",
   "type": "module",
-  "version": "1.4.5",
+  "version": "1.4.6",
   "description": "Memory tools for AI SDK, OpenAI, Voltagent and Mastra with supermemory",
   "scripts": {
     "build": "tsdown",

diff --git a/packages/tools/src/shared/memory-client.ts b/packages/tools/src/shared/memory-client.ts
@@ -18,13 +18,15 @@ import {
  * @param queryText - Optional query text for semantic search
  * @param baseUrl - The API base URL
  * @param apiKey - The API key for authentication
+ * @param signal - Optional AbortSignal to cancel the request (e.g. retrieval timeout)
  * @returns The profile structure with static, dynamic, and search results
  */
 export const supermemoryProfileSearch = async (
 	containerTag: string,
 	queryText: string,
 	baseUrl: string,
 	apiKey: string,
+	signal?: AbortSignal,
 ): Promise<ProfileStructure> => {
 	const payload = queryText
 		? JSON.stringify({
@@ -43,6 +45,7 @@ export const supermemoryProfileSearch = async (
 				Authorization: `Bearer ${apiKey}`,
 			},
 			body: payload,
+			...(signal ? { signal } : {}),
 		})
 
 		if (!response.ok) {
@@ -72,6 +75,7 @@ export interface BuildMemoriesTextOptions {
 	apiKey: string
 	logger: Logger
 	promptTemplate?: PromptTemplate
+	signal?: AbortSignal
 }
 
 /**
@@ -92,13 +96,15 @@ export const buildMemoriesText = async (
 		apiKey,
 		logger,
 		promptTemplate = defaultPromptTemplate,
+		signal,
 	} = options
 
 	const memoriesResponse = await supermemoryProfileSearch(
 		containerTag,
 		queryText,
 		baseUrl,
 		apiKey,
+		signal,
 	)
 
 	const memoryCountStatic = memoriesResponse.profile.static?.length || 0

diff --git a/packages/tools/src/vercel/index.ts b/packages/tools/src/vercel/index.ts
@@ -12,6 +12,8 @@ import {
 } from "./middleware"
 import type { PromptTemplate, MemoryPromptData } from "./memory-prompt"
 
+const DEFAULT_MEMORY_RETRIEVAL_TIMEOUT_MS = 5000
+
 interface WrapVercelLanguageModelOptions {
 	/** Optional conversation ID to group messages for contextual memory generation */
 	conversationId?: string
@@ -51,9 +53,9 @@ interface WrapVercelLanguageModelOptions {
 	 */
 	promptTemplate?: PromptTemplate
 	/**
-	 * When Supermemory memory retrieval / injection fails:
-	 * - `false` (default): propagate the error.
-	 * - `true`: log and call the base model with the original prompt (no memories).
+	 * When Supermemory memory retrieval / injection fails or times out:
+	 * - `true` (default): log and call the base model with the original prompt (no memories).
+	 * - `false`: propagate the error (fail closed on memory).
 	 */
 	skipMemoryOnError?: boolean
 }
@@ -64,7 +66,8 @@ interface WrapVercelLanguageModelOptions {
  *
  * This wrapper searches the supermemory API for relevant memories using the container tag
  * and user message, then either appends memories to an existing system prompt or creates
- * a new system prompt with the memories.
+ * a new system prompt with the memories. Pre-LLM profile retrieval is capped by a fixed
+ * internal time budget (5 seconds); that budget cannot be configured via options.
  *
  * Supports both Vercel AI SDK 5 (LanguageModelV2) and SDK 6 (LanguageModelV3) via runtime
  * detection of `model.specificationVersion`.
@@ -78,7 +81,7 @@ interface WrapVercelLanguageModelOptions {
  * @param options.addMemory - Optional mode for memory search: "always", "never" (default: "never")
  * @param options.apiKey - Optional Supermemory API key to use instead of the environment variable
  * @param options.baseUrl - Optional base URL for the Supermemory API (default: "https://api.supermemory.ai")
- * @param options.skipMemoryOnError - When memory retrieval fails: `false` (default) throws; `true` continues without injected memories
+ * @param options.skipMemoryOnError - When memory retrieval fails or times out: `true` (default) continues without injected memories; `false` throws
  *
  * @returns A wrapped language model that automatically includes relevant memories in prompts
  *
@@ -100,7 +103,7 @@ interface WrapVercelLanguageModelOptions {
  * ```
  *
  * @throws {Error} When neither `options.apiKey` nor `process.env.SUPERMEMORY_API_KEY` are set
- * @throws {Error} When supermemory memory retrieval fails unless `skipMemoryOnError` is `true`
+ * @throws {Error} When Supermemory memory retrieval fails or times out and `skipMemoryOnError` is `false`
  */
 const wrapVercelLanguageModel = <T extends LanguageModel>(
 	model: T,
@@ -124,9 +127,10 @@ const wrapVercelLanguageModel = <T extends LanguageModel>(
 		addMemory: options?.addMemory ?? "never",
 		baseUrl: options?.baseUrl,
 		promptTemplate: options?.promptTemplate,
+		memoryRetrievalTimeoutMs: DEFAULT_MEMORY_RETRIEVAL_TIMEOUT_MS,
 	})
 
-	const skipMemoryOnError = options?.skipMemoryOnError ?? false
+	const skipMemoryOnError = options?.skipMemoryOnError ?? true
 
 	// Proxy keeps prototype/getter fields (e.g. provider, modelId) that `{ ...model }` drops.
 	return new Proxy(model, {

diff --git a/packages/tools/src/vercel/middleware.ts b/packages/tools/src/vercel/middleware.ts
@@ -188,6 +188,8 @@ interface SupermemoryMiddlewareOptions {
 	baseUrl?: string
 	/** Custom function to format memory data into the system prompt */
 	promptTemplate?: PromptTemplate
+	/** Max wait (ms) for the pre-LLM `/v4/profile` retrieval. Omit (or pass a value <= 0) for no limit (e.g. tests). `withSupermemory` sets this internally. */
+	memoryRetrievalTimeoutMs?: number
 }
 
 interface SupermemoryMiddlewareContext {
@@ -200,6 +202,7 @@ interface SupermemoryMiddlewareContext {
 	normalizedBaseUrl: string
 	apiKey: string
 	promptTemplate?: PromptTemplate
+	memoryRetrievalTimeoutMs?: number
 	/**
 	 * Per-turn memory cache. Stores the injected memories string for each
 	 * user turn (keyed by turnKey) to avoid redundant API calls during tool-call
@@ -219,6 +222,7 @@ export const createSupermemoryContext = (
 		addMemory = "never",
 		baseUrl,
 		promptTemplate,
+		memoryRetrievalTimeoutMs,
 	} = options
 
 	const logger = createLogger(verbose)
@@ -241,6 +245,9 @@ export const createSupermemoryContext = (
 		normalizedBaseUrl,
 		apiKey,
 		promptTemplate,
+		...(memoryRetrievalTimeoutMs !== undefined
+			? { memoryRetrievalTimeoutMs }
+			: {}),
 		memoryCache: new MemoryCache<string>(),
 	}
 }
@@ -304,15 +311,32 @@ export const transformParamsWithMemory = async (
 
 	const queryText = extractQueryText(params, ctx.mode)
 
-	const memories = await buildMemoriesText({
-		containerTag: ctx.containerTag,
-		queryText,
-		mode: ctx.mode,
-		baseUrl: ctx.normalizedBaseUrl,
-		apiKey: ctx.apiKey,
-		logger: ctx.logger,
-		promptTemplate: ctx.promptTemplate,
-	})
+	let fetchSignal: AbortSignal | undefined
+	let timeoutId: ReturnType<typeof setTimeout> | undefined
+	const timeoutMs = ctx.memoryRetrievalTimeoutMs
+	if (timeoutMs !== undefined && timeoutMs > 0) {
+		const controller = new AbortController()
+		fetchSignal = controller.signal
+		timeoutId = setTimeout(() => controller.abort(), timeoutMs)
+	}
+
+	let memories: string
+	try {
+		memories = await buildMemoriesText({
+			containerTag: ctx.containerTag,
+			queryText,
+			mode: ctx.mode,
+			baseUrl: ctx.normalizedBaseUrl,
+			apiKey: ctx.apiKey,
+			logger: ctx.logger,
+			promptTemplate: ctx.promptTemplate,
+			...(fetchSignal ? { signal: fetchSignal } : {}),
+		})
+	} finally {
+		if (timeoutId !== undefined) {
+			clearTimeout(timeoutId)
+		}
+	}
 
 	ctx.memoryCache.set(turnKey, memories)
 	ctx.logger.debug("Cached memories for turn", { turnKey })

diff --git a/packages/tools/test/with-supermemory/integration.test.ts b/packages/tools/test/with-supermemory/integration.test.ts
@@ -578,6 +578,32 @@ describe.skipIf(!shouldRunIntegration)(
 			})
 
 			it("should handle invalid API key gracefully", async () => {
+				const { model, getCapturedGenerateParams } =
+					createIntegrationMockModel()
+
+				const wrapped = withSupermemory(
+					model,
+					INTEGRATION_CONFIG.containerTag,
+					{
+						apiKey: "invalid-api-key-12345",
+						mode: "profile",
+					},
+				)
+
+				await wrapped.doGenerate({
+					prompt: [
+						{
+							role: "user",
+							content: [{ type: "text", text: "Invalid key test" }],
+						},
+					],
+				})
+
+				const captured = getCapturedGenerateParams()
+				expect(captured?.prompt[0]?.role).toBe("user")
+			})
+
+			it("should reject on invalid API key when skipMemoryOnError is false", async () => {
 				const { model } = createIntegrationMockModel()
 
 				const wrapped = withSupermemory(
@@ -586,6 +612,7 @@ describe.skipIf(!shouldRunIntegration)(
 					{
 						apiKey: "invalid-api-key-12345",
 						mode: "profile",
+						skipMemoryOnError: false,
 					},
 				)
 
@@ -594,7 +621,7 @@ describe.skipIf(!shouldRunIntegration)(
 						prompt: [
 							{
 								role: "user",
-								content: [{ type: "text", text: "Invalid key test" }],
+								content: [{ type: "text", text: "Invalid key strict test" }],
 							},
 						],
 					}),

diff --git a/packages/tools/test/with-supermemory/unit.test.ts b/packages/tools/test/with-supermemory/unit.test.ts
@@ -383,4 +383,115 @@ describe("Unit: withSupermemory", () => {
 			).toBe("Last")
 		})
 	})
+
+	describe("Wrapper retrieval resilience", () => {
+		let fetchMock: ReturnType<typeof vi.fn>
+
+		beforeEach(() => {
+			process.env.SUPERMEMORY_API_KEY = "test-key"
+			fetchMock = vi.fn()
+			globalThis.fetch = fetchMock as unknown as typeof fetch
+			vi.clearAllMocks()
+		})
+
+		it("continues without memories when profile fetch fails (default skip)", async () => {
+			fetchMock.mockResolvedValue({
+				ok: false,
+				status: 500,
+				statusText: "Internal Server Error",
+				text: () => Promise.resolve("err"),
+			})
+
+			const inner = createMockLanguageModel()
+			vi.mocked(inner.doGenerate).mockResolvedValue({
+				content: [{ type: "text", text: "ok" }],
+				finishReason: "stop",
+				usage: {
+					inputTokens: 1,
+					outputTokens: 1,
+				},
+				rawCall: { rawPrompt: [], rawSettings: {} },
+				warnings: [],
+			})
+
+			const wrapped = withSupermemory(inner, TEST_CONFIG.containerTag, {
+				apiKey: "k",
+			})
+
+			const params: LanguageModelV2CallOptions = {
+				prompt: [{ role: "user", content: [{ type: "text", text: "Hi" }] }],
+			}
+
+			await wrapped.doGenerate(params)
+
+			expect(inner.doGenerate).toHaveBeenCalledWith(params)
+		})
+
+		it("throws when skipMemoryOnError is false and profile fetch fails", async () => {
+			fetchMock.mockResolvedValue({
+				ok: false,
+				status: 500,
+				statusText: "Internal Server Error",
+				text: () => Promise.resolve("err"),
+			})
+
+			const inner = createMockLanguageModel()
+			const wrapped = withSupermemory(inner, TEST_CONFIG.containerTag, {
+				apiKey: "k",
+				skipMemoryOnError: false,
+			})
+
+			await expect(
+				wrapped.doGenerate({
+					prompt: [{ role: "user", content: [{ type: "text", text: "Hi" }] }],
+				}),
+			).rejects.toThrow("Supermemory profile search failed")
+		})
+
+		it("aborts slow profile fetch after internal timeout and continues by default", async () => {
+			fetchMock.mockImplementation((_url: string, init?: RequestInit) => {
+				return new Promise((_resolve, reject) => {
+					const sig = init?.signal
+					if (!sig) return
+					if (sig.aborted) {
+						reject(new DOMException("Aborted", "AbortError"))
+						return
+					}
+					sig.addEventListener("abort", () => {
+						reject(new DOMException("Aborted", "AbortError"))
+					})
+				})
+			})
+
+			const inner = createMockLanguageModel()
+			vi.mocked(inner.doGenerate).mockResolvedValue({
+				content: [{ type: "text", text: "ok" }],
+				finishReason: "stop",
+				usage: {
+					inputTokens: 1,
+					outputTokens: 1,
+				},
+				rawCall: { rawPrompt: [], rawSettings: {} },
+				warnings: [],
+			})
+
+			const wrapped = withSupermemory(inner, TEST_CONFIG.containerTag, {
+				apiKey: "k",
+			})
+
+			vi.useFakeTimers()
+			try {
+				const params: LanguageModelV2CallOptions = {
+					prompt: [{ role: "user", content: [{ type: "text", text: "Hi" }] }],
+				}
+				const genPromise = wrapped.doGenerate(params)
+				await vi.advanceTimersByTimeAsync(5000)
+				await genPromise
+
+				expect(inner.doGenerate).toHaveBeenCalledWith(params)
+			} finally {
+				vi.useRealTimers()
+			}
+		})
+	})
 })