From 02787c2512618bdd47a197c089e01e7c582bf70d Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Wed, 3 Jun 2026 16:24:01 +0200 Subject: [PATCH 1/4] retry mechanism with retry breaking system --- lib/OnyxUtils.ts | 59 +++---- lib/storage/errors.ts | 76 +++++++++ .../IDBKeyValProvider/createStore.ts | 76 +++------ tests/unit/onyxUtilsTest.ts | 16 +- .../unit/storage/providers/createStoreTest.ts | 158 +++++++++++------- 5 files changed, 227 insertions(+), 158 deletions(-) create mode 100644 lib/storage/errors.ts diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index c5c277cd7..dbf166cb4 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -6,8 +6,8 @@ import * as Logger from './Logger'; import type Onyx from './Onyx'; import cache, {TASK} from './OnyxCache'; import OnyxKeys from './OnyxKeys'; -import * as Str from './Str'; import Storage from './storage'; +import {StorageErrorClass, classifyStorageError} from './storage/errors'; import type { CollectionKeyBase, ConnectOptions, @@ -49,26 +49,6 @@ const METHOD = { CLEAR: 'clear', } as const; -// IndexedDB errors that indicate storage capacity issues where eviction can help -const IDB_STORAGE_ERRORS = [ - 'quotaexceedederror', // Browser storage quota exceeded -] as const; - -// SQLite errors that indicate storage capacity issues where eviction can help -const SQLITE_STORAGE_ERRORS = [ - 'database or disk is full', // Device storage is full -] as const; - -const STORAGE_ERRORS = [...IDB_STORAGE_ERRORS, ...SQLITE_STORAGE_ERRORS]; - -// IndexedDB errors where retrying is futile because the underlying connection/store is broken. -// The healing path (separate from retryOperation) is responsible for recovery. -const IDB_NON_RETRIABLE_ERRORS = [ - 'internal error opening backing store', // LevelDB backing store is broken at the filesystem level -] as const; - -const NON_RETRIABLE_ERRORS = [...IDB_NON_RETRIABLE_ERRORS]; - // Max number of retries for failed storage operations const MAX_STORAGE_OPERATION_RETRY_ATTEMPTS = 5; @@ -791,8 +771,11 @@ function remove(key: TKey, isProcessingCollectionUpdate?: function reportStorageQuota(error?: Error): Promise { return Storage.getDatabaseSize() .then(({bytesUsed, bytesRemaining, usageDetails}) => { + // `bytesRemaining` comes from navigator.storage.estimate() and is an ORIGIN-WIDE estimate, + // not headroom for this database. The browser allocates IndexedDB storage dynamically, so a + // QuotaExceededError can legitimately occur even when this number still looks large. Logger.logInfo( - `Storage Quota Check -- bytesUsed: ${bytesUsed} bytesRemaining: ${bytesRemaining}${ + `Storage Quota Check -- bytesUsed: ${bytesUsed} originWideBytesRemaining (estimate, not per-DB headroom): ${bytesRemaining}${ usageDetails ? ` usageDetails: ${JSON.stringify(usageDetails)}` : '' }. Original error: ${error}`, ); @@ -803,39 +786,39 @@ function reportStorageQuota(error?: Error): Promise { } /** - * Handles storage operation failures based on the error type: - * - Storage capacity errors: evicts data and retries the operation - * - Invalid data errors: logs an alert and throws an error - * - Non-retriable errors: logs an alert and resolves without retrying - * - Other errors: retries the operation + * Handles storage operation failures based on the error class (see lib/storage/errors.ts). + * The connection layer (createStore) owns connection/transport recovery; this operation layer owns + * capacity recovery (eviction) so that a given failure is retried by exactly one layer: + * - INVALID_DATA: logs an alert and throws (the same data will always fail). + * - TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget + * and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly. + * - CAPACITY: evicts the least recently accessed evictable key and retries. + * - UNKNOWN: bounded retry. */ function retryOperation(error: Error, onyxMethod: TMethod, defaultParams: Parameters[0], retryAttempt: number | undefined): Promise { const currentRetryAttempt = retryAttempt ?? 0; const nextRetryAttempt = currentRetryAttempt + 1; + const errorClass = classifyStorageError(error); - Logger.logInfo(`Failed to save to storage. Error: ${error}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); + Logger.logInfo(`Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); - if (error && Str.startsWith(error.message, "Failed to execute 'put' on 'IDBObjectStore'")) { + if (errorClass === StorageErrorClass.INVALID_DATA) { Logger.logAlert(`Attempted to set invalid data set in Onyx. Please ensure all data is serializable. Error: ${error}`); throw error; } - const errorMessage = error?.message?.toLowerCase?.(); - const errorName = error?.name?.toLowerCase?.(); - const isStorageCapacityError = STORAGE_ERRORS.some((storageError) => errorName?.includes(storageError) || errorMessage?.includes(storageError)); - const isNonRetriableError = NON_RETRIABLE_ERRORS.some((nonRetriableError) => errorName?.includes(nonRetriableError) || errorMessage?.includes(nonRetriableError)); - - if (isNonRetriableError) { - Logger.logAlert(`Storage operation skipped retry for non-retriable error. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); + if (errorClass === StorageErrorClass.TRANSIENT || errorClass === StorageErrorClass.FATAL) { + Logger.logInfo(`Storage operation skipped retry; ${errorClass} errors are handled by the connection layer. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); return Promise.resolve(); } if (nextRetryAttempt > MAX_STORAGE_OPERATION_RETRY_ATTEMPTS) { - Logger.logAlert(`Storage operation failed after 5 retries. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); + Logger.logAlert(`Storage operation failed after ${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS} retries. Error: ${error}. onyxMethod: ${onyxMethod.name}.`); return Promise.resolve(); } - if (!isStorageCapacityError) { + if (errorClass !== StorageErrorClass.CAPACITY) { + // UNKNOWN error — bounded retry without eviction. // @ts-expect-error No overload matches this call. return onyxMethod(defaultParams, nextRetryAttempt); } diff --git a/lib/storage/errors.ts b/lib/storage/errors.ts new file mode 100644 index 000000000..7318b5a3e --- /dev/null +++ b/lib/storage/errors.ts @@ -0,0 +1,76 @@ +import type {ValueOf} from 'type-fest'; + +/** + * Single source of truth for classifying storage (IndexedDB / SQLite) write failures. + * + * Both layers that react to storage errors consult this: + * - the connection layer (`createStore`) recovers TRANSIENT and FATAL errors by reopening the DB, and + * - the operation layer (`OnyxUtils.retryOperation`) recovers CAPACITY by eviction and retries UNKNOWN. + * + * Keeping the matchers here (instead of duplicated string lists in each layer) guarantees the two + * layers agree on what an error *is*, even though they react to it differently. This module has no + * Onyx dependencies so it can live in the storage layer without creating an import cycle. + */ +const StorageErrorClass = { + /** Connection/transport failure (stale connection). Owner: connection layer — reopen + retry once. */ + TRANSIENT: 'transient', + /** Quota exceeded / disk full. Owner: operation layer — evict and retry. */ + CAPACITY: 'capacity', + /** Non-serializable payload. Never retriable — the same data will always fail. */ + INVALID_DATA: 'invalidData', + /** Backing-store corruption. Owner: connection layer — budgeted heal, then give up. */ + FATAL: 'fatal', + /** Unmatched. Owner: operation layer — bounded retry. */ + UNKNOWN: 'unknown', +} as const; + +type StorageErrorClassValue = ValueOf; + +function getErrorParts(error: unknown): {name: string; message: string} { + if (error instanceof Error || error instanceof DOMException) { + return {name: (error.name ?? '').toLowerCase(), message: (error.message ?? '').toLowerCase()}; + } + return {name: '', message: String(error ?? '').toLowerCase()}; +} + +/** + * Classifies a storage write error into one of the {@link StorageErrorClass} buckets. + * Matching is done on the lowercased error name and message. + */ +function classifyStorageError(error: unknown): StorageErrorClassValue { + const {name, message} = getErrorParts(error); + + // Non-serializable data passed to IDBObjectStore.put — retrying is futile. + if (message.includes("failed to execute 'put' on 'idbobjectstore'")) { + return StorageErrorClass.INVALID_DATA; + } + + // Storage capacity: browser quota exceeded (IDB) or device disk full (SQLite). + if (name.includes('quotaexceedederror') || message.includes('quotaexceedederror') || message.includes('database or disk is full')) { + return StorageErrorClass.CAPACITY; + } + + // Backing-store corruption (Chromium LevelDB). Recoverable only via a budgeted reopen. + if (message.includes('internal error opening backing store')) { + return StorageErrorClass.FATAL; + } + + // Transient connection/transport failures — the cached connection is stale and a reopen fixes it: + // - InvalidStateError: connection closed between getDB() resolving and db.transaction(). + // - AbortError: write transaction aborted (connection close / versionchange / sibling abort). + // - Safari/WebKit IDB server termination for backgrounded tabs. + if ( + name.includes('invalidstateerror') || + name.includes('aborterror') || + message.includes('connection to indexed database server lost') || + message.includes('connection is closing') || + message.includes('idb write transaction aborted without an error') + ) { + return StorageErrorClass.TRANSIENT; + } + + return StorageErrorClass.UNKNOWN; +} + +export {StorageErrorClass, classifyStorageError}; +export type {StorageErrorClassValue}; diff --git a/lib/storage/providers/IDBKeyValProvider/createStore.ts b/lib/storage/providers/IDBKeyValProvider/createStore.ts index 0d3f07913..cb5b36267 100644 --- a/lib/storage/providers/IDBKeyValProvider/createStore.ts +++ b/lib/storage/providers/IDBKeyValProvider/createStore.ts @@ -1,44 +1,10 @@ import * as IDB from 'idb-keyval'; import type {UseStore} from 'idb-keyval'; import * as Logger from '../../../Logger'; +import {StorageErrorClass, classifyStorageError} from '../../errors'; const HEAL_ATTEMPTS_MAX = 3; -/** - * Detects the Chromium-specific IDB backing store corruption error. - * Fires when LevelDB files backing IndexedDB are corrupted and Chrome's - * internal recovery (RepairDB -> delete -> recreate) also fails. - */ -function isBackingStoreError(error: unknown): boolean { - return (error instanceof Error || error instanceof DOMException) && (error as Error).message.includes('Internal error opening backing store'); -} - -/** - * Detects Safari/WebKit IDB connection termination errors. - * Fires when Safari kills the IDB server process for backgrounded tabs. - * WebKit bugs: https://bugs.webkit.org/show_bug.cgi?id=197050, https://bugs.webkit.org/show_bug.cgi?id=201483 - */ -function isConnectionLostError(error: unknown): boolean { - if (!(error instanceof Error || error instanceof DOMException)) return false; - const msg = (error as Error).message.toLowerCase(); - return msg.includes('connection to indexed database server lost') || msg.includes('connection is closing'); -} - -function isInvalidStateError(error: unknown): boolean { - return (error instanceof Error || error instanceof DOMException) && (error as Error).name === 'InvalidStateError'; -} - -/** Errors that trigger a budgeted heal-and-retry in store(). */ -function isBudgetedHealError(error: unknown): boolean { - return isBackingStoreError(error) || isConnectionLostError(error); -} - -function getBudgetedHealErrorLabel(error: unknown): string { - if (isBackingStoreError(error)) return 'backing store'; - if (isConnectionLostError(error)) return 'connection lost'; - return 'unknown'; -} - // This is a copy of the createStore function from idb-keyval, we need a custom implementation // because we need to create the database manually in order to ensure that the store exists before we use it. // If the store does not exist, idb-keyval will throw an error @@ -127,22 +93,27 @@ function createStore(dbName: string, storeName: string): UseStore { return result; } - // Handles three recoverable error classes: - // 1. InvalidStateError — connection closed between getDB() resolving and db.transaction(). - // Retry once with a fresh connection. No budget limit (transient, always worth one reopen). - // 2. Backing store corruption (Chromium UnknownError) — drop cached connection and reopen. - // 3. Connection lost (Safari UnknownError) — IDB server terminated for backgrounded tabs. - // Both 2 and 3 share a heal budget (3 attempts, reset on success). - // Mirrors Dexie's PR1398_maxLoop pattern: https://github.com/dexie/Dexie.js/blob/master/src/functions/temp-transaction.ts - // Note: concurrent store() calls share the budget. Under overlapping failures each caller + // The connection layer owns recovery for connection/transport failures. It reacts per the shared + // error taxonomy (see lib/storage/errors.ts): + // - TRANSIENT (InvalidStateError, AbortError, Safari connection lost) — the cached connection is + // stale. Drop it and retry once with a fresh one. Unbudgeted: a single reopen is always worth it + // and is bounded per operation. + // - FATAL (Chromium backing-store corruption) — reopening can recover transient corruption, but + // repeating forever is futile, so the heal is budgeted (3 attempts, reset on success). + // Mirrors Dexie's PR1398_maxLoop pattern: https://github.com/dexie/Dexie.js/blob/master/src/functions/temp-transaction.ts + // - CAPACITY / UNKNOWN are NOT the connection layer's responsibility — propagate to the operation + // layer (OnyxUtils.retryOperation) without retrying here, to avoid compounding retries. + // Note: concurrent store() calls share the heal budget. Under overlapping failures each caller // decrements independently, so the budget may drain faster than one-per-incident. This is // acceptable — same as Dexie's approach — and the budget resets on any success. return (txMode, callback) => executeTransaction(txMode, callback) .then(resetHealBudget) .catch((error) => { - if (isInvalidStateError(error)) { - Logger.logInfo('IDB InvalidStateError — dropping cached connection and retrying', { + const errorClass = classifyStorageError(error); + + if (errorClass === StorageErrorClass.TRANSIENT) { + Logger.logInfo('IDB transient error — dropping cached connection and retrying once', { dbName, storeName, txMode, @@ -152,29 +123,30 @@ function createStore(dbName: string, storeName: string): UseStore { return executeTransaction(txMode, callback).then(resetHealBudget); } - if (isBudgetedHealError(error) && healAttemptsRemaining > 0) { + if (errorClass === StorageErrorClass.FATAL && healAttemptsRemaining > 0) { healAttemptsRemaining--; - const label = getBudgetedHealErrorLabel(error); - Logger.logInfo(`IDB heal: ${label} error detected — dropping cached connection and reopening (${healAttemptsRemaining} attempts left)`, { + Logger.logInfo(`IDB heal: backing store error detected — dropping cached connection and reopening (${healAttemptsRemaining} attempts left)`, { dbName, storeName, }); dbp = undefined; return executeTransaction(txMode, callback).then((result) => { - Logger.logInfo(`IDB heal: successfully recovered after ${label} error`, {dbName, storeName}); + Logger.logInfo('IDB heal: successfully recovered after backing store error', {dbName, storeName}); return resetHealBudget(result); }); } - if (isBudgetedHealError(error)) { - Logger.logAlert(`IDB heal: ${getBudgetedHealErrorLabel(error)} error — heal budget exhausted, giving up`, { + if (errorClass === StorageErrorClass.FATAL) { + Logger.logAlert('IDB heal: backing store error — heal budget exhausted, giving up', { dbName, storeName, }); } else { - Logger.logAlert('IDB error is not recoverable, giving up', { + // CAPACITY / UNKNOWN — let the operation layer decide (evict or bounded retry). + Logger.logInfo('IDB error not recoverable at the connection layer, propagating', { dbName, storeName, + errorClass, errorMessage: error instanceof Error ? error.message : String(error), }); } diff --git a/tests/unit/onyxUtilsTest.ts b/tests/unit/onyxUtilsTest.ts index 5be3b93da..d3a9c7baf 100644 --- a/tests/unit/onyxUtilsTest.ts +++ b/tests/unit/onyxUtilsTest.ts @@ -764,15 +764,17 @@ describe('OnyxUtils', () => { expect(retryOperationSpy).toHaveBeenCalledTimes(1); }); - it('should log a single skip alert for non-retriable errors', async () => { + it('should skip retry quietly (info, not alert) for fatal connection-layer errors', async () => { const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + const logInfoSpy = jest.spyOn(Logger, 'logInfo'); StorageMock.setItem = jest.fn().mockRejectedValue(nonRetriableIdbError); await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); - expect(logAlertSpy).toHaveBeenCalledWith(`Storage operation skipped retry for non-retriable error. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`); - // Not paired with the "5 retries exhausted" alert - expect(logAlertSpy).toHaveBeenCalledTimes(1); + // The connection layer (createStore) owns and alerts on fatal errors; the operation layer + // just skips the retry at info level. No alert here, and no "5 retries exhausted" alert. + expect(logInfoSpy).toHaveBeenCalledWith(`Storage operation skipped retry; fatal errors are handled by the connection layer. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should include the error in logAlert for IDBObjectStore invalid data errors', async () => { @@ -792,7 +794,7 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logAlertSpy).toHaveBeenCalledWith(`Out of storage. But found no acceptable keys to remove. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 bytesRemaining: Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`); }); it('should include usageDetails in the storage quota log when available', async () => { @@ -804,7 +806,7 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith( - `Storage Quota Check -- bytesUsed: 13289269 bytesRemaining: 5000000 usageDetails: ${JSON.stringify(usageDetails)}. Original error: ${diskFullError}`, + `Storage Quota Check -- bytesUsed: 13289269 originWideBytesRemaining (estimate, not per-DB headroom): 5000000 usageDetails: ${JSON.stringify(usageDetails)}. Original error: ${diskFullError}`, ); }); @@ -1347,7 +1349,7 @@ describe('OnyxUtils', () => { await LocalOnyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith(`Out of storage. Evicting least recently accessed key (${key1}) and retrying. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 bytesRemaining: Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`); }); }); diff --git a/tests/unit/storage/providers/createStoreTest.ts b/tests/unit/storage/providers/createStoreTest.ts index bb6530c3b..3ca68f587 100644 --- a/tests/unit/storage/providers/createStoreTest.ts +++ b/tests/unit/storage/providers/createStoreTest.ts @@ -77,7 +77,7 @@ describe('createStore', () => { }); await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(DOMException); - expect(logInfoSpy).toHaveBeenCalledWith(expect.stringContaining('IDB InvalidStateError'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith(expect.stringContaining('IDB transient error'), expect.anything()); }); it('should not retry on non-InvalidStateError DOMException', async () => { @@ -96,8 +96,11 @@ describe('createStore', () => { await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(DOMException); expect(callCount).toBe(1); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Not found'})); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith( + 'IDB error not recoverable at the connection layer, propagating', + expect.objectContaining({errorMessage: 'Not found', errorClass: 'unknown'}), + ); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should not retry on non-DOMException errors', async () => { @@ -116,8 +119,11 @@ describe('createStore', () => { await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow(TypeError); expect(callCount).toBe(1); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Something went wrong'})); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection'), expect.anything()); + expect(logInfoSpy).toHaveBeenCalledWith( + 'IDB error not recoverable at the connection layer, propagating', + expect.objectContaining({errorMessage: 'Something went wrong', errorClass: 'unknown'}), + ); + expect(logAlertSpy).not.toHaveBeenCalled(); }); it('should preserve data integrity after a successful retry', async () => { @@ -176,7 +182,7 @@ describe('createStore', () => { return IDB.promisifyRequest(s.transaction); }); - expect(logInfoSpy).toHaveBeenCalledWith('IDB InvalidStateError — dropping cached connection and retrying', { + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', { dbName, storeName: STORE_NAME, txMode: 'readwrite', @@ -412,24 +418,29 @@ describe('createStore', () => { jest.restoreAllMocks(); logAlertSpy = jest.spyOn(Logger, 'logAlert'); + logInfoSpy = jest.spyOn(Logger, 'logInfo'); - // QuotaExceededError + // QuotaExceededError — a CAPACITY error the connection layer does not own; it propagates + // to the operation layer (OnyxUtils.retryOperation) which handles eviction. jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { throw new DOMException('Quota exceeded', 'QuotaExceededError'); }); await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow('Quota exceeded'); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); - expect(logAlertSpy).toHaveBeenCalledWith('IDB error is not recoverable, giving up', expect.objectContaining({errorMessage: 'Quota exceeded'})); + expect(logAlertSpy).not.toHaveBeenCalled(); + expect(logInfoSpy).toHaveBeenCalledWith( + 'IDB error not recoverable at the connection layer, propagating', + expect.objectContaining({errorMessage: 'Quota exceeded', errorClass: 'capacity'}), + ); }); }); - describe('connection lost healing', () => { + describe('connection lost recovery (transient, unbudgeted)', () => { function connectionLostError() { return new DOMException('Connection to Indexed Database server lost. Refresh the page to try again', 'UnknownError'); } - it('should heal by dropping cached connection and reopening', async () => { + it('should recover by dropping cached connection and retrying once', async () => { const store = createStore(uniqueDBName(), STORE_NAME); await store('readwrite', (s) => { @@ -451,19 +462,17 @@ describe('createStore', () => { expect(result).toBe('value'); expect(callCount).toBe(2); expect(logInfoSpy).toHaveBeenCalledWith( - expect.stringContaining('connection lost error detected — dropping cached connection and reopening'), + 'IDB transient error — dropping cached connection and retrying once', expect.objectContaining({dbName: expect.any(String)}), ); - expect(logInfoSpy).toHaveBeenCalledWith('IDB heal: successfully recovered after connection lost error', expect.objectContaining({dbName: expect.any(String)})); }); - it('should stop healing after budget exhausts', async () => { + it('should not be budgeted — reopens on every call without ever exhausting a budget', async () => { const store = createStore(uniqueDBName(), STORE_NAME); // All transaction calls fail permanently with connection lost. - // The heal path clears dbp and calls indexedDB.open() again — mock that to - // also fail so fake-indexeddb doesn't deadlock waiting for the old connection - // to close (same pattern as the backing store budget exhaustion test). + // The transient path clears dbp and calls indexedDB.open() again — mock that to + // also fail so fake-indexeddb doesn't deadlock waiting for the old connection to close. jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { throw connectionLostError(); }); @@ -476,19 +485,15 @@ describe('createStore', () => { return req; }); - // Each call drains 1 heal attempt (initial fails, heal retry also fails) - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - - // Budget exhausted — 4th call should NOT attempt healing, but should log budget exhausted - logAlertSpy.mockClear(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); - expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); + // Many calls — each attempts a single reopen and propagates. No budget, so it never logs + // "heal budget exhausted" no matter how many times it fails. + for (let i = 0; i < 5; i++) { + await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow('Connection to Indexed Database server lost'); + } + expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); }); - it('should also heal "connection is closing" variant', async () => { + it('should also recover the "connection is closing" variant', async () => { const store = createStore(uniqueDBName(), STORE_NAME); await store('readwrite', (s) => { @@ -511,46 +516,77 @@ describe('createStore', () => { expect(callCount).toBe(2); }); - it('should share heal budget with backing store errors', async () => { + it('should not consume the backing-store heal budget', async () => { const store = createStore(uniqueDBName(), STORE_NAME); - // All transaction calls fail permanently, alternating error types. - // The heal path clears dbp and calls indexedDB.open() again — mock that to - // also fail so fake-indexeddb doesn't deadlock waiting for the old connection - // to close. - const txErrors = [ - new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - connectionLostError(), - new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - ]; - let txErrorIndex = 0; - jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { - const err = txErrors[Math.min(txErrorIndex, txErrors.length - 1)]; - txErrorIndex++; - throw err; + await store('readwrite', (s) => { + s.put('value', 'key1'); + return IDB.promisifyRequest(s.transaction); }); - jest.spyOn(indexedDB, 'open').mockImplementation(() => { - const req = {} as IDBOpenDBRequest; - Promise.resolve().then(() => { - Object.defineProperty(req, 'error', { - value: new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'), - configurable: true, - }); - req.onerror?.(new Event('error') as Event & {target: IDBOpenDBRequest}); + + const original = IDBDatabase.prototype.transaction; + const backingStoreError = () => new DOMException('Internal error opening backing store for indexedDB.open.', 'UnknownError'); + + // Connection-lost errors are transient and must NOT decrement the backing-store budget. + // Fail with connection-lost on the first transaction of 4 separate operations (each recovers + // on its single reopen). If they wrongly shared the budget, the backing-store budget (3) + // would be drained; afterwards a backing-store error must still heal. + for (let i = 0; i < 4; i++) { + let callCount = 0; + const spy = jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + callCount++; + if (callCount === 1) { + throw connectionLostError(); + } + spy.mockRestore(); + return original.apply(this, args); }); - return req; + await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + } + + // A backing-store error still heals — proving the budget was untouched by the 4 transient failures. + let backingCallCount = 0; + const backingSpy = jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + backingCallCount++; + if (backingCallCount === 1) { + throw backingStoreError(); + } + backingSpy.mockRestore(); + return original.apply(this, args); }); + const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + expect(result).toBe('value'); + expect(logInfoSpy).toHaveBeenCalledWith( + expect.stringContaining('backing store error detected — dropping cached connection and reopening (2 attempts left)'), + expect.objectContaining({dbName: expect.any(String)}), + ); + }); + }); - // 3 calls drain the budget (each call: fail + heal retry fail = 1 budget) - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); + describe('AbortError recovery (transient)', () => { + it('should treat a normalized write-abort AbortError as transient and retry once', async () => { + const store = createStore(uniqueDBName(), STORE_NAME); - // Budget exhausted — no more healing for either error type - logAlertSpy.mockClear(); - await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('k')))).rejects.toThrow(); - expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('heal budget exhausted'), expect.anything()); - expect(logAlertSpy).not.toHaveBeenCalledWith(expect.stringContaining('dropping cached connection and reopening'), expect.anything()); + await store('readwrite', (s) => { + s.put('value', 'key1'); + return IDB.promisifyRequest(s.transaction); + }); + + const original = IDBDatabase.prototype.transaction; + let callCount = 0; + jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(function (this: IDBDatabase, ...args) { + callCount++; + if (callCount === 1) { + throw new DOMException('IDB write transaction aborted without an error', 'AbortError'); + } + return original.apply(this, args); + }); + + const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); + expect(result).toBe('value'); + expect(callCount).toBe(2); + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', expect.objectContaining({dbName: expect.any(String)})); + expect(logAlertSpy).not.toHaveBeenCalled(); }); }); }); From 624cd666f9d920209baa81ea4aff2830dc38b615 Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 8 Jun 2026 15:26:33 +0200 Subject: [PATCH 2/4] introduce storage circuit breaker --- lib/OnyxUtils.ts | 26 ++++- lib/StorageCircuitBreaker.ts | 106 +++++++++++++++++ lib/storage/errors.ts | 1 + .../IDBKeyValProvider/createStore.ts | 7 +- tests/unit/StorageCircuitBreakerTest.ts | 108 ++++++++++++++++++ tests/unit/onyxUtilsTest.ts | 41 +++++++ .../unit/storage/providers/createStoreTest.ts | 8 +- 7 files changed, 288 insertions(+), 9 deletions(-) create mode 100644 lib/StorageCircuitBreaker.ts create mode 100644 tests/unit/StorageCircuitBreakerTest.ts diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index dbf166cb4..96065e415 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -6,6 +6,7 @@ import * as Logger from './Logger'; import type Onyx from './Onyx'; import cache, {TASK} from './OnyxCache'; import OnyxKeys from './OnyxKeys'; +import StorageCircuitBreaker from './StorageCircuitBreaker'; import Storage from './storage'; import {StorageErrorClass, classifyStorageError} from './storage/errors'; import type { @@ -792,7 +793,9 @@ function reportStorageQuota(error?: Error): Promise { * - INVALID_DATA: logs an alert and throws (the same data will always fail). * - TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget * and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly. - * - CAPACITY: evicts the least recently accessed evictable key and retries. + * - CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level + * circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making + * progress or failures storm — the per-operation budget alone cannot stop a session-wide storm. * - UNKNOWN: bounded retry. */ function retryOperation(error: Error, onyxMethod: TMethod, defaultParams: Parameters[0], retryAttempt: number | undefined): Promise { @@ -800,6 +803,13 @@ function retryOperation(error: Error, on const nextRetryAttempt = currentRetryAttempt + 1; const errorClass = classifyStorageError(error); + // Once the breaker is open, every capacity write is going to fail the same way. Drop it silently — + // the breaker already emitted its single alert, and logging per failed write is exactly the storm + // we are suppressing. (We return before the log line below on purpose.) + if (errorClass === StorageErrorClass.CAPACITY && StorageCircuitBreaker.isTripped()) { + return Promise.resolve(); + } + Logger.logInfo(`Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); if (errorClass === StorageErrorClass.INVALID_DATA) { @@ -823,6 +833,16 @@ function retryOperation(error: Error, on return onyxMethod(defaultParams, nextRetryAttempt); } + // CAPACITY: feed the session-level circuit breaker before evicting. The per-operation budget above + // cannot stop a session-wide storm — each evicted key triggers an OnyxDerived recompute that spawns + // a fresh write with its own budget — so the breaker is what actually halts the meltdown. (The + // already-open case returned silently at the top of this function.) + StorageCircuitBreaker.recordCapacityFailure(); + if (StorageCircuitBreaker.isTripped()) { + // This failure tripped the breaker; it already emitted its single alert. Stop here. + return Promise.resolve(); + } + // Find the least recently accessed evictable key that we can remove const keyForRemoval = cache.getKeyForEviction(); if (!keyForRemoval) { @@ -833,9 +853,11 @@ function retryOperation(error: Error, on return reportStorageQuota(error); } - // Remove the least recently accessed key and retry. + // Remove the least recently accessed key and retry. Tell the breaker we evicted so that, if the + // retry comes back as another capacity failure, it counts as a no-progress cycle. Logger.logInfo(`Out of storage. Evicting least recently accessed key (${keyForRemoval}) and retrying. Error: ${error}`); reportStorageQuota(error); + StorageCircuitBreaker.recordEviction(); // @ts-expect-error No overload matches this call. return remove(keyForRemoval).then(() => onyxMethod(defaultParams, nextRetryAttempt)); diff --git a/lib/StorageCircuitBreaker.ts b/lib/StorageCircuitBreaker.ts new file mode 100644 index 000000000..0e4ecfac0 --- /dev/null +++ b/lib/StorageCircuitBreaker.ts @@ -0,0 +1,106 @@ +import * as Logger from './Logger'; + +/** + * Process-scoped circuit breaker for storage CAPACITY failures. + * + * The per-operation retry budget in `OnyxUtils.retryOperation` cannot stop a session-level storm: + * each evict -> OnyxDerived recompute -> new write starts its own fresh budget, so a full disk or + * exhausted quota can drive tens of thousands of evict+retry cycles that never make progress and + * freeze the app. This breaker is the session-level brake — `retryOperation` consults it before + * every eviction. + * + * It trips when EITHER: + * - capacity failures within {@link ROLLING_WINDOW_MS} exceed {@link FAILURE_THRESHOLD}, or + * - {@link NO_PROGRESS_CAP} consecutive evictions are each immediately followed by another capacity + * failure (the eviction freed nothing the next write could use — a no-progress cycle). This is a + * cheap proxy for `getDatabaseSize()`, which is costly and only reports origin-wide usage. + * + * On trip it emits exactly ONE alert and self-resets once the rolling window clears, so a persistent + * condition produces at most one alert per window instead of one log line per failed write. + */ + +/** Rolling window over which capacity failures are counted, and how long a trip stays open. */ +const ROLLING_WINDOW_MS = 60 * 1000; + +/** Capacity failures within the window above which the breaker trips (storm backstop). */ +const FAILURE_THRESHOLD = 50; + +/** Consecutive no-progress evictions (evict -> still capacity failure) above which the breaker trips. */ +const NO_PROGRESS_CAP = 5; + +let failureTimestamps: number[] = []; +let consecutiveNoProgressEvictions = 0; +let evictionAwaitingResult = false; +let trippedUntil = 0; + +function reset(): void { + failureTimestamps = []; + consecutiveNoProgressEvictions = 0; + evictionAwaitingResult = false; + trippedUntil = 0; +} + +/** Whether the breaker is currently open. Self-resets once the window since the trip has cleared. */ +function isTripped(): boolean { + if (trippedUntil === 0) { + return false; + } + if (Date.now() >= trippedUntil) { + reset(); + return false; + } + return true; +} + +function trip(reason: string): void { + trippedUntil = Date.now() + ROLLING_WINDOW_MS; + Logger.logAlert(`Storage circuit breaker tripped: ${reason}. Halting eviction/retry for ${ROLLING_WINDOW_MS / 1000}s to stop a storage failure storm.`); +} + +/** + * Record a CAPACITY failure. Call once per capacity failure in `retryOperation`, BEFORE deciding + * whether to evict; then check {@link isTripped} to decide whether to proceed. + */ +function recordCapacityFailure(): void { + // While open, recording is a no-op: no extra timestamps, no second alert, and nothing to keep the + // window from clearing. `isTripped()` self-resets here once the window has elapsed. + if (isTripped()) { + return; + } + + const now = Date.now(); + failureTimestamps = failureTimestamps.filter((timestamp) => now - timestamp < ROLLING_WINDOW_MS); + + // A fresh storm (nothing left in the window) resets the no-progress tracking so a stale eviction + // from an earlier, unrelated incident can't be miscounted as no-progress for this one. + if (failureTimestamps.length === 0) { + consecutiveNoProgressEvictions = 0; + evictionAwaitingResult = false; + } + + // We evicted on the previous cycle and we're back here with another capacity failure, so that + // eviction freed no usable space. + if (evictionAwaitingResult) { + consecutiveNoProgressEvictions += 1; + evictionAwaitingResult = false; + } + + failureTimestamps.push(now); + + if (failureTimestamps.length > FAILURE_THRESHOLD) { + trip(`${failureTimestamps.length} capacity failures within ${ROLLING_WINDOW_MS / 1000}s`); + return; + } + if (consecutiveNoProgressEvictions >= NO_PROGRESS_CAP) { + trip(`${consecutiveNoProgressEvictions} consecutive evictions freed no usable space`); + } +} + +/** Record that `retryOperation` just evicted a key, so the next capacity failure counts as no-progress. */ +function recordEviction(): void { + evictionAwaitingResult = true; +} + +const StorageCircuitBreaker = {recordCapacityFailure, recordEviction, isTripped, reset, ROLLING_WINDOW_MS, FAILURE_THRESHOLD, NO_PROGRESS_CAP}; + +export default StorageCircuitBreaker; diff --git a/lib/storage/errors.ts b/lib/storage/errors.ts index 7318b5a3e..dffe9f55a 100644 --- a/lib/storage/errors.ts +++ b/lib/storage/errors.ts @@ -64,6 +64,7 @@ function classifyStorageError(error: unknown): StorageErrorClassValue { name.includes('aborterror') || message.includes('connection to indexed database server lost') || message.includes('connection is closing') || + // This is related to https://github.com/Expensify/react-native-onyx/pull/796 — remove this comment when #796 is merged. message.includes('idb write transaction aborted without an error') ) { return StorageErrorClass.TRANSIENT; diff --git a/lib/storage/providers/IDBKeyValProvider/createStore.ts b/lib/storage/providers/IDBKeyValProvider/createStore.ts index cb5b36267..e238d07c9 100644 --- a/lib/storage/providers/IDBKeyValProvider/createStore.ts +++ b/lib/storage/providers/IDBKeyValProvider/createStore.ts @@ -141,8 +141,11 @@ function createStore(dbName: string, storeName: string): UseStore { dbName, storeName, }); - } else { - // CAPACITY / UNKNOWN — let the operation layer decide (evict or bounded retry). + } else if (errorClass === StorageErrorClass.UNKNOWN) { + // UNKNOWN — unexpected at this layer; record it so it's visible. CAPACITY is the + // expected propagation path (the operation layer owns its logging, and suppresses it + // entirely once the circuit breaker is open), so we do NOT log it here — doing so was a + // per-failed-write line that dominated the storm. Logger.logInfo('IDB error not recoverable at the connection layer, propagating', { dbName, storeName, diff --git a/tests/unit/StorageCircuitBreakerTest.ts b/tests/unit/StorageCircuitBreakerTest.ts new file mode 100644 index 000000000..8322c0337 --- /dev/null +++ b/tests/unit/StorageCircuitBreakerTest.ts @@ -0,0 +1,108 @@ +import * as Logger from '../../lib/Logger'; +import StorageCircuitBreaker from '../../lib/StorageCircuitBreaker'; + +describe('StorageCircuitBreaker', () => { + let currentTime = 1_000_000; + let nowSpy: jest.SpyInstance; + + const advance = (ms: number) => { + currentTime += ms; + }; + + beforeEach(() => { + currentTime = 1_000_000; + nowSpy = jest.spyOn(Date, 'now').mockImplementation(() => currentTime); + StorageCircuitBreaker.reset(); + }); + + afterEach(() => { + nowSpy.mockRestore(); + jest.restoreAllMocks(); + }); + + it('should not trip below the failure threshold', () => { + for (let i = 0; i < StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should trip once capacity failures exceed the threshold within the window', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + }); + + it('should not trip when failures are spread across multiple windows', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + // Space each failure out so older ones fall out of the rolling window before the count builds up. + advance(2_000); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should trip after consecutive no-progress evictions', () => { + // Each cycle is a capacity failure followed by an eviction that frees no usable space. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + } + // The next capacity failure observes that the last eviction made no progress, tipping it over. + StorageCircuitBreaker.recordCapacityFailure(); + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + }); + + it('should not count a failure as no-progress when no eviction preceded it', () => { + // Capacity failures with no interleaved evictions must not accumulate no-progress cycles. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP + 2; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should emit exactly one alert when it trips, even as failures continue', () => { + const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + // Further failures while open must not produce more alerts. + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordCapacityFailure(); + + expect(logAlertSpy).toHaveBeenCalledTimes(1); + expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('Storage circuit breaker tripped')); + }); + + it('should self-reset once the rolling window clears', () => { + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + expect(StorageCircuitBreaker.isTripped()).toBe(true); + + advance(StorageCircuitBreaker.ROLLING_WINDOW_MS); + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); + + it('should reset no-progress tracking after the window clears between storms', () => { + // First storm: some no-progress evictions, but not enough to trip. + for (let i = 0; i < StorageCircuitBreaker.NO_PROGRESS_CAP - 1; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + StorageCircuitBreaker.recordEviction(); + } + + // Let the window fully clear so the next failure starts a fresh storm. + advance(StorageCircuitBreaker.ROLLING_WINDOW_MS + 1); + StorageCircuitBreaker.recordCapacityFailure(); + + expect(StorageCircuitBreaker.isTripped()).toBe(false); + }); +}); diff --git a/tests/unit/onyxUtilsTest.ts b/tests/unit/onyxUtilsTest.ts index d3a9c7baf..240c2dc12 100644 --- a/tests/unit/onyxUtilsTest.ts +++ b/tests/unit/onyxUtilsTest.ts @@ -8,6 +8,7 @@ import type GenericCollection from '../utils/GenericCollection'; import OnyxCache from '../../lib/OnyxCache'; import * as Logger from '../../lib/Logger'; import StorageMock from '../../lib/storage'; +import StorageCircuitBreaker from '../../lib/StorageCircuitBreaker'; import createDeferredTask from '../../lib/createDeferredTask'; import waitForPromisesToResolve from '../utils/waitForPromisesToResolve'; @@ -722,6 +723,9 @@ describe('OnyxUtils', () => { const diskFullError = new Error('database or disk is full'); const nonRetriableIdbError = Object.assign(new Error('Internal error opening backing store for indexedDB.open.'), {name: 'UnknownError'}); + // The circuit breaker is process-scoped, so reset it between tests to avoid state leaking. + beforeEach(() => StorageCircuitBreaker.reset()); + it('should retry only one time if the operation is firstly failed and then passed', async () => { StorageMock.setItem = jest.fn(StorageMock.setItem).mockRejectedValueOnce(genericError).mockImplementation(StorageMock.setItem); @@ -821,6 +825,43 @@ describe('OnyxUtils', () => { expect(logAlertSpy).toHaveBeenCalledWith(`Unable to get database size. getDatabaseSize error: ${dbSizeError}. Original error: ${diskFullError}`); }); + it('should trip the circuit breaker and alert once after sustained capacity failures', async () => { + const logAlertSpy = jest.spyOn(Logger, 'logAlert'); + StorageMock.setItem = jest.fn().mockRejectedValue(diskFullError); + + // No evictable keys are configured, so each failing write records exactly one capacity + // failure with the breaker (it cannot evict). Enough of them within one window trips it. + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + await Onyx.set(ONYXKEYS.TEST_KEY, {test: i}); + } + await waitForPromisesToResolve(); + + expect(StorageCircuitBreaker.isTripped()).toBe(true); + expect(logAlertSpy).toHaveBeenCalledWith(expect.stringContaining('Storage circuit breaker tripped')); + }); + + it('should drop capacity writes silently while the circuit breaker is open', async () => { + // Trip the breaker deterministically so every capacity failure below is observed while open. + for (let i = 0; i <= StorageCircuitBreaker.FAILURE_THRESHOLD; i++) { + StorageCircuitBreaker.recordCapacityFailure(); + } + expect(StorageCircuitBreaker.isTripped()).toBe(true); + + // Clear so we only observe logging caused by the write below, not the trip alert above. + const logInfoSpy = jest.spyOn(Logger, 'logInfo').mockClear(); + const logAlertSpy = jest.spyOn(Logger, 'logAlert').mockClear(); + StorageMock.setItem = jest.fn().mockRejectedValue(diskFullError); + + await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); + await waitForPromisesToResolve(); + + // The write and any cascading derived writes are dropped without per-write log spam, and + // without re-alerting — the single trip alert is the only signal while open. + expect(StorageCircuitBreaker.isTripped()).toBe(true); + expect(logInfoSpy).not.toHaveBeenCalledWith(expect.stringContaining('Failed to save to storage')); + expect(logAlertSpy).not.toHaveBeenCalled(); + }); + it('should not re-add an evicted key to recentlyAccessedKeys after removal', async () => { // Re-init with evictable keys so getKeyForEviction() has something to return Object.assign(OnyxUtils.getDeferredInitTask(), createDeferredTask()); diff --git a/tests/unit/storage/providers/createStoreTest.ts b/tests/unit/storage/providers/createStoreTest.ts index 3ca68f587..acb6787c1 100644 --- a/tests/unit/storage/providers/createStoreTest.ts +++ b/tests/unit/storage/providers/createStoreTest.ts @@ -421,17 +421,15 @@ describe('createStore', () => { logInfoSpy = jest.spyOn(Logger, 'logInfo'); // QuotaExceededError — a CAPACITY error the connection layer does not own; it propagates - // to the operation layer (OnyxUtils.retryOperation) which handles eviction. + // to the operation layer (OnyxUtils.retryOperation) which handles eviction. The connection + // layer stays quiet for capacity (it's the expected path) so it can't spam the storm log. jest.spyOn(IDBDatabase.prototype, 'transaction').mockImplementation(() => { throw new DOMException('Quota exceeded', 'QuotaExceededError'); }); await expect(store('readonly', (s) => IDB.promisifyRequest(s.get('key1')))).rejects.toThrow('Quota exceeded'); expect(logAlertSpy).not.toHaveBeenCalled(); - expect(logInfoSpy).toHaveBeenCalledWith( - 'IDB error not recoverable at the connection layer, propagating', - expect.objectContaining({errorMessage: 'Quota exceeded', errorClass: 'capacity'}), - ); + expect(logInfoSpy).not.toHaveBeenCalledWith('IDB error not recoverable at the connection layer, propagating', expect.objectContaining({errorClass: 'capacity'})); }); }); From 1ffe796da6393fb674075810c51f85090ddb4f12 Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 8 Jun 2026 16:04:22 +0200 Subject: [PATCH 3/4] updated api doc --- API-INTERNAL.md | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/API-INTERNAL.md b/API-INTERNAL.md index b3cfeb90a..3a5cc6460 100644 --- a/API-INTERNAL.md +++ b/API-INTERNAL.md @@ -79,12 +79,17 @@ If the requested key is a collection, it will return an object with all the coll

Remove a key from Onyx and update the subscribers

retryOperation()
-

Handles storage operation failures based on the error type:

+

Handles storage operation failures based on the error class (see lib/storage/errors.ts). +The connection layer (createStore) owns connection/transport recovery; this operation layer owns +capacity recovery (eviction) so that a given failure is retried by exactly one layer:

    -
  • Storage capacity errors: evicts data and retries the operation
  • -
  • Invalid data errors: logs an alert and throws an error
  • -
  • Non-retriable errors: logs an alert and resolves without retrying
  • -
  • Other errors: retries the operation
  • +
  • INVALID_DATA: logs an alert and throws (the same data will always fail).
  • +
  • TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget +and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly.
  • +
  • CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level +circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making +progress or failures storm — the per-operation budget alone cannot stop a session-wide storm.
  • +
  • UNKNOWN: bounded retry.
broadcastUpdate()
@@ -318,11 +323,16 @@ Remove a key from Onyx and update the subscribers ## retryOperation() -Handles storage operation failures based on the error type: -- Storage capacity errors: evicts data and retries the operation -- Invalid data errors: logs an alert and throws an error -- Non-retriable errors: logs an alert and resolves without retrying -- Other errors: retries the operation +Handles storage operation failures based on the error class (see lib/storage/errors.ts). +The connection layer (createStore) owns connection/transport recovery; this operation layer owns +capacity recovery (eviction) so that a given failure is retried by exactly one layer: +- INVALID_DATA: logs an alert and throws (the same data will always fail). +- TRANSIENT / FATAL: the connection layer already retried (transient) or exhausted its heal budget + and alerted (fatal). Retrying here would only re-amplify, so we skip the write quietly. +- CAPACITY: evicts the least recently accessed evictable key and retries, under a session-level + circuit breaker (see lib/StorageCircuitBreaker.ts) that halts the loop once eviction stops making + progress or failures storm — the per-operation budget alone cannot stop a session-wide storm. +- UNKNOWN: bounded retry. **Kind**: global function From 8bc993cafd23c8980d9274333a38bb335f28c244 Mon Sep 17 00:00:00 2001 From: Hubert Sosinski Date: Mon, 8 Jun 2026 16:06:12 +0200 Subject: [PATCH 4/4] prettier fix --- lib/OnyxUtils.ts | 4 +++- tests/unit/onyxUtilsTest.ts | 16 ++++++++++++---- tests/unit/storage/providers/createStoreTest.ts | 5 +---- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/OnyxUtils.ts b/lib/OnyxUtils.ts index 96065e415..d80b300b4 100644 --- a/lib/OnyxUtils.ts +++ b/lib/OnyxUtils.ts @@ -810,7 +810,9 @@ function retryOperation(error: Error, on return Promise.resolve(); } - Logger.logInfo(`Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`); + Logger.logInfo( + `Failed to save to storage. Error: ${error}. class: ${errorClass}. onyxMethod: ${onyxMethod.name}. retryAttempt: ${currentRetryAttempt}/${MAX_STORAGE_OPERATION_RETRY_ATTEMPTS}`, + ); if (errorClass === StorageErrorClass.INVALID_DATA) { Logger.logAlert(`Attempted to set invalid data set in Onyx. Please ensure all data is serializable. Error: ${error}`); diff --git a/tests/unit/onyxUtilsTest.ts b/tests/unit/onyxUtilsTest.ts index 240c2dc12..7321e715e 100644 --- a/tests/unit/onyxUtilsTest.ts +++ b/tests/unit/onyxUtilsTest.ts @@ -777,7 +777,9 @@ describe('OnyxUtils', () => { // The connection layer (createStore) owns and alerts on fatal errors; the operation layer // just skips the retry at info level. No alert here, and no "5 retries exhausted" alert. - expect(logInfoSpy).toHaveBeenCalledWith(`Storage operation skipped retry; fatal errors are handled by the connection layer. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`); + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage operation skipped retry; fatal errors are handled by the connection layer. Error: ${nonRetriableIdbError}. onyxMethod: setWithRetry.`, + ); expect(logAlertSpy).not.toHaveBeenCalled(); }); @@ -798,7 +800,9 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logAlertSpy).toHaveBeenCalledWith(`Out of storage. But found no acceptable keys to remove. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`, + ); }); it('should include usageDetails in the storage quota log when available', async () => { @@ -810,7 +814,9 @@ describe('OnyxUtils', () => { await Onyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith( - `Storage Quota Check -- bytesUsed: 13289269 originWideBytesRemaining (estimate, not per-DB headroom): 5000000 usageDetails: ${JSON.stringify(usageDetails)}. Original error: ${diskFullError}`, + `Storage Quota Check -- bytesUsed: 13289269 originWideBytesRemaining (estimate, not per-DB headroom): 5000000 usageDetails: ${JSON.stringify( + usageDetails, + )}. Original error: ${diskFullError}`, ); }); @@ -1390,7 +1396,9 @@ describe('OnyxUtils', () => { await LocalOnyx.set(ONYXKEYS.TEST_KEY, {test: 'data'}); expect(logInfoSpy).toHaveBeenCalledWith(`Out of storage. Evicting least recently accessed key (${key1}) and retrying. Error: ${diskFullError}`); - expect(logInfoSpy).toHaveBeenCalledWith(`Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`); + expect(logInfoSpy).toHaveBeenCalledWith( + `Storage Quota Check -- bytesUsed: 0 originWideBytesRemaining (estimate, not per-DB headroom): Infinity. Original error: ${diskFullError}`, + ); }); }); diff --git a/tests/unit/storage/providers/createStoreTest.ts b/tests/unit/storage/providers/createStoreTest.ts index acb6787c1..ec58361fa 100644 --- a/tests/unit/storage/providers/createStoreTest.ts +++ b/tests/unit/storage/providers/createStoreTest.ts @@ -459,10 +459,7 @@ describe('createStore', () => { const result = await store('readonly', (s) => IDB.promisifyRequest(s.get('key1'))); expect(result).toBe('value'); expect(callCount).toBe(2); - expect(logInfoSpy).toHaveBeenCalledWith( - 'IDB transient error — dropping cached connection and retrying once', - expect.objectContaining({dbName: expect.any(String)}), - ); + expect(logInfoSpy).toHaveBeenCalledWith('IDB transient error — dropping cached connection and retrying once', expect.objectContaining({dbName: expect.any(String)})); }); it('should not be budgeted — reopens on every call without ever exhausting a budget', async () => {