From 68e1396830058a7158a4d7e97d49fb1b7b7abf8a Mon Sep 17 00:00:00 2001 From: Kenji Kono Date: Fri, 20 Mar 2026 11:38:41 +0900 Subject: [PATCH 1/4] fix(prisma): add retry for Aurora Serverless v2 connection errors (#104, #105) Why: Aurora Serverless v2 with auto-pause (0 ACU) drops connections on idle_session_timeout and takes ~15s to resume. Without retry, both runtime queries and CDK deployment migrations fail on transient errors. Also, DATABASE_URL (including password) was logged to CloudWatch. What: - Remove console.log(DATABASE_URL) that leaked credentials to CloudWatch - Add Prisma client extension with retry on transient connection errors (P2024, P1001, P1017, idle-session timeout, ECONNRESET) - Add exponential backoff retry to migration-runner for prisma db push - Optimize connection params: connection_limit=1, connect_timeout=30 --- cdk/lib/constructs/database.ts | 7 ++-- webapp/src/jobs/migration-runner.ts | 47 ++++++++++++++++------- webapp/src/lib/prisma.ts | 59 +++++++++++++++++++++++++++-- 3 files changed, 93 insertions(+), 20 deletions(-) diff --git a/cdk/lib/constructs/database.ts b/cdk/lib/constructs/database.ts index 3f9cdf9..3aec50d 100644 --- a/cdk/lib/constructs/database.ts +++ b/cdk/lib/constructs/database.ts @@ -90,9 +90,10 @@ export class Database extends Construct implements ec2.IConnectable { public getLambdaEnvironment(databaseName: string) { const conn = this.getConnectionInfo(); - // Aurora Serverless v2 cold start takes up to 15 seconds - // https://www.prisma.io/docs/orm/prisma-client/setup-and-configuration/databases-connections/connection-pool - const option = '?pool_timeout=20&connect_timeout=20'; + // connection_limit=1: Each Lambda instance handles one request at a time + // connect_timeout=30: Aurora Serverless v2 auto-pause resume takes ~15s (longer after 24h+ pause) + // https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html + const option = '?connection_limit=1&connect_timeout=30'; return { DATABASE_HOST: conn.host, DATABASE_NAME: databaseName, diff --git a/webapp/src/jobs/migration-runner.ts b/webapp/src/jobs/migration-runner.ts index e73ee26..389f782 100644 --- a/webapp/src/jobs/migration-runner.ts +++ b/webapp/src/jobs/migration-runner.ts @@ -27,26 +27,45 @@ export const handler: Handler = async (event, _) => { // Currently we don't have any direct method to invoke prisma migration programmatically. // As a workaround, we spawn migration script as a child process and wait for its completion. // Please also refer to the following GitHub issue: https://github.com/prisma/prisma/issues/4703 - try { - const exitCode = await new Promise((resolve, _) => { + await runPrismaDbPush(options); +}; + +// Aurora Serverless v2 may be resuming from auto-pause (0 ACU) during CDK deployment, +// which takes approximately 15 seconds. Retry transient connection errors with exponential backoff. +// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html +async function runPrismaDbPush(options: string[], maxRetries = 5, baseDelay = 3000): Promise { + for (let attempt = 1; attempt <= maxRetries; attempt++) { + const { exitCode, stdout, stderr } = await new Promise<{ + exitCode: number; + stdout: string; + stderr: string; + }>((resolve) => { execFile( path.resolve('./node_modules/prisma/build/index.js'), ['db', 'push', '--skip-generate'].concat(options), (error, stdout, stderr) => { - console.log(stdout); - if (error != null) { - console.log(`prisma db push exited with error ${error.message}`); - resolve(error.code ?? 1); - } else { - resolve(0); - } + resolve({ + exitCode: error ? (typeof error.code === 'number' ? error.code : 1) : 0, + stdout, + stderr, + }); }, ); }); - if (exitCode != 0) throw Error(`db push failed with exit code ${exitCode}`); - } catch (e) { - console.log(e); - throw e; + console.log(`prisma db push attempt ${attempt}/${maxRetries}`, { exitCode, stdout, stderr }); + + if (exitCode === 0) return; + + const isRetryable = + stderr.includes('P1001') || stderr.includes("Can't reach database") || stderr.includes('Connection refused'); + + if (!isRetryable || attempt === maxRetries) { + throw new Error(`prisma db push failed after ${attempt} attempt(s): ${stderr}`); + } + + const delay = baseDelay * Math.pow(2, attempt - 1) + Math.random() * 1000; + console.log(`Retrying prisma db push in ${Math.round(delay)}ms...`); + await new Promise((r) => setTimeout(r, delay)); } -}; +} diff --git a/webapp/src/lib/prisma.ts b/webapp/src/lib/prisma.ts index 757a2d0..6437c87 100644 --- a/webapp/src/lib/prisma.ts +++ b/webapp/src/lib/prisma.ts @@ -1,4 +1,4 @@ -import { PrismaClient } from '@prisma/client'; +import { Prisma, PrismaClient } from '@prisma/client'; // https://www.prisma.io/docs/guides/nextjs @@ -6,7 +6,60 @@ const globalForPrisma = global as unknown as { prisma: PrismaClient; }; -console.log(process.env.DATABASE_URL); -export const prisma = globalForPrisma.prisma || new PrismaClient({ log: ['query', 'info', 'warn', 'error'] }); +// Determine if an error is a transient connection issue that may resolve on retry. +// Aurora Serverless v2 can drop connections due to idle_session_timeout (60s) or auto-pause, +// and resume takes approximately 15 seconds. +// https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html +function isRetryableError(error: unknown): boolean { + if (!(error instanceof Error)) return false; + const code = (error as { code?: string }).code; + if ( + code === 'P2024' || // Connection pool timeout + code === 'P1001' || // Can't reach database server + code === 'P1017' // Server has closed the connection + ) { + return true; + } + const msg = error.message; + return ( + msg.includes('idle-session timeout') || + msg.includes('terminating connection') || + msg.includes('Connection terminated') || + msg.includes('ECONNRESET') + ); +} + +const basePrisma = new PrismaClient(); + +async function withRetry(fn: () => Promise, maxRetries = 3, baseDelay = 500): Promise { + let lastError: unknown; + for (let attempt = 0; attempt <= maxRetries; attempt++) { + try { + return await fn(); + } catch (error) { + lastError = error; + if (attempt === maxRetries || !isRetryableError(error)) throw error; + // Discard stale connections before retrying + await basePrisma.$disconnect(); + const delay = baseDelay * Math.pow(2, attempt) + Math.random() * 100; + console.warn(`Prisma retry attempt ${attempt + 1}/${maxRetries}, waiting ${Math.round(delay)}ms`); + await new Promise((r) => setTimeout(r, delay)); + } + } + throw lastError; +} + +const retryExtension = Prisma.defineExtension({ + name: 'retry-on-connection-error', + query: { + $allModels: { + async $allOperations({ args, query }) { + return withRetry(() => query(args)); + }, + }, + }, +}); + +export const prisma = basePrisma.$extends(retryExtension) as unknown as PrismaClient; if (process.env.NODE_ENV !== 'production') globalForPrisma.prisma = prisma; From f18481fbdd1e7630e252d8804dd7e2c60e21d344 Mon Sep 17 00:00:00 2001 From: Kenji Kono Date: Fri, 20 Mar 2026 11:42:07 +0900 Subject: [PATCH 2/4] test: update CDK snapshots for connection option changes --- ...ck-webapp-starter-kit-without-domain.test.ts.snap | 12 ++++++------ ...verless-fullstack-webapp-starter-kit.test.ts.snap | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap b/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap index c3bca2c..5aa9782 100644 --- a/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap +++ b/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap @@ -723,7 +723,7 @@ exports[`Snapshot test 2`] = ` ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20", + "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -772,7 +772,7 @@ exports[`Snapshot test 2`] = ` "Endpoint.Port", ], }, - "/main?pool_timeout=20&connect_timeout=20", + "/main?connection_limit=1&connect_timeout=30", ], ], }, @@ -3406,7 +3406,7 @@ service iptables save", ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20", + "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -3455,7 +3455,7 @@ service iptables save", "Endpoint.Port", ], }, - "/main?pool_timeout=20&connect_timeout=20", + "/main?connection_limit=1&connect_timeout=30", ], ], }, @@ -3783,7 +3783,7 @@ service iptables save", ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20", + "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -3832,7 +3832,7 @@ service iptables save", "Endpoint.Port", ], }, - "/main?pool_timeout=20&connect_timeout=20", + "/main?connection_limit=1&connect_timeout=30", ], ], }, diff --git a/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap b/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap index 520dfb6..27af260 100644 --- a/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap +++ b/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap @@ -744,7 +744,7 @@ exports[`Snapshot test 2`] = ` ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20", + "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -793,7 +793,7 @@ exports[`Snapshot test 2`] = ` "Endpoint.Port", ], }, - "/main?pool_timeout=20&connect_timeout=20", + "/main?connection_limit=1&connect_timeout=30", ], ], }, @@ -3236,7 +3236,7 @@ service iptables save", ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20", + "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -3285,7 +3285,7 @@ service iptables save", "Endpoint.Port", ], }, - "/main?pool_timeout=20&connect_timeout=20", + "/main?connection_limit=1&connect_timeout=30", ], ], }, @@ -3589,7 +3589,7 @@ service iptables save", ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?pool_timeout=20&connect_timeout=20", + "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -3638,7 +3638,7 @@ service iptables save", "Endpoint.Port", ], }, - "/main?pool_timeout=20&connect_timeout=20", + "/main?connection_limit=1&connect_timeout=30", ], ], }, From c4b9d35c0b38919a120f1f81c2109c6371ad08ae Mon Sep 17 00:00:00 2001 From: Kenji Kono Date: Fri, 20 Mar 2026 11:49:26 +0900 Subject: [PATCH 3/4] fix(prisma): add success log after retry recovery --- webapp/src/lib/prisma.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/webapp/src/lib/prisma.ts b/webapp/src/lib/prisma.ts index 6437c87..cad29cd 100644 --- a/webapp/src/lib/prisma.ts +++ b/webapp/src/lib/prisma.ts @@ -35,7 +35,11 @@ async function withRetry(fn: () => Promise, maxRetries = 3, baseDelay = 50 let lastError: unknown; for (let attempt = 0; attempt <= maxRetries; attempt++) { try { - return await fn(); + const result = await fn(); + if (attempt > 0) { + console.warn(`Prisma query succeeded after ${attempt} retry(s)`); + } + return result; } catch (error) { lastError = error; if (attempt === maxRetries || !isRetryableError(error)) throw error; From 908ab827cce0de0a360d2eea6e2897bd00fd623f Mon Sep 17 00:00:00 2001 From: Kenji Kono Date: Fri, 20 Mar 2026 13:01:21 +0900 Subject: [PATCH 4/4] fix(prisma): add pool_timeout=30 and retry on connection pool timeout The default pool_timeout (10s) is insufficient for Aurora Serverless v2 auto-pause resume (~15s). Also, PrismaClientInitializationError for pool timeout has errorCode=undefined, so message-based detection is needed. --- cdk/lib/constructs/database.ts | 3 ++- ...ck-webapp-starter-kit-without-domain.test.ts.snap | 12 ++++++------ ...verless-fullstack-webapp-starter-kit.test.ts.snap | 12 ++++++------ webapp/src/lib/prisma.ts | 1 + 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/cdk/lib/constructs/database.ts b/cdk/lib/constructs/database.ts index 3aec50d..d6b655b 100644 --- a/cdk/lib/constructs/database.ts +++ b/cdk/lib/constructs/database.ts @@ -91,9 +91,10 @@ export class Database extends Construct implements ec2.IConnectable { public getLambdaEnvironment(databaseName: string) { const conn = this.getConnectionInfo(); // connection_limit=1: Each Lambda instance handles one request at a time + // pool_timeout=30: Must be >= connect_timeout to allow Aurora Serverless v2 resume (~15s) // connect_timeout=30: Aurora Serverless v2 auto-pause resume takes ~15s (longer after 24h+ pause) // https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/aurora-serverless-v2-auto-pause.html - const option = '?connection_limit=1&connect_timeout=30'; + const option = '?connection_limit=1&pool_timeout=30&connect_timeout=30'; return { DATABASE_HOST: conn.host, DATABASE_NAME: databaseName, diff --git a/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap b/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap index 5aa9782..69c2b81 100644 --- a/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap +++ b/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit-without-domain.test.ts.snap @@ -723,7 +723,7 @@ exports[`Snapshot test 2`] = ` ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", + "DATABASE_OPTION": "?connection_limit=1&pool_timeout=30&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -772,7 +772,7 @@ exports[`Snapshot test 2`] = ` "Endpoint.Port", ], }, - "/main?connection_limit=1&connect_timeout=30", + "/main?connection_limit=1&pool_timeout=30&connect_timeout=30", ], ], }, @@ -3406,7 +3406,7 @@ service iptables save", ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", + "DATABASE_OPTION": "?connection_limit=1&pool_timeout=30&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -3455,7 +3455,7 @@ service iptables save", "Endpoint.Port", ], }, - "/main?connection_limit=1&connect_timeout=30", + "/main?connection_limit=1&pool_timeout=30&connect_timeout=30", ], ], }, @@ -3783,7 +3783,7 @@ service iptables save", ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", + "DATABASE_OPTION": "?connection_limit=1&pool_timeout=30&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -3832,7 +3832,7 @@ service iptables save", "Endpoint.Port", ], }, - "/main?connection_limit=1&connect_timeout=30", + "/main?connection_limit=1&pool_timeout=30&connect_timeout=30", ], ], }, diff --git a/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap b/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap index 27af260..3f76dc2 100644 --- a/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap +++ b/cdk/test/__snapshots__/serverless-fullstack-webapp-starter-kit.test.ts.snap @@ -744,7 +744,7 @@ exports[`Snapshot test 2`] = ` ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", + "DATABASE_OPTION": "?connection_limit=1&pool_timeout=30&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -793,7 +793,7 @@ exports[`Snapshot test 2`] = ` "Endpoint.Port", ], }, - "/main?connection_limit=1&connect_timeout=30", + "/main?connection_limit=1&pool_timeout=30&connect_timeout=30", ], ], }, @@ -3236,7 +3236,7 @@ service iptables save", ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", + "DATABASE_OPTION": "?connection_limit=1&pool_timeout=30&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -3285,7 +3285,7 @@ service iptables save", "Endpoint.Port", ], }, - "/main?connection_limit=1&connect_timeout=30", + "/main?connection_limit=1&pool_timeout=30&connect_timeout=30", ], ], }, @@ -3589,7 +3589,7 @@ service iptables save", ], }, "DATABASE_NAME": "main", - "DATABASE_OPTION": "?connection_limit=1&connect_timeout=30", + "DATABASE_OPTION": "?connection_limit=1&pool_timeout=30&connect_timeout=30", "DATABASE_PASSWORD": { "Fn::Join": [ "", @@ -3638,7 +3638,7 @@ service iptables save", "Endpoint.Port", ], }, - "/main?connection_limit=1&connect_timeout=30", + "/main?connection_limit=1&pool_timeout=30&connect_timeout=30", ], ], }, diff --git a/webapp/src/lib/prisma.ts b/webapp/src/lib/prisma.ts index cad29cd..d003457 100644 --- a/webapp/src/lib/prisma.ts +++ b/webapp/src/lib/prisma.ts @@ -25,6 +25,7 @@ function isRetryableError(error: unknown): boolean { msg.includes('idle-session timeout') || msg.includes('terminating connection') || msg.includes('Connection terminated') || + msg.includes('Timed out fetching a new connection from the connection pool') || msg.includes('ECONNRESET') ); }