From 57604d80dc3f7a01cbb6e0c1fde04325f4645889 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Tue, 9 Jun 2026 11:52:28 +0100 Subject: [PATCH 1/2] fix(webapp): stop writer DB connectivity errors leaking to trigger() API clients MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit During trigger() worker-queue resolution, getWorkerQueue wraps any error from getDefaultWorkerGroupForProject into a client-facing ServiceValidationError (HTTP 422) carrying error.message. That method runs project.findFirst on the *writer*; when the writer is unreachable Prisma throws P1001 ("Can't reach database server at <host>:<port>"), and its raw message — including the DB hostname — was echoed to the API client and surfaced in the customer's run view via the SDK's TriggerApiError. This also mis-classifies a transient outage: a 422 is not retried by the SDK, so triggers failed permanently instead of riding out a brief writer blip. Add isInfrastructureError() (Prisma connectivity codes P1001/P1002/P1008/P1017 plus init/panic/unknown classes) and, at the wrap site, rethrow infrastructure errors so they hit the route's generic 500 handler (scrubbed + retryable); only genuine domain failures (e.g. "Project not found.") become a 422. 
Co-Authored-By: Claude Opus 4.8 --- .../trigger-worker-queue-db-error-leak.md | 6 +++ .../app/runEngine/concerns/queues.server.ts | 12 +++++ apps/webapp/app/utils/prismaErrors.ts | 39 ++++++++++++++ apps/webapp/test/prismaErrors.test.ts | 32 +++++++++++ .../test/queueManagerWorkerQueue.test.ts | 53 +++++++++++++++++++ 5 files changed, 142 insertions(+) create mode 100644 .server-changes/trigger-worker-queue-db-error-leak.md create mode 100644 apps/webapp/app/utils/prismaErrors.ts create mode 100644 apps/webapp/test/prismaErrors.test.ts create mode 100644 apps/webapp/test/queueManagerWorkerQueue.test.ts diff --git a/.server-changes/trigger-worker-queue-db-error-leak.md b/.server-changes/trigger-worker-queue-db-error-leak.md new file mode 100644 index 00000000000..9725ef9f2eb --- /dev/null +++ b/.server-changes/trigger-worker-queue-db-error-leak.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: fix +--- + +Stop `trigger()` from leaking raw database connection errors to API clients during a database outage; infrastructure errors now return a generic, retryable 500. 
diff --git a/apps/webapp/app/runEngine/concerns/queues.server.ts b/apps/webapp/app/runEngine/concerns/queues.server.ts index 2fc35fc8435..dce74d7d1a9 100644 --- a/apps/webapp/app/runEngine/concerns/queues.server.ts +++ b/apps/webapp/app/runEngine/concerns/queues.server.ts @@ -15,6 +15,7 @@ import type { RunEngine } from "~/v3/runEngine.server"; import { env } from "~/env.server"; import { tryCatch } from "@trigger.dev/core/v3"; import { ServiceValidationError } from "~/v3/services/common.server"; +import { isInfrastructureError } from "~/utils/prismaErrors"; import { createCache, createLRUMemoryStore, DefaultStatefulContext, Namespace } from "@internal/cache"; import { singleton } from "~/utils/singleton"; import type { TaskMetadataCache, TaskMetadataEntry } from "~/services/taskMetadataCache.server"; @@ -394,6 +395,17 @@ export class DefaultQueueManager implements QueueManager { ); if (error) { + // getDefaultWorkerGroupForProject queries the writer DB. A Prisma + // infrastructure error (e.g. P1001 "Can't reach database server", whose + // message carries the DB hostname) must NOT be promoted into a + // client-facing ServiceValidationError: that leaks internal infra detail + // to the API client (the SDK echoes it into the run view) and + // mis-classifies a transient outage as a non-retryable 422. Let it + // propagate to the route's generic 500 handler (scrubbed + retryable); + // only wrap genuine domain failures. + if (isInfrastructureError(error)) { + throw error; + } throw new ServiceValidationError(error.message); } diff --git a/apps/webapp/app/utils/prismaErrors.ts b/apps/webapp/app/utils/prismaErrors.ts new file mode 100644 index 00000000000..b8262ed3d1f --- /dev/null +++ b/apps/webapp/app/utils/prismaErrors.ts @@ -0,0 +1,39 @@ +import { Prisma } from "@trigger.dev/database"; + +// Prisma connectivity / infrastructure error codes — engine- and +// connection-level failures, not query- or validation-level ones. 
When the +// database is unreachable, Prisma 6.x throws a PrismaClientKnownRequestError +// carrying one of these codes (e.g. P1001 "Can't reach database server"). +const INFRASTRUCTURE_PRISMA_CODES = new Set([ + "P1001", // Can't reach database server + "P1002", // Database server reached but timed out + "P1008", // Operations timed out + "P1017", // Server has closed the connection +]); + +/** + * True when `error` is a Prisma infrastructure/connectivity failure (DB + * unreachable, timed out, connection dropped) rather than a query- or + * validation-level error. + * + * These errors carry internal infrastructure detail (e.g. the database + * hostname) in their `.message`, so they must never be surfaced to API + * clients — callers should let them propagate to the generic 5xx handler + * (which both scrubs the message and is retryable by the SDK) instead of + * folding `.message` into a client-facing error. + */ +export function isInfrastructureError(error: unknown): boolean { + if ( + error instanceof Prisma.PrismaClientInitializationError || + error instanceof Prisma.PrismaClientRustPanicError || + error instanceof Prisma.PrismaClientUnknownRequestError + ) { + return true; + } + + if (error instanceof Prisma.PrismaClientKnownRequestError) { + return INFRASTRUCTURE_PRISMA_CODES.has(error.code); + } + + return false; +} diff --git a/apps/webapp/test/prismaErrors.test.ts b/apps/webapp/test/prismaErrors.test.ts new file mode 100644 index 00000000000..48af6c829c7 --- /dev/null +++ b/apps/webapp/test/prismaErrors.test.ts @@ -0,0 +1,32 @@ +import { describe, expect, it } from "vitest"; +import { Prisma } from "@trigger.dev/database"; +import { isInfrastructureError } from "../app/utils/prismaErrors.js"; + +describe("isInfrastructureError", () => { + it("treats a P1001 'can't reach database server' (KnownRequestError) as infrastructure", () => { + // Prisma 6.x reports P1001 as a PrismaClientKnownRequestError with code P1001 — + // this is the exact production shape 
that leaked the RDS hostname to a customer. + const err = new Prisma.PrismaClientKnownRequestError( + "Invalid `prisma.project.findFirst()` invocation: Can't reach database server at host:5432", + { code: "P1001", clientVersion: "6.14.0" } + ); + expect(isInfrastructureError(err)).toBe(true); + }); + + it("treats a PrismaClientInitializationError as infrastructure", () => { + const err = new Prisma.PrismaClientInitializationError("init failed", "6.14.0"); + expect(isInfrastructureError(err)).toBe(true); + }); + + it("does NOT treat a query/validation error (P2002 unique constraint) as infrastructure", () => { + const err = new Prisma.PrismaClientKnownRequestError("Unique constraint failed", { + code: "P2002", + clientVersion: "6.14.0", + }); + expect(isInfrastructureError(err)).toBe(false); + }); + + it("does NOT treat a plain domain Error as infrastructure", () => { + expect(isInfrastructureError(new Error("Project not found."))).toBe(false); + }); +}); diff --git a/apps/webapp/test/queueManagerWorkerQueue.test.ts b/apps/webapp/test/queueManagerWorkerQueue.test.ts new file mode 100644 index 00000000000..21a590b3788 --- /dev/null +++ b/apps/webapp/test/queueManagerWorkerQueue.test.ts @@ -0,0 +1,53 @@ +import { describe, expect, it } from "vitest"; +import { Prisma } from "@trigger.dev/database"; +import { DefaultQueueManager } from "../app/runEngine/concerns/queues.server.js"; +import { ServiceValidationError } from "../app/v3/services/common.server.js"; + +// Minimal non-DEVELOPMENT environment so getWorkerQueue resolves a worker group +// (DEVELOPMENT short-circuits before touching the DB). 
+function productionEnv() { + return { type: "PRODUCTION", projectId: "proj_test", id: "env_test" } as any; +} + +describe("DefaultQueueManager.getWorkerQueue — writer DB error handling", () => { + it("rethrows a Prisma connectivity error unchanged instead of wrapping it in a client-facing ServiceValidationError", async () => { + // The exact production failure: getDefaultWorkerGroupForProject's writer + // `project.findFirst` throws P1001 when the DB is unreachable. The raw + // message carries the DB hostname and must NOT become a 422 with that text. + const prisma = { + project: { + findFirst: async () => { + throw new Prisma.PrismaClientKnownRequestError( + "Invalid `prisma.project.findFirst()` invocation: Can't reach database server at host:5432", + { code: "P1001", clientVersion: "6.14.0" } + ); + }, + }, + } as any; + + const queueManager = new DefaultQueueManager(prisma, {} as any); + + const result = await queueManager.getWorkerQueue(productionEnv()).then( + () => ({ ok: true as const }), + (error: unknown) => ({ ok: false as const, error }) + ); + + expect(result.ok).toBe(false); + if (!result.ok) { + expect(result.error).toBeInstanceOf(Prisma.PrismaClientKnownRequestError); + expect(result.error).not.toBeInstanceOf(ServiceValidationError); + } + }); + + it("still wraps a genuine domain failure (project not found) as a ServiceValidationError", async () => { + const prisma = { + project: { findFirst: async () => null }, + } as any; + + const queueManager = new DefaultQueueManager(prisma, {} as any); + + await expect(queueManager.getWorkerQueue(productionEnv())).rejects.toBeInstanceOf( + ServiceValidationError + ); + }); +}); From 269af87e81178805767a4d01ee8a14070e8b487a Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Tue, 9 Jun 2026 17:45:34 +0100 Subject: [PATCH 2/2] test(webapp): drop CI-unsafe getWorkerQueue integration test It imported queues.server.ts, which transitively starts DB/Redis-touching singletons; in the no-infra unit shard those reject 
as unhandled rejections and fail the run (passed locally only because docker services were up). The guard logic is covered by prismaErrors.test.ts; the full HTTP path belongs in a toxiproxy e2e, not the unit shard. Co-Authored-By: Claude Opus 4.8 --- .../test/queueManagerWorkerQueue.test.ts | 53 ------------------- 1 file changed, 53 deletions(-) delete mode 100644 apps/webapp/test/queueManagerWorkerQueue.test.ts diff --git a/apps/webapp/test/queueManagerWorkerQueue.test.ts b/apps/webapp/test/queueManagerWorkerQueue.test.ts deleted file mode 100644 index 21a590b3788..00000000000 --- a/apps/webapp/test/queueManagerWorkerQueue.test.ts +++ /dev/null @@ -1,53 +0,0 @@ -import { describe, expect, it } from "vitest"; -import { Prisma } from "@trigger.dev/database"; -import { DefaultQueueManager } from "../app/runEngine/concerns/queues.server.js"; -import { ServiceValidationError } from "../app/v3/services/common.server.js"; - -// Minimal non-DEVELOPMENT environment so getWorkerQueue resolves a worker group -// (DEVELOPMENT short-circuits before touching the DB). -function productionEnv() { - return { type: "PRODUCTION", projectId: "proj_test", id: "env_test" } as any; -} - -describe("DefaultQueueManager.getWorkerQueue — writer DB error handling", () => { - it("rethrows a Prisma connectivity error unchanged instead of wrapping it in a client-facing ServiceValidationError", async () => { - // The exact production failure: getDefaultWorkerGroupForProject's writer - // `project.findFirst` throws P1001 when the DB is unreachable. The raw - // message carries the DB hostname and must NOT become a 422 with that text. 
- const prisma = { - project: { - findFirst: async () => { - throw new Prisma.PrismaClientKnownRequestError( - "Invalid `prisma.project.findFirst()` invocation: Can't reach database server at host:5432", - { code: "P1001", clientVersion: "6.14.0" } - ); - }, - }, - } as any; - - const queueManager = new DefaultQueueManager(prisma, {} as any); - - const result = await queueManager.getWorkerQueue(productionEnv()).then( - () => ({ ok: true as const }), - (error: unknown) => ({ ok: false as const, error }) - ); - - expect(result.ok).toBe(false); - if (!result.ok) { - expect(result.error).toBeInstanceOf(Prisma.PrismaClientKnownRequestError); - expect(result.error).not.toBeInstanceOf(ServiceValidationError); - } - }); - - it("still wraps a genuine domain failure (project not found) as a ServiceValidationError", async () => { - const prisma = { - project: { findFirst: async () => null }, - } as any; - - const queueManager = new DefaultQueueManager(prisma, {} as any); - - await expect(queueManager.getWorkerQueue(productionEnv())).rejects.toBeInstanceOf( - ServiceValidationError - ); - }); -});