diff --git a/.server-changes/trigger-worker-queue-db-error-leak.md b/.server-changes/trigger-worker-queue-db-error-leak.md
new file mode 100644
index 00000000000..9725ef9f2eb
--- /dev/null
+++ b/.server-changes/trigger-worker-queue-db-error-leak.md
@@ -0,0 +1,6 @@
+---
+area: webapp
+type: fix
+---
+
+Stop `trigger()` from leaking raw database connection errors to API clients during a database outage; infrastructure errors now return a generic, retryable 500.
diff --git a/apps/webapp/app/runEngine/concerns/queues.server.ts b/apps/webapp/app/runEngine/concerns/queues.server.ts
index 2fc35fc8435..dce74d7d1a9 100644
--- a/apps/webapp/app/runEngine/concerns/queues.server.ts
+++ b/apps/webapp/app/runEngine/concerns/queues.server.ts
@@ -15,6 +15,7 @@ import type { RunEngine } from "~/v3/runEngine.server";
 import { env } from "~/env.server";
 import { tryCatch } from "@trigger.dev/core/v3";
 import { ServiceValidationError } from "~/v3/services/common.server";
+import { isInfrastructureError } from "~/utils/prismaErrors";
 import { createCache, createLRUMemoryStore, DefaultStatefulContext, Namespace } from "@internal/cache";
 import { singleton } from "~/utils/singleton";
 import type { TaskMetadataCache, TaskMetadataEntry } from "~/services/taskMetadataCache.server";
@@ -394,6 +395,17 @@ export class DefaultQueueManager implements QueueManager {
     );
 
     if (error) {
+      // getDefaultWorkerGroupForProject queries the writer DB. A Prisma
+      // infrastructure error (e.g. P1001 "Can't reach database server", whose
+      // message carries the DB hostname) must NOT be promoted into a
+      // client-facing ServiceValidationError: that leaks internal infra detail
+      // to the API client (the SDK echoes it into the run view) and
+      // mis-classifies a transient outage as a non-retryable 422. Let it
+      // propagate to the route's generic 500 handler (scrubbed + retryable);
+      // only wrap genuine domain failures.
+      if (isInfrastructureError(error)) {
+        throw error;
+      }
       throw new ServiceValidationError(error.message);
     }
 
diff --git a/apps/webapp/app/utils/prismaErrors.ts b/apps/webapp/app/utils/prismaErrors.ts
new file mode 100644
index 00000000000..b8262ed3d1f
--- /dev/null
+++ b/apps/webapp/app/utils/prismaErrors.ts
@@ -0,0 +1,50 @@
+import { Prisma } from "@trigger.dev/database";
+
+// Prisma connectivity / infrastructure error codes — engine-, connection- and
+// pool-level failures, not query- or validation-level ones. When the
+// database is unreachable, Prisma 6.x throws a PrismaClientKnownRequestError
+// carrying one of these codes (e.g. P1001 "Can't reach database server").
+// Several of these messages embed the database host, user or database name.
+const INFRASTRUCTURE_PRISMA_CODES: ReadonlySet<string> = new Set([
+  "P1000", // Authentication failed against database server (names host + user)
+  "P1001", // Can't reach database server
+  "P1002", // Database server reached but timed out
+  "P1003", // Database does not exist on the database server (names host)
+  "P1008", // Operations timed out
+  "P1010", // User was denied access on the database
+  "P1011", // Error opening a TLS connection
+  "P1017", // Server has closed the connection
+  "P2024", // Timed out fetching a new connection from the connection pool
+  "P2037", // Too many database connections opened
+]);
+
+/**
+ * True when `error` is a Prisma infrastructure/connectivity failure (DB
+ * unreachable, timed out, connection dropped, connection pool exhausted)
+ * rather than a query- or validation-level error.
+ *
+ * These errors carry internal infrastructure detail (e.g. the database
+ * hostname) in their `.message`, so they must never be surfaced to API
+ * clients — callers should let them propagate to the generic 5xx handler
+ * (which both scrubs the message and is retryable by the SDK) instead of
+ * folding `.message` into a client-facing error.
+ *
+ * @param error - Any caught value; non-Prisma values are never infrastructure.
+ * @returns `true` for Prisma initialization, Rust-panic and unknown-request
+ *   errors, and for known-request errors whose code is an infrastructure code.
+ */
+export function isInfrastructureError(error: unknown): boolean {
+  if (
+    error instanceof Prisma.PrismaClientInitializationError ||
+    error instanceof Prisma.PrismaClientRustPanicError ||
+    error instanceof Prisma.PrismaClientUnknownRequestError
+  ) {
+    return true;
+  }
+
+  if (error instanceof Prisma.PrismaClientKnownRequestError) {
+    return INFRASTRUCTURE_PRISMA_CODES.has(error.code);
+  }
+
+  return false;
+}
diff --git a/apps/webapp/test/prismaErrors.test.ts b/apps/webapp/test/prismaErrors.test.ts
new file mode 100644
index 00000000000..48af6c829c7
--- /dev/null
+++ b/apps/webapp/test/prismaErrors.test.ts
@@ -0,0 +1,42 @@
+import { describe, expect, it } from "vitest";
+import { Prisma } from "@trigger.dev/database";
+import { isInfrastructureError } from "../app/utils/prismaErrors.js";
+
+describe("isInfrastructureError", () => {
+  it("treats a P1001 'can't reach database server' (KnownRequestError) as infrastructure", () => {
+    // Prisma 6.x reports P1001 as a PrismaClientKnownRequestError with code P1001 —
+    // this is the exact production shape that leaked the RDS hostname to a customer.
+    const err = new Prisma.PrismaClientKnownRequestError(
+      "Invalid `prisma.project.findFirst()` invocation: Can't reach database server at host:5432",
+      { code: "P1001", clientVersion: "6.14.0" }
+    );
+    expect(isInfrastructureError(err)).toBe(true);
+  });
+
+  it("treats a PrismaClientInitializationError as infrastructure", () => {
+    const err = new Prisma.PrismaClientInitializationError("init failed", "6.14.0");
+    expect(isInfrastructureError(err)).toBe(true);
+  });
+
+  it("treats auth, TLS and connection-pool failures as infrastructure", () => {
+    for (const code of ["P1000", "P1003", "P1010", "P1011", "P2024", "P2037"]) {
+      const err = new Prisma.PrismaClientKnownRequestError("db failure", {
+        code,
+        clientVersion: "6.14.0",
+      });
+      expect(isInfrastructureError(err), code).toBe(true);
+    }
+  });
+
+  it("does NOT treat a query/validation error (P2002 unique constraint) as infrastructure", () => {
+    const err = new Prisma.PrismaClientKnownRequestError("Unique constraint failed", {
+      code: "P2002",
+      clientVersion: "6.14.0",
+    });
+    expect(isInfrastructureError(err)).toBe(false);
+  });
+
+  it("does NOT treat a plain domain Error as infrastructure", () => {
+    expect(isInfrastructureError(new Error("Project not found."))).toBe(false);
+  });
+});