Skip to content

Commit 98948c0

Browse files
authored
fix(tables): heartbeat export job before upload so the stale janitor can't kill a live finalize (#5017)
* fix(tables): heartbeat export job before upload so the stale janitor can't kill a live finalize * chore(tables): rename stale-janitor counters to cover all table job types
1 parent 43017d7 commit 98948c0

3 files changed

Lines changed: 22 additions & 8 deletions

File tree

apps/sim/app/api/cron/cleanup-stale-executions/route.ts

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -113,15 +113,15 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
113113
})
114114
}
115115

116-
// Mark stale table jobs (import or delete) as failed. Jobs run detached on the web container
116+
// Mark stale table jobs (import, export, or delete) as failed. Jobs run detached on the web container
117117
// and are lost if the pod is killed mid-run. `updated_at` is bumped by progress updates, so a
118118
// `running` job with no recent update has stalled (not merely slow). Committed work is left in
119119
// place (no rollback); the user retries. Also prune long-settled terminal jobs so the table
120120
// doesn't grow unbounded (the latest job per table is what list/detail reads surface).
121-
let staleImportsMarkedFailed = 0
121+
let staleTableJobsMarkedFailed = 0
122122
try {
123123
const now = new Date()
124-
const staleImports = await db
124+
const staleJobs = await db
125125
.update(tableJobs)
126126
.set({
127127
status: 'failed',
@@ -132,9 +132,9 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
132132
.where(and(eq(tableJobs.status, 'running'), lt(tableJobs.updatedAt, staleThreshold)))
133133
.returning({ id: tableJobs.id })
134134

135-
staleImportsMarkedFailed = staleImports.length
136-
if (staleImportsMarkedFailed > 0) {
137-
logger.info(`Marked ${staleImportsMarkedFailed} stale table jobs as failed`)
135+
staleTableJobsMarkedFailed = staleJobs.length
136+
if (staleTableJobsMarkedFailed > 0) {
137+
logger.info(`Marked ${staleTableJobsMarkedFailed} stale table jobs as failed`)
138138
}
139139

140140
const terminalRetention = new Date(Date.now() - TABLE_JOB_RETENTION_HOURS * 60 * 60 * 1000)
@@ -236,8 +236,8 @@ export const GET = withRouteHandler(async (request: NextRequest) => {
236236
staleThresholdMinutes: STALE_THRESHOLD_MINUTES,
237237
retentionHours: JOB_RETENTION_HOURS,
238238
},
239-
tableImports: {
240-
staleMarkedFailed: staleImportsMarkedFailed,
239+
tableJobs: {
240+
staleMarkedFailed: staleTableJobsMarkedFailed,
241241
},
242242
})
243243
} catch (error) {

apps/sim/lib/table/export-runner.test.ts

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,17 @@ describe('runTableExport', () => {
104104
expect(mockMarkJobFailed).not.toHaveBeenCalled()
105105
})
106106

107+
it('stops before the upload when ownership is lost at the finalize gate', async () => {
108+
mockUpdateJobProgress.mockResolvedValueOnce(true).mockResolvedValue(false)
109+
110+
await runTableExport(payload)
111+
112+
expect(mockSelectExportRowPage).toHaveBeenCalledTimes(1)
113+
expect(mockUploadFile).not.toHaveBeenCalled()
114+
expect(mockMarkJobReady).not.toHaveBeenCalled()
115+
expect(mockMarkJobFailed).not.toHaveBeenCalled()
116+
})
117+
107118
it('cleans up an orphaned upload when the job was canceled at the wire', async () => {
108119
mockMarkJobReady.mockResolvedValue(false)
109120

apps/sim/lib/table/export-runner.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,9 @@ export async function runTableExport(payload: TableExportPayload): Promise<void>
9393
}
9494
if (format === 'json') chunks.push(']')
9595

96+
const ownsFinalize = await updateJobProgress(tableId, exported, jobId)
97+
if (!ownsFinalize) throw new JobSupersededError()
98+
9699
const fileName = `${sanitizeExportFilename(table.name)}.${format}`
97100
const key = `workspace/${workspaceId}/exports/${tableId}/${jobId}/${fileName}`
98101
// `preserveKey` keeps the custom key verbatim (without it the provider rewrites the key to a

0 commit comments

Comments
 (0)