Skip to content

Commit 7310c06

Browse files
improvement(mothership): table speed parity — keyset reads, limit bounds, background import/delete jobs
Mothership's table operations lagged the fast paths the tables feature already has: - function_execute mounted inputTables via queryRows with defaults, silently truncating the sandbox CSV to 100 rows and paying for a count and execution metadata it never used. Mounts now drain the keyset export reader page by page, remap stored column-id keys to display names (headers previously didn't match id-keyed cell data), mount tables in parallel, and count toward the 50MB sandbox mount budget. - user_table query_rows accepted unbounded limits (whole table into one tool result) and only offset paging. It now enforces the contracts' MAX_QUERY_LIMIT, skips execution metadata, accepts the keyset `after` cursor, and returns `nextCursor` when a default-order page fills. - import_file / create_from_file / delete_rows_by_filter mutated without claiming the per-table job slot, racing running background jobs, and ran whole imports inline in the chat request. CSV/TSV imports ≥8MB and unbounded filter-deletes matching >1000 rows now dispatch the same trigger.dev jobs the UI routes use (slot claim, release-on-dispatch- failure, delete mask); inline imports claim and release the slot. A new get_job operation surfaces the table's derived job state for polling. - TableImportPayload grows deleteSourceFile (default true) so Mothership imports of persistent workspace files survive the worker's single-use source cleanup. Generated tool catalog artifacts regenerated from simstudioai/copilot#289. Co-Authored-By: Claude Fable 5 <noreply@anthropic.com>
1 parent 2626482 commit 7310c06

8 files changed

Lines changed: 1140 additions & 106 deletions

File tree

apps/sim/lib/copilot/generated/tool-catalog-v1.ts

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3868,6 +3868,22 @@ export const UserTable: ToolCatalogEntry = {
38683868
type: 'object',
38693869
description: 'Arguments for the operation',
38703870
properties: {
3871+
after: {
3872+
type: 'object',
3873+
description:
3874+
"Keyset cursor for query_rows: pass the nextCursor object ({ orderKey, id }) from the previous page's response to fetch the next page on the default row order. Cannot be combined with sort; takes precedence over offset.",
3875+
properties: {
3876+
id: {
3877+
type: 'string',
3878+
description: 'id of the last row of the previous page (from nextCursor).',
3879+
},
3880+
orderKey: {
3881+
type: 'string',
3882+
description: 'orderKey of the last row of the previous page (from nextCursor).',
3883+
},
3884+
},
3885+
required: ['orderKey', 'id'],
3886+
},
38713887
autoRun: {
38723888
type: 'boolean',
38733889
description:
@@ -3956,7 +3972,8 @@ export const UserTable: ToolCatalogEntry = {
39563972
},
39573973
limit: {
39583974
type: 'number',
3959-
description: 'Maximum rows to return or affect (optional, default 100)',
3975+
description:
3976+
'Maximum rows to return or affect (optional; default 100, max 1000). For delete_rows_by_filter, omitting it lets matches above 1000 run as a background job.',
39603977
},
39613978
mapping: {
39623979
type: 'object',
@@ -4009,7 +4026,8 @@ export const UserTable: ToolCatalogEntry = {
40094026
},
40104027
offset: {
40114028
type: 'number',
4012-
description: 'Number of rows to skip (optional for query_rows, default 0)',
4029+
description:
4030+
'Number of rows to skip (optional for query_rows, default 0). For paging past more than a few pages, prefer the after cursor — offset re-scans every prior row.',
40134031
},
40144032
outputColumnNames: {
40154033
type: 'object',
@@ -4146,6 +4164,7 @@ export const UserTable: ToolCatalogEntry = {
41464164
'import_file',
41474165
'get',
41484166
'get_schema',
4167+
'get_job',
41494168
'delete',
41504169
'rename',
41514170
'insert_row',
@@ -4524,6 +4543,7 @@ export const UserTableOperation = {
45244543
importFile: 'import_file',
45254544
get: 'get',
45264545
getSchema: 'get_schema',
4546+
getJob: 'get_job',
45274547
delete: 'delete',
45284548
rename: 'rename',
45294549
insertRow: 'insert_row',
@@ -4560,6 +4580,7 @@ export const UserTableOperationValues = [
45604580
UserTableOperation.importFile,
45614581
UserTableOperation.get,
45624582
UserTableOperation.getSchema,
4583+
UserTableOperation.getJob,
45634584
UserTableOperation.delete,
45644585
UserTableOperation.rename,
45654586
UserTableOperation.insertRow,

apps/sim/lib/copilot/generated/tool-schemas-v1.ts

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3598,6 +3598,22 @@ export const TOOL_RUNTIME_SCHEMAS: Record<string, ToolRuntimeSchemaEntry> = {
35983598
type: 'object',
35993599
description: 'Arguments for the operation',
36003600
properties: {
3601+
after: {
3602+
type: 'object',
3603+
description:
3604+
"Keyset cursor for query_rows: pass the nextCursor object ({ orderKey, id }) from the previous page's response to fetch the next page on the default row order. Cannot be combined with sort; takes precedence over offset.",
3605+
properties: {
3606+
id: {
3607+
type: 'string',
3608+
description: 'id of the last row of the previous page (from nextCursor).',
3609+
},
3610+
orderKey: {
3611+
type: 'string',
3612+
description: 'orderKey of the last row of the previous page (from nextCursor).',
3613+
},
3614+
},
3615+
required: ['orderKey', 'id'],
3616+
},
36013617
autoRun: {
36023618
type: 'boolean',
36033619
description:
@@ -3694,7 +3710,8 @@ export const TOOL_RUNTIME_SCHEMAS: Record<string, ToolRuntimeSchemaEntry> = {
36943710
},
36953711
limit: {
36963712
type: 'number',
3697-
description: 'Maximum rows to return or affect (optional, default 100)',
3713+
description:
3714+
'Maximum rows to return or affect (optional; default 100, max 1000). For delete_rows_by_filter, omitting it lets matches above 1000 run as a background job.',
36983715
},
36993716
mapping: {
37003717
type: 'object',
@@ -3753,7 +3770,8 @@ export const TOOL_RUNTIME_SCHEMAS: Record<string, ToolRuntimeSchemaEntry> = {
37533770
},
37543771
offset: {
37553772
type: 'number',
3756-
description: 'Number of rows to skip (optional for query_rows, default 0)',
3773+
description:
3774+
'Number of rows to skip (optional for query_rows, default 0). For paging past more than a few pages, prefer the after cursor — offset re-scans every prior row.',
37573775
},
37583776
outputColumnNames: {
37593777
type: 'object',
@@ -3902,6 +3920,7 @@ export const TOOL_RUNTIME_SCHEMAS: Record<string, ToolRuntimeSchemaEntry> = {
39023920
'import_file',
39033921
'get',
39043922
'get_schema',
3923+
'get_job',
39053924
'delete',
39063925
'rename',
39073926
'insert_row',
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
/**
2+
* @vitest-environment node
3+
*/
4+
5+
import { beforeEach, describe, expect, it, vi } from 'vitest'
6+
7+
const { mockGetTableById, mockSelectExportRowPage, mockExecuteTool } = vi.hoisted(() => ({
8+
mockGetTableById: vi.fn(),
9+
mockSelectExportRowPage: vi.fn(),
10+
mockExecuteTool: vi.fn(),
11+
}))
12+
13+
vi.mock('@/lib/table/service', () => ({
14+
getTableById: mockGetTableById,
15+
listTables: vi.fn(),
16+
selectExportRowPage: mockSelectExportRowPage,
17+
}))
18+
19+
vi.mock('@/tools', () => ({
20+
executeTool: mockExecuteTool,
21+
}))
22+
23+
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-manager', () => ({
24+
fetchWorkspaceFileBuffer: vi.fn(),
25+
findWorkspaceFileRecord: vi.fn(),
26+
getSandboxWorkspaceFilePath: vi.fn(),
27+
listWorkspaceFiles: vi.fn(),
28+
}))
29+
30+
vi.mock('@/lib/uploads/contexts/workspace/workspace-file-folder-manager', () => ({
31+
listWorkspaceFileFolders: vi.fn(),
32+
}))
33+
34+
vi.mock('@/lib/copilot/vfs/workflow-alias-resolver', () => ({
35+
resolveWorkflowAliasForWorkspace: vi.fn(),
36+
}))
37+
38+
import { executeFunctionExecute } from '@/lib/copilot/tools/handlers/function-execute'
39+
40+
const PAGE_SIZE = 5000
41+
42+
function buildTable() {
43+
return {
44+
id: 'tbl_1',
45+
name: 'People',
46+
description: null,
47+
schema: {
48+
columns: [
49+
{ id: 'col_name', name: 'name', type: 'string' },
50+
{ id: 'col_age', name: 'age', type: 'number' },
51+
],
52+
},
53+
metadata: null,
54+
rowCount: PAGE_SIZE + 2,
55+
maxRows: 100000,
56+
workspaceId: 'workspace-1',
57+
createdBy: 'user-1',
58+
archivedAt: null,
59+
createdAt: new Date('2024-01-01'),
60+
updatedAt: new Date('2024-01-01'),
61+
}
62+
}
63+
64+
function makeRows(start: number, count: number) {
65+
return Array.from({ length: count }, (_, i) => ({
66+
id: `row_${start + i}`,
67+
data: { col_name: `person ${start + i}`, col_age: start + i },
68+
position: start + i,
69+
}))
70+
}
71+
72+
describe('executeFunctionExecute table mounts', () => {
73+
beforeEach(() => {
74+
vi.clearAllMocks()
75+
mockGetTableById.mockResolvedValue(buildTable())
76+
mockExecuteTool.mockResolvedValue({ success: true, output: {} })
77+
})
78+
79+
it('drains every row through the keyset export reader, not a single 100-row page', async () => {
80+
mockSelectExportRowPage
81+
.mockResolvedValueOnce(makeRows(0, PAGE_SIZE))
82+
.mockResolvedValueOnce(makeRows(PAGE_SIZE, 2))
83+
84+
await executeFunctionExecute(
85+
{ code: 'print(1)', inputTables: ['tbl_1'] },
86+
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'workspace-1' }
87+
)
88+
89+
expect(mockSelectExportRowPage).toHaveBeenCalledTimes(2)
90+
expect(mockSelectExportRowPage.mock.calls[0][1]).toBeNull()
91+
expect(mockSelectExportRowPage.mock.calls[1][1]).toEqual({
92+
position: PAGE_SIZE - 1,
93+
id: `row_${PAGE_SIZE - 1}`,
94+
})
95+
96+
const params = mockExecuteTool.mock.calls[0][1] as {
97+
_sandboxFiles: Array<{ path: string; content: string }>
98+
}
99+
const mount = params._sandboxFiles.find((f) => f.path === '/home/user/tables/tbl_1.csv')
100+
expect(mount).toBeDefined()
101+
const lines = mount!.content.split('\n')
102+
expect(lines[0]).toBe('name,age')
103+
expect(lines).toHaveLength(1 + PAGE_SIZE + 2)
104+
expect(lines[1]).toBe('person 0,0')
105+
expect(lines[lines.length - 1]).toBe(`person ${PAGE_SIZE + 1},${PAGE_SIZE + 1}`)
106+
})
107+
108+
it('maps stored column-id keys back to display-name headers', async () => {
109+
mockSelectExportRowPage.mockResolvedValueOnce([
110+
{ id: 'row_1', data: { col_name: 'Alice', col_age: 30 }, position: 0 },
111+
])
112+
113+
await executeFunctionExecute(
114+
{ code: 'print(1)', inputTables: ['tbl_1'] },
115+
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'workspace-1' }
116+
)
117+
118+
const params = mockExecuteTool.mock.calls[0][1] as {
119+
_sandboxFiles: Array<{ path: string; content: string }>
120+
}
121+
expect(params._sandboxFiles[0].content).toBe('name,age\nAlice,30')
122+
})
123+
124+
it('throws when the table belongs to a different workspace', async () => {
125+
mockGetTableById.mockResolvedValue({ ...buildTable(), workspaceId: 'workspace-other' })
126+
127+
await expect(
128+
executeFunctionExecute(
129+
{ code: 'print(1)', inputTables: ['tbl_1'] },
130+
{ userId: 'user-1', workflowId: 'wf-1', workspaceId: 'workspace-1' }
131+
)
132+
).rejects.toThrow(/Input table not found/)
133+
expect(mockExecuteTool).not.toHaveBeenCalled()
134+
})
135+
})

apps/sim/lib/copilot/tools/handlers/function-execute.ts

Lines changed: 73 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@ import { decodeVfsPathSegments, encodeVfsPathSegments } from '@/lib/copilot/vfs/
33
import { resolveWorkflowAliasForWorkspace } from '@/lib/copilot/vfs/workflow-alias-resolver'
44
import { isPlanAliasPath, workflowAliasSandboxPath } from '@/lib/copilot/vfs/workflow-aliases'
55
import { isMothershipBetaFeaturesEnabled } from '@/lib/core/config/feature-flags'
6-
import { getTableById, listTables, queryRows } from '@/lib/table/service'
6+
import { buildNameById, rowDataIdToName } from '@/lib/table/column-keys'
7+
import { toCsvRow } from '@/lib/table/export-format'
8+
import { getTableById, listTables, selectExportRowPage } from '@/lib/table/service'
9+
import type { TableDefinition } from '@/lib/table/types'
710
import { listWorkspaceFileFolders } from '@/lib/uploads/contexts/workspace/workspace-file-folder-manager'
811
import {
912
fetchWorkspaceFileBuffer,
@@ -62,6 +65,44 @@ async function resolveTableRef(
6265
return tablePathLookup?.get(tableName) ?? null
6366
}
6467

68+
const TABLE_MOUNT_PAGE_SIZE = 5000
69+
70+
/**
71+
* Serializes a cell for a sandbox CSV mount. Unlike export downloads this skips
72+
* formula neutralization — the CSV is consumed by code, and a prefixed `'`
73+
* would corrupt values.
74+
*/
75+
function formatMountCsvValue(value: unknown): string {
76+
if (value === null || value === undefined) return ''
77+
if (value instanceof Date) return value.toISOString()
78+
if (typeof value === 'object') return JSON.stringify(value)
79+
return String(value)
80+
}
81+
82+
/**
83+
* Serializes a full table to CSV for a sandbox mount. Walks the keyset export
84+
* reader page by page so every row is included (`queryRows` with defaults
85+
* silently truncated mounts to its 100-row page and paid for a count and
86+
* execution metadata the CSV never used), and remaps stored column-id keys
87+
* back to display names so headers line up with cell values.
88+
*/
89+
async function buildTableCsvForMount(table: TableDefinition): Promise<string> {
90+
const nameById = buildNameById(table.schema)
91+
const headers = table.schema.columns.map((c) => c.name)
92+
const lines = [toCsvRow(headers)]
93+
let after: { position: number; id: string } | null = null
94+
while (true) {
95+
const page = await selectExportRowPage(table, after, TABLE_MOUNT_PAGE_SIZE)
96+
for (const row of page) {
97+
const data = rowDataIdToName(row.data, nameById)
98+
lines.push(toCsvRow(headers.map((header) => formatMountCsvValue(data[header]))))
99+
}
100+
if (page.length < TABLE_MOUNT_PAGE_SIZE) return lines.join('\n')
101+
const last = page[page.length - 1]
102+
after = { position: last.position, id: last.id }
103+
}
104+
}
105+
65106
async function resolveInputFiles(
66107
workspaceId: string,
67108
inputFiles?: unknown[],
@@ -247,55 +288,41 @@ async function resolveInputFiles(
247288
const tablePathLookup = hasTablePathRefs
248289
? new Map((await listTables(workspaceId)).map((table) => [table.name, table]))
249290
: undefined
250-
for (const tableRef of inputTables) {
251-
const tableId =
252-
typeof tableRef === 'string'
253-
? tableRef
254-
: tableRef && typeof tableRef === 'object'
255-
? (tableRef as CanonicalTableInput).tableId || (tableRef as CanonicalTableInput).path
291+
const tableMounts = await Promise.all(
292+
inputTables.map(async (tableRef) => {
293+
const tableId =
294+
typeof tableRef === 'string'
295+
? tableRef
296+
: tableRef && typeof tableRef === 'object'
297+
? (tableRef as CanonicalTableInput).tableId || (tableRef as CanonicalTableInput).path
298+
: undefined
299+
if (!tableId) return null
300+
const table = await resolveTableRef(tableId, tablePathLookup)
301+
if (!table || table.workspaceId !== workspaceId) {
302+
throw new Error(
303+
`Input table not found: "${tableId}". Pass the table id (tbl_...) from tables/{name}/meta.json, or a tables/{name}/meta.json path.`
304+
)
305+
}
306+
const csvContent = await buildTableCsvForMount(table)
307+
const sandboxPath =
308+
typeof tableRef === 'object' && tableRef !== null
309+
? (tableRef as CanonicalTableInput).sandboxPath
256310
: undefined
257-
if (!tableId) continue
258-
const table = await resolveTableRef(tableId, tablePathLookup)
259-
if (!table || table.workspaceId !== workspaceId) {
260-
throw new Error(
261-
`Input table not found: "${tableId}". Pass the table id (tbl_...) from tables/{name}/meta.json, or a tables/{name}/meta.json path.`
262-
)
263-
}
264-
const rows = await queryRows(table, {}, 'copilot-fn-exec')
265-
266-
const allKeys = new Set(table.schema.columns.map((column) => column.name))
267-
for (const row of rows.rows ?? []) {
268-
if (row.data && typeof row.data === 'object') {
269-
for (const key of Object.keys(row.data as Record<string, unknown>)) {
270-
allKeys.add(key)
271-
}
311+
return {
312+
path: sandboxPath || `/home/user/tables/${table.id}.csv`,
313+
content: csvContent,
272314
}
273-
}
274-
const headers = Array.from(allKeys)
275-
const csvLines = [headers.join(',')]
276-
for (const row of rows.rows ?? []) {
277-
const data = (row.data || {}) as Record<string, unknown>
278-
csvLines.push(
279-
headers
280-
.map((h) => {
281-
const val = data[h]
282-
const str = val === null || val === undefined ? '' : String(val)
283-
return str.includes(',') || str.includes('"') || str.includes('\n')
284-
? `"${str.replace(/"/g, '""')}"`
285-
: str
286-
})
287-
.join(',')
315+
})
316+
)
317+
for (const mount of tableMounts) {
318+
if (!mount) continue
319+
if (totalSize + mount.content.length > MAX_TOTAL_SIZE) {
320+
throw new Error(
321+
`Mounting table "${mount.path}" would exceed the ${MAX_TOTAL_SIZE / 1024 / 1024}MB total mount limit. Mount fewer or smaller tables.`
288322
)
289323
}
290-
const csvContent = csvLines.join('\n')
291-
const sandboxPath =
292-
typeof tableRef === 'object' && tableRef !== null
293-
? (tableRef as CanonicalTableInput).sandboxPath
294-
: undefined
295-
sandboxFiles.push({
296-
path: sandboxPath || `/home/user/tables/${table.id}.csv`,
297-
content: csvContent,
298-
})
324+
totalSize += mount.content.length
325+
sandboxFiles.push(mount)
299326
}
300327
}
301328

0 commit comments

Comments
 (0)