Skip to content

Commit 2f44ac0

Browse files
perf(tables): tenant-bound unique-constraint checks (3.5s -> <1s per write)
The unique check runs `lower(data->>'col') = $1 LIMIT 1` on every insert and cell edit that touches a unique column. The planner cannot estimate the selectivity of that predicate, and a lookup for a unique (non-conflicting) value never exits early, so the planner seq-scanned all 12.3M rows of the shared relation per check — 3.5s measured.

Tenant-bound both the single and batch variants. The batch path sets the flag on the caller's transaction when one is supplied (SET LOCAL dies at that transaction's commit, and the statements that follow in it are tenant-scoped writes).

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
1 parent 30b950d commit 2f44ac0

1 file changed

Lines changed: 92 additions & 72 deletions

File tree

apps/sim/lib/table/validation.ts

Lines changed: 92 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44

55
import { db } from '@sim/db'
66
import { userTableRows } from '@sim/db/schema'
7-
import { and, eq, or, sql } from 'drizzle-orm'
7+
import { and, eq, or, type SQL, sql } from 'drizzle-orm'
88
import { NextResponse } from 'next/server'
99
import { getColumnId } from './column-keys'
1010
import { COLUMN_TYPES, NAME_PATTERN, TABLE_LIMITS } from './constants'
11+
import { withSeqscanOff } from './planner'
1112
import type { ColumnDefinition, JsonValue, RowData, TableSchema, ValidationResult } from './types'
1213

1314
export type { ColumnDefinition, TableSchema, ValidationResult }
@@ -420,7 +421,7 @@ export async function checkUniqueConstraintsDb(
420421
}
421422

422423
// Build conditions for each unique column value
423-
const conditions = []
424+
const conditions: Array<{ column: ColumnDefinition; value: unknown; sql: SQL }> = []
424425

425426
for (const column of uniqueColumns) {
426427
const key = getColumnId(column)
@@ -451,26 +452,31 @@ export async function checkUniqueConstraintsDb(
451452
return { valid: true, errors: [] }
452453
}
453454

454-
// Query for each unique column separately to provide specific error messages
455-
for (const condition of conditions) {
456-
const baseCondition = and(eq(userTableRows.tableId, tableId), condition.sql)
455+
// Query for each unique column separately to provide specific error messages.
456+
// Tenant-bounded: `lower(data->>'col') = ...` is unestimatable, so the planner
457+
// otherwise seq-scans the whole shared relation per check — 3.5s on every
458+
// insert/edit when the value is unique (no early exit). See withSeqscanOff.
459+
await withSeqscanOff(async (trx) => {
460+
for (const condition of conditions) {
461+
const baseCondition = and(eq(userTableRows.tableId, tableId), condition.sql)
457462

458-
const whereClause = excludeRowId
459-
? and(baseCondition, sql`${userTableRows.id} != ${excludeRowId}`)
460-
: baseCondition
463+
const whereClause = excludeRowId
464+
? and(baseCondition, sql`${userTableRows.id} != ${excludeRowId}`)
465+
: baseCondition
461466

462-
const conflictingRow = await db
463-
.select({ id: userTableRows.id, position: userTableRows.position })
464-
.from(userTableRows)
465-
.where(whereClause)
466-
.limit(1)
467+
const conflictingRow = await trx
468+
.select({ id: userTableRows.id, position: userTableRows.position })
469+
.from(userTableRows)
470+
.where(whereClause)
471+
.limit(1)
467472

468-
if (conflictingRow.length > 0) {
469-
errors.push(
470-
`Column "${condition.column.name}" must be unique. Value "${condition.value}" already exists in row ${conflictingRow[0].position + 1}`
471-
)
473+
if (conflictingRow.length > 0) {
474+
errors.push(
475+
`Column "${condition.column.name}" must be unique. Value "${condition.value}" already exists in row ${conflictingRow[0].position + 1}`
476+
)
477+
}
472478
}
473-
}
479+
})
474480

475481
return { valid: errors.length === 0, errors }
476482
}
@@ -480,7 +486,7 @@ export async function checkUniqueConstraintsDb(
480486
* drizzle transaction (`trx`) satisfy this, letting callers run the lookup
481487
* inside an open transaction so it observes uncommitted prior-batch inserts.
482488
*/
483-
type UniqueCheckExecutor = Pick<typeof db, 'select'>
489+
type UniqueCheckExecutor = Pick<typeof db, 'select' | 'execute'>
484490

485491
/**
486492
* Checks unique constraints for a batch of rows using targeted database queries.
@@ -548,70 +554,84 @@ export async function checkBatchUniqueConstraintsDb(
548554
}
549555
}
550556

551-
// Now check against database for all unique values at once
552-
for (const [columnId, { values, column }] of valuesByColumn) {
553-
if (values.size === 0) continue
554-
555-
if (!NAME_PATTERN.test(columnId)) {
556-
throw new Error(`Invalid column id: ${columnId}`)
557-
}
558-
559-
const valueArray = Array.from(values)
560-
const valueConditions = valueArray.map((normalizedValue) => {
561-
// Check if the original values are strings (normalized values for strings are lowercase)
562-
// We need to determine the type from the column definition or the first row that has this value
563-
const isStringColumn = column.type === 'string'
557+
// Now check against database for all unique values at once. Tenant-bounded
558+
// for the same reason as checkUniqueConstraintsDb: the lower(data->>...)
559+
// predicates are unestimatable and otherwise trigger whole-relation seq
560+
// scans. With an external transaction the flag is set on it directly (SET
561+
// LOCAL dies at its commit; it only penalizes plan shape, and the statements
562+
// that follow in those transactions are tenant-scoped writes).
563+
const checkColumns = async (ex: UniqueCheckExecutor) => {
564+
for (const [columnId, { values, column }] of valuesByColumn) {
565+
if (values.size === 0) continue
564566

565-
if (isStringColumn) {
566-
return sql`lower(${userTableRows.data}->>${sql.raw(`'${columnId}'`)}) = ${normalizedValue}`
567+
if (!NAME_PATTERN.test(columnId)) {
568+
throw new Error(`Invalid column id: ${columnId}`)
567569
}
568-
return sql`(${userTableRows.data}->${sql.raw(`'${columnId}'`)})::jsonb = ${normalizedValue}::jsonb`
569-
})
570570

571-
const conflictingRows = await executor
572-
.select({
573-
id: userTableRows.id,
574-
data: userTableRows.data,
575-
position: userTableRows.position,
571+
const valueArray = Array.from(values)
572+
const valueConditions = valueArray.map((normalizedValue) => {
573+
// Check if the original values are strings (normalized values for strings are lowercase)
574+
// We need to determine the type from the column definition or the first row that has this value
575+
const isStringColumn = column.type === 'string'
576+
577+
if (isStringColumn) {
578+
return sql`lower(${userTableRows.data}->>${sql.raw(`'${columnId}'`)}) = ${normalizedValue}`
579+
}
580+
return sql`(${userTableRows.data}->${sql.raw(`'${columnId}'`)})::jsonb = ${normalizedValue}::jsonb`
576581
})
577-
.from(userTableRows)
578-
.where(and(eq(userTableRows.tableId, tableId), or(...valueConditions)))
579-
.limit(valueArray.length) // We only need up to one conflict per value
580-
581-
// Map conflicts back to batch rows
582-
for (const conflict of conflictingRows) {
583-
const conflictData = conflict.data as RowData
584-
const conflictValue = conflictData[columnId]
585-
const normalizedConflictValue =
586-
typeof conflictValue === 'string'
587-
? conflictValue.toLowerCase()
588-
: JSON.stringify(conflictValue)
589-
590-
// Find which batch rows have this conflicting value
591-
for (let i = 0; i < rows.length; i++) {
592-
const rowValue = rows[i][columnId]
593-
if (rowValue === null || rowValue === undefined) continue
594-
595-
const normalizedRowValue =
596-
typeof rowValue === 'string' ? rowValue.toLowerCase() : JSON.stringify(rowValue)
597-
598-
if (normalizedRowValue === normalizedConflictValue) {
599-
// Check if this row already has errors for this column
600-
let rowError = rowErrors.find((e) => e.row === i)
601-
if (!rowError) {
602-
rowError = { row: i, errors: [] }
603-
rowErrors.push(rowError)
604-
}
605582

606-
const errorMsg = `Column "${column.name}" must be unique. Value "${rowValue}" already exists in row ${conflict.position + 1}`
607-
if (!rowError.errors.includes(errorMsg)) {
608-
rowError.errors.push(errorMsg)
583+
const conflictingRows = await ex
584+
.select({
585+
id: userTableRows.id,
586+
data: userTableRows.data,
587+
position: userTableRows.position,
588+
})
589+
.from(userTableRows)
590+
.where(and(eq(userTableRows.tableId, tableId), or(...valueConditions)))
591+
.limit(valueArray.length) // We only need up to one conflict per value
592+
593+
// Map conflicts back to batch rows
594+
for (const conflict of conflictingRows) {
595+
const conflictData = conflict.data as RowData
596+
const conflictValue = conflictData[columnId]
597+
const normalizedConflictValue =
598+
typeof conflictValue === 'string'
599+
? conflictValue.toLowerCase()
600+
: JSON.stringify(conflictValue)
601+
602+
// Find which batch rows have this conflicting value
603+
for (let i = 0; i < rows.length; i++) {
604+
const rowValue = rows[i][columnId]
605+
if (rowValue === null || rowValue === undefined) continue
606+
607+
const normalizedRowValue =
608+
typeof rowValue === 'string' ? rowValue.toLowerCase() : JSON.stringify(rowValue)
609+
610+
if (normalizedRowValue === normalizedConflictValue) {
611+
// Check if this row already has errors for this column
612+
let rowError = rowErrors.find((e) => e.row === i)
613+
if (!rowError) {
614+
rowError = { row: i, errors: [] }
615+
rowErrors.push(rowError)
616+
}
617+
618+
const errorMsg = `Column "${column.name}" must be unique. Value "${rowValue}" already exists in row ${conflict.position + 1}`
619+
if (!rowError.errors.includes(errorMsg)) {
620+
rowError.errors.push(errorMsg)
621+
}
609622
}
610623
}
611624
}
612625
}
613626
}
614627

628+
if (executor === db) {
629+
await withSeqscanOff(async (trx) => checkColumns(trx))
630+
} else {
631+
await executor.execute(sql`SET LOCAL enable_seqscan = off`)
632+
await checkColumns(executor)
633+
}
634+
615635
// Sort errors by row index
616636
rowErrors.sort((a, b) => a.row - b.row)
617637

0 commit comments

Comments
 (0)