From 624db17d2161e314066a16570d64ad209c47295c Mon Sep 17 00:00:00 2001 From: anshuman jaiswal Date: Thu, 21 May 2026 23:24:05 +0530 Subject: [PATCH] Stream large database exports --- src/export/csv.test.ts | 153 ++++++++++++------------- src/export/csv.ts | 87 +++++++++----- src/export/dump.test.ts | 116 +++++++++++-------- src/export/dump.ts | 126 +++++++++++--------- src/export/index.test.ts | 66 ++++++++++- src/export/index.ts | 241 +++++++++++++++++++++++++++++++++++++-- src/export/json.test.ts | 138 +++++++++++----------- src/export/json.ts | 65 +++++++++-- src/handler.ts | 19 ++- 9 files changed, 716 insertions(+), 295 deletions(-) diff --git a/src/export/csv.test.ts b/src/export/csv.test.ts index b186aeb..1790648 100644 --- a/src/export/csv.test.ts +++ b/src/export/csv.test.ts @@ -1,23 +1,11 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' import { exportTableToCsvRoute } from './csv' -import { getTableData, createExportResponse } from './index' -import { createResponse } from '../utils' +import { executeTransaction } from '../operation' import type { DataSource } from '../types' import type { StarbaseDBConfiguration } from '../handler' -vi.mock('./index', () => ({ - getTableData: vi.fn(), - createExportResponse: vi.fn(), -})) - -vi.mock('../utils', () => ({ - createResponse: vi.fn( - (data, message, status) => - new Response(JSON.stringify({ result: data, error: message }), { - status, - headers: { 'Content-Type': 'application/json' }, - }) - ), +vi.mock('../operation', () => ({ + executeTransaction: vi.fn(), })) let mockDataSource: DataSource @@ -27,8 +15,7 @@ beforeEach(() => { vi.clearAllMocks() mockDataSource = { - source: 'external', - external: { dialect: 'sqlite' }, + source: 'internal', rpc: { executeQuery: vi.fn(), }, @@ -42,17 +29,18 @@ beforeEach(() => { }) describe('CSV Export Module', () => { - it('should return a CSV file when table data exists', async () => { - vi.mocked(getTableData).mockResolvedValue([ - { id: 1, name: 'Alice', age: 30 }, - { id: 2, name: 'Bob', age: 25 }, - ]) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-csv-content', { - headers: { 'Content-Type': 'text/csv' }, - }) - ) + it('should stream a CSV file when table data exists', async () => { + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'users' }]]) + .mockResolvedValueOnce([ + [{ name: 'id' }, { name: 'name' }, { name: 'age' }], + ]) + .mockResolvedValueOnce([ + [ + { id: 1, name: 'Alice', age: 30 }, + { id: 2, name: 'Bob', age: 25 }, + ], + ]) const response = await exportTableToCsvRoute( 'users', @@ -60,21 +48,17 @@ describe('CSV Export Module', () => { mockConfig ) - expect(getTableData).toHaveBeenCalledWith( - 'users', - mockDataSource, - mockConfig + expect(response.headers.get('Content-Type')).toBe('text/csv') + expect(response.headers.get('Content-Disposition')).toBe( + 'attachment; filename="users_export.csv"' ) - expect(createExportResponse).toHaveBeenCalledWith( - 'id,name,age\n1,Alice,30\n2,Bob,25\n', - 'users_export.csv', - 'text/csv' + expect(await response.text()).toBe( + 'id,name,age\n1,Alice,30\n2,Bob,25\n' ) - expect(response.headers.get('Content-Type')).toBe('text/csv') }) it('should return 404 if table does not exist', async () => { - vi.mocked(getTableData).mockResolvedValue(null) + vi.mocked(executeTransaction).mockResolvedValueOnce([[]]) const response = await exportTableToCsvRoute( 'non_existent_table', @@ -82,11 +66,6 @@ describe('CSV Export Module', () => { mockConfig ) - expect(getTableData).toHaveBeenCalledWith( - 'non_existent_table', - mockDataSource, - mockConfig - ) expect(response.status).toBe(404) const jsonResponse: { error: string } = await response.json() @@ -95,14 +74,11 @@ describe('CSV Export Module', () => { ) }) - it('should handle empty table (return only headers)', async () => { - vi.mocked(getTableData).mockResolvedValue([]) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-csv-content', { - headers: { 'Content-Type': 'text/csv' }, - }) - ) + it('should include headers when an existing table has no rows', async () => { + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'empty_table' }]]) + .mockResolvedValueOnce([[{ name: 'id' }, { name: 'name' }]]) + .mockResolvedValueOnce([[]]) const response = await exportTableToCsvRoute( 'empty_table', @@ -110,49 +86,69 @@ describe('CSV Export Module', () => { mockConfig ) - expect(getTableData).toHaveBeenCalledWith( - 'empty_table', + expect(await response.text()).toBe('id,name\n') + }) + + it('should escape commas, quotes, and new lines in CSV values', async () => { + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'special_chars' }]]) + .mockResolvedValueOnce([ + [{ name: 'id' }, { name: 'name' }, { name: 'bio' }], + ]) + .mockResolvedValueOnce([ + [ + { + id: 1, + name: 'Sahithi, is', + bio: 'line one\nwith a "quote"', + }, + ], + ]) + + const response = await exportTableToCsvRoute( + 'special_chars', mockDataSource, mockConfig ) - expect(createExportResponse).toHaveBeenCalledWith( - '', - 'empty_table_export.csv', - 'text/csv' + + expect(await response.text()).toBe( + 'id,name,bio\n1,"Sahithi, is","line one\nwith a ""quote"""\n' ) - expect(response.headers.get('Content-Type')).toBe('text/csv') }) - it('should escape commas and quotes in CSV values', async () => { - vi.mocked(getTableData).mockResolvedValue([ - { id: 1, name: 'Sahithi, is', bio: 'my forever "penguin"' }, - ]) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-csv-content', { - headers: { 'Content-Type': 'text/csv' }, - }) - ) + it('should page additional CSV rows only as the stream is read', async () => { + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'users' }]]) + .mockResolvedValueOnce([[{ name: 'id' }, { name: 'name' }]]) + .mockResolvedValueOnce([[{ id: 1, name: 'Alice' }]]) + .mockResolvedValueOnce([[{ id: 2, name: 'Bob' }]]) + .mockResolvedValueOnce([[]]) const response = await exportTableToCsvRoute( - 'special_chars', + 'users', mockDataSource, - mockConfig + mockConfig, + { batchSize: 1 } ) - expect(createExportResponse).toHaveBeenCalledWith( - 'id,name,bio\n1,"Sahithi, is","my forever ""penguin"""\n', - 'special_chars_export.csv', - 'text/csv' - ) - expect(response.headers.get('Content-Type')).toBe('text/csv') + expect(executeTransaction).toHaveBeenCalledTimes(3) + expect(await response.text()).toBe('id,name\n1,Alice\n2,Bob\n') + expect(executeTransaction).toHaveBeenCalledTimes(5) + expect(vi.mocked(executeTransaction).mock.calls[3][0].queries).toEqual([ + { + sql: 'SELECT * FROM "users" LIMIT ? OFFSET ?;', + params: [1, 1], + }, + ]) }) - it('should return 500 on an unexpected error', async () => { + it('should return 500 on an unexpected error before streaming', async () => { const consoleErrorMock = vi .spyOn(console, 'error') .mockImplementation(() => {}) - vi.mocked(getTableData).mockRejectedValue(new Error('Database Error')) + vi.mocked(executeTransaction).mockRejectedValue( + new Error('Database Error') + ) const response = await exportTableToCsvRoute( 'users', @@ -163,5 +159,6 @@ describe('CSV Export Module', () => { expect(response.status).toBe(500) const jsonResponse: { error: string } = await response.json() expect(jsonResponse.error).toBe('Failed to export table to CSV') + consoleErrorMock.mockRestore() }) }) diff --git a/src/export/csv.ts b/src/export/csv.ts index 22a4591..939542c 100644 --- a/src/export/csv.ts +++ b/src/export/csv.ts @@ -1,17 +1,50 @@ -import { getTableData, createExportResponse } from './index' +import { + createExportResponse, + createTextStream, + formatCsvValue, + getTableColumns, + getTableDataPage, + iterateTableRows, + resolveExportBatchSize, + tableExists, + type ExportOptions, + type ExportRow, +} from './index' import { createResponse } from '../utils' -import { DataSource } from '../types' -import { StarbaseDBConfiguration } from '../handler' +import type { DataSource } from '../types' +import type { StarbaseDBConfiguration } from '../handler' + +async function* createCsvExportIterator(opts: { + tableName: string + dataSource: DataSource + config: StarbaseDBConfiguration + batchSize: number + columns: string[] + firstPage: ExportRow[] +}): AsyncGenerator { + const { columns } = opts + + if (columns.length) { + yield columns.map(formatCsvValue).join(',') + '\n' + } + + for await (const row of iterateTableRows(opts)) { + yield columns.map((column) => formatCsvValue(row[column])).join(',') + + '\n' + } +} export async function exportTableToCsvRoute( tableName: string, dataSource: DataSource, - config: StarbaseDBConfiguration + config: StarbaseDBConfiguration, + options: ExportOptions = {} ): Promise { try { - const data = await getTableData(tableName, dataSource, config) + const batchSize = resolveExportBatchSize(options.batchSize) + const exists = await tableExists(tableName, dataSource, config) - if (data === null) { + if (!exists) { return createResponse( undefined, `Table '${tableName}' does not exist.`, @@ -19,33 +52,25 @@ export async function exportTableToCsvRoute( ) } - // Convert the result to CSV - let csvContent = '' - if (data.length > 0) { - // Add headers - csvContent += Object.keys(data[0]).join(',') + '\n' - - // Add data rows - data.forEach((row: any) => { - csvContent += - Object.values(row) - .map((value) => { - if ( - typeof value === 'string' && - (value.includes(',') || - value.includes('"') || - value.includes('\n')) - ) { - return `"${value.replace(/"/g, '""')}"` - } - return value - }) - .join(',') + '\n' - }) - } + const [columnsResult, firstPage] = await Promise.all([ + getTableColumns(tableName, dataSource, config), + getTableDataPage(tableName, dataSource, config, batchSize, 0), + ]) + const columns = columnsResult.length + ? columnsResult + : Object.keys(firstPage[0] ?? {}) return createExportResponse( - csvContent, + createTextStream( + createCsvExportIterator({ + tableName, + dataSource, + config, + batchSize, + columns, + firstPage, + }) + ), `${tableName}_export.csv`, 'text/csv' ) diff --git a/src/export/dump.test.ts b/src/export/dump.test.ts index ca65b43..9e2d6cc 100644 --- a/src/export/dump.test.ts +++ b/src/export/dump.test.ts @@ -1,22 +1,11 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' import { dumpDatabaseRoute } from './dump' -import { executeOperation } from '.' -import { createResponse } from '../utils' +import { executeTransaction } from '../operation' import type { DataSource } from '../types' import type { StarbaseDBConfiguration } from '../handler' -vi.mock('.', () => ({ - executeOperation: vi.fn(), -})) - -vi.mock('../utils', () => ({ - createResponse: vi.fn( - (data, message, status) => - new Response(JSON.stringify({ result: data, error: message }), { - status, - headers: { 'Content-Type': 'application/json' }, - }) - ), +vi.mock('../operation', () => ({ + executeTransaction: vi.fn(), })) let mockDataSource: DataSource @@ -26,8 +15,7 @@ beforeEach(() => { vi.clearAllMocks() mockDataSource = { - source: 'external', - external: { dialect: 'sqlite' }, + source: 'internal', rpc: { executeQuery: vi.fn() }, } as any @@ -40,21 +28,27 @@ beforeEach(() => { describe('Database Dump Module', () => { it('should return a database dump when tables exist', async () => { - vi.mocked(executeOperation) - .mockResolvedValueOnce([{ name: 'users' }, { name: 'orders' }]) + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'users' }, { name: 'orders' }]]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }, + [{ sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }], ]) + .mockResolvedValueOnce([[{ name: 'id' }, { name: 'name' }]]) .mockResolvedValueOnce([ - { id: 1, name: 'Alice' }, - { id: 2, name: 'Bob' }, + [ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ], ]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE orders (id INTEGER, total REAL);' }, + [{ sql: 'CREATE TABLE orders (id INTEGER, total REAL);' }], ]) + .mockResolvedValueOnce([[{ name: 'id' }, { name: 'total' }]]) .mockResolvedValueOnce([ - { id: 1, total: 99.99 }, - { id: 2, total: 49.5 }, + [ + { id: 1, total: 99.99 }, + { id: 2, total: 49.5 }, + ], ]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) @@ -71,17 +65,17 @@ describe('Database Dump Module', () => { expect(dumpText).toContain( 'CREATE TABLE users (id INTEGER, name TEXT);' ) - expect(dumpText).toContain("INSERT INTO users VALUES (1, 'Alice');") - expect(dumpText).toContain("INSERT INTO users VALUES (2, 'Bob');") + expect(dumpText).toContain('INSERT INTO "users" VALUES (1, \'Alice\');') + expect(dumpText).toContain('INSERT INTO "users" VALUES (2, \'Bob\');') expect(dumpText).toContain( 'CREATE TABLE orders (id INTEGER, total REAL);' ) - expect(dumpText).toContain('INSERT INTO orders VALUES (1, 99.99);') - expect(dumpText).toContain('INSERT INTO orders VALUES (2, 49.5);') + expect(dumpText).toContain('INSERT INTO "orders" VALUES (1, 99.99);') + expect(dumpText).toContain('INSERT INTO "orders" VALUES (2, 49.5);') }) - it('should handle empty databases (no tables)', async () => { - vi.mocked(executeOperation).mockResolvedValueOnce([]) + it('should handle empty databases with no tables', async () => { + vi.mocked(executeTransaction).mockResolvedValueOnce([[]]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) @@ -89,17 +83,17 @@ describe('Database Dump Module', () => { expect(response.headers.get('Content-Type')).toBe( 'application/x-sqlite3' ) - const dumpText = await response.text() - expect(dumpText).toBe('SQLite format 3\0') + expect(await response.text()).toBe('SQLite format 3\0') }) it('should handle databases with tables but no data', async () => { - vi.mocked(executeOperation) - .mockResolvedValueOnce([{ name: 'users' }]) + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'users' }]]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }, + [{ sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }], ]) - .mockResolvedValueOnce([]) + .mockResolvedValueOnce([[{ name: 'id' }, { name: 'name' }]]) + .mockResolvedValueOnce([[]]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) @@ -108,31 +102,60 @@ describe('Database Dump Module', () => { expect(dumpText).toContain( 'CREATE TABLE users (id INTEGER, name TEXT);' ) - expect(dumpText).not.toContain('INSERT INTO users VALUES') + expect(dumpText).not.toContain('INSERT INTO "users" VALUES') }) - it('should escape single quotes properly in string values', async () => { - vi.mocked(executeOperation) - .mockResolvedValueOnce([{ name: 'users' }]) + it('should escape single quotes and null values in SQL rows', async () => { + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'users' }]]) .mockResolvedValueOnce([ - { sql: 'CREATE TABLE users (id INTEGER, bio TEXT);' }, + [{ sql: 'CREATE TABLE users (id INTEGER, bio TEXT);' }], ]) - .mockResolvedValueOnce([{ id: 1, bio: "Alice's adventure" }]) + .mockResolvedValueOnce([[{ name: 'id' }, { name: 'bio' }]]) + .mockResolvedValueOnce([[{ id: 1, bio: "Alice's adventure" }]]) const response = await dumpDatabaseRoute(mockDataSource, mockConfig) - expect(response).toBeInstanceOf(Response) const dumpText = await response.text() expect(dumpText).toContain( - "INSERT INTO users VALUES (1, 'Alice''s adventure');" + "INSERT INTO \"users\" VALUES (1, 'Alice''s adventure');" ) }) - it('should return a 500 response when an error occurs', async () => { + it('should page dump rows as the stream is read', async () => { + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'users' }]]) + .mockResolvedValueOnce([ + [{ sql: 'CREATE TABLE users (id INTEGER, name TEXT);' }], + ]) + .mockResolvedValueOnce([[{ name: 'id' }, { name: 'name' }]]) + .mockResolvedValueOnce([[{ id: 1, name: 'Alice' }]]) + .mockResolvedValueOnce([[{ id: 2, name: 'Bob' }]]) + .mockResolvedValueOnce([[]]) + + const response = await dumpDatabaseRoute(mockDataSource, mockConfig, { + batchSize: 1, + }) + + expect(executeTransaction).toHaveBeenCalledTimes(1) + + const dumpText = await response.text() + expect(dumpText).toContain('INSERT INTO "users" VALUES (1, \'Alice\');') + expect(dumpText).toContain('INSERT INTO "users" VALUES (2, \'Bob\');') + expect(executeTransaction).toHaveBeenCalledTimes(6) + expect(vi.mocked(executeTransaction).mock.calls[4][0].queries).toEqual([ + { + sql: 'SELECT * FROM "users" LIMIT ? OFFSET ?;', + params: [1, 1], + }, + ]) + }) + + it('should return a 500 response when an error occurs before streaming', async () => { const consoleErrorMock = vi .spyOn(console, 'error') .mockImplementation(() => {}) - vi.mocked(executeOperation).mockRejectedValue( + vi.mocked(executeTransaction).mockRejectedValue( new Error('Database Error') ) @@ -141,5 +164,6 @@ describe('Database Dump Module', () => { expect(response.status).toBe(500) const jsonResponse: { error: string } = await response.json() expect(jsonResponse.error).toBe('Failed to create database dump') + consoleErrorMock.mockRestore() }) }) diff --git a/src/export/dump.ts b/src/export/dump.ts index 91a2e89..f01bd6a 100644 --- a/src/export/dump.ts +++ b/src/export/dump.ts @@ -1,69 +1,89 @@ -import { executeOperation } from '.' -import { StarbaseDBConfiguration } from '../handler' -import { DataSource } from '../types' +import { + createExportResponse, + createTextStream, + executeOperation, + formatSqlValue, + getExportableTables, + getTableColumns, + iterateTableRows, + quoteSqlIdentifier, + resolveExportBatchSize, + type ExportOptions, +} from '.' +import type { StarbaseDBConfiguration } from '../handler' +import type { DataSource } from '../types' import { createResponse } from '../utils' -export async function dumpDatabaseRoute( - dataSource: DataSource, +async function* createDatabaseDumpIterator(opts: { + tables: string[] + dataSource: DataSource config: StarbaseDBConfiguration -): Promise { - try { - // Get all table names - const tablesResult = await executeOperation( - [{ sql: "SELECT name FROM sqlite_master WHERE type='table';" }], - dataSource, - config - ) + batchSize: number +}): AsyncGenerator { + const { tables, dataSource, config, batchSize } = opts - const tables = tablesResult.map((row: any) => row.name) - let dumpContent = 'SQLite format 3\0' // SQLite file header + yield 'SQLite format 3\0' - // Iterate through all tables - for (const table of tables) { - // Get table schema - const schemaResult = await executeOperation( - [ - { - sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name='${table}';`, - }, - ], - dataSource, - config - ) + for (const table of tables) { + const quotedTable = quoteSqlIdentifier(table) - if (schemaResult.length) { - const schema = schemaResult[0].sql - dumpContent += `\n-- Table: ${table}\n${schema};\n\n` - } + const schemaResult = await executeOperation( + [ + { + sql: `SELECT sql FROM sqlite_master WHERE type='table' AND name=?;`, + params: [table], + }, + ], + dataSource, + config + ) - // Get table data - const dataResult = await executeOperation( - [{ sql: `SELECT * FROM ${table};` }], - dataSource, - config - ) + if (schemaResult.length && typeof schemaResult[0].sql === 'string') { + const schemaSql = schemaResult[0].sql.trim().replace(/;$/, '') + yield `\n-- Table: ${table}\n${schemaSql};\n\n` + } - for (const row of dataResult) { - const values = Object.values(row).map((value) => - typeof value === 'string' - ? `'${value.replace(/'/g, "''")}'` - : value - ) - dumpContent += `INSERT INTO ${table} VALUES (${values.join(', ')});\n` - } + const columns = await getTableColumns(table, dataSource, config) - dumpContent += '\n' + for await (const row of iterateTableRows({ + tableName: table, + dataSource, + config, + batchSize, + })) { + const values = ( + columns.length + ? columns.map((column) => row[column]) + : Object.values(row) + ).map(formatSqlValue) + yield `INSERT INTO ${quotedTable} VALUES (${values.join(', ')});\n` } - // Create a Blob from the dump content - const blob = new Blob([dumpContent], { type: 'application/x-sqlite3' }) + yield '\n' + } +} - const headers = new Headers({ - 'Content-Type': 'application/x-sqlite3', - 'Content-Disposition': 'attachment; filename="database_dump.sql"', - }) +export async function dumpDatabaseRoute( + dataSource: DataSource, + config: StarbaseDBConfiguration, + options: ExportOptions = {} +): Promise { + try { + const batchSize = resolveExportBatchSize(options.batchSize) + const tables = await getExportableTables(dataSource, config) - return new Response(blob, { headers }) + return createExportResponse( + createTextStream( + createDatabaseDumpIterator({ + tables, + dataSource, + config, + batchSize, + }) + ), + 'database_dump.sql', + 'application/x-sqlite3' + ) } catch (error: any) { console.error('Database Dump Error:', error) return createResponse(undefined, 'Failed to create database dump', 500) diff --git a/src/export/index.test.ts b/src/export/index.test.ts index 48de76e..33f430c 100644 --- a/src/export/index.test.ts +++ b/src/export/index.test.ts @@ -1,5 +1,14 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' -import { executeOperation, getTableData, createExportResponse } from './index' +import { + createExportResponse, + executeOperation, + formatCsvValue, + formatSqlValue, + getTableData, + getTableDataPage, + quoteSqlIdentifier, + resolveExportBatchSize, +} from './index' import { executeTransaction } from '../operation' import type { DataSource } from '../types' import type { StarbaseDBConfiguration } from '../handler' @@ -109,6 +118,61 @@ describe('Database Operations Module', () => { }) }) + describe('getTableDataPage', () => { + it('should query a bounded page with a quoted table name', async () => { + vi.mocked(executeTransaction).mockResolvedValueOnce([ + [{ id: 1, name: 'Alice' }], + ]) + + const result = await getTableDataPage( + 'user data', + mockDataSource, + mockConfig, + 100, + 200 + ) + + expect(executeTransaction).toHaveBeenCalledWith({ + queries: [ + { + sql: 'SELECT * FROM "user data" LIMIT ? OFFSET ?;', + params: [100, 200], + }, + ], + isRaw: false, + dataSource: mockDataSource, + config: mockConfig, + }) + expect(result).toEqual([{ id: 1, name: 'Alice' }]) + }) + }) + + describe('export formatting helpers', () => { + it('should clamp batch sizes to safe defaults', () => { + expect(resolveExportBatchSize()).toBe(500) + expect(resolveExportBatchSize('2')).toBe(2) + expect(resolveExportBatchSize(0)).toBe(500) + expect(resolveExportBatchSize(6000)).toBe(5000) + }) + + it('should quote SQL identifiers safely', () => { + expect(quoteSqlIdentifier('users')).toBe('"users"') + expect(quoteSqlIdentifier('weird"name')).toBe('"weird""name"') + }) + + it('should format SQL values safely', () => { + expect(formatSqlValue("Alice's note")).toBe("'Alice''s note'") + expect(formatSqlValue(null)).toBe('NULL') + expect(formatSqlValue(true)).toBe('1') + }) + + it('should format CSV values safely', () => { + expect(formatCsvValue('plain')).toBe('plain') + expect(formatCsvValue('hello, "world"')).toBe('"hello, ""world"""') + expect(formatCsvValue(null)).toBe('') + }) + }) + describe('createExportResponse', () => { it('should create a valid response for a CSV file', () => { const response = createExportResponse( diff --git a/src/export/index.ts b/src/export/index.ts index 9c40119..d94b0ec 100644 --- a/src/export/index.ts +++ b/src/export/index.ts @@ -1,6 +1,15 @@ -import { DataSource } from '../types' +import type { DataSource } from '../types' import { executeTransaction } from '../operation' -import { StarbaseDBConfiguration } from '../handler' +import type { StarbaseDBConfiguration } from '../handler' + +export const DEFAULT_EXPORT_BATCH_SIZE = 500 +export const MAX_EXPORT_BATCH_SIZE = 5000 + +export type ExportOptions = { + batchSize?: number | string +} + +export type ExportRow = Record export async function executeOperation( queries: { sql: string; params?: any[] }[], @@ -43,7 +52,7 @@ export async function getTableData( // Get table data const dataResult = await executeOperation( - [{ sql: `SELECT * FROM ${tableName};` }], + [{ sql: `SELECT * FROM ${quoteSqlIdentifier(tableName)};` }], dataSource, config ) @@ -54,17 +63,235 @@ export async function getTableData( } } +export function resolveExportBatchSize( + batchSize?: ExportOptions['batchSize'] +): number { + const parsed = + typeof batchSize === 'string' + ? Number.parseInt(batchSize, 10) + : batchSize + + if (!parsed || Number.isNaN(parsed) || parsed < 1) { + return DEFAULT_EXPORT_BATCH_SIZE + } + + return Math.min(Math.floor(parsed), MAX_EXPORT_BATCH_SIZE) +} + +export function quoteSqlIdentifier(identifier: string): string { + return `"${identifier.replace(/"/g, '""')}"` +} + +export async function tableExists( + tableName: string, + dataSource: DataSource, + config: StarbaseDBConfiguration +): Promise { + const tableExistsResult = await executeOperation( + [ + { + sql: `SELECT name FROM sqlite_master WHERE type='table' AND name=?;`, + params: [tableName], + }, + ], + dataSource, + config + ) + + return !!tableExistsResult?.length +} + +export async function getExportableTables( + dataSource: DataSource, + config: StarbaseDBConfiguration +): Promise { + const tablesResult = await executeOperation( + [ + { + sql: "SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite_%';", + }, + ], + dataSource, + config + ) + + return tablesResult + .map((row: ExportRow) => row.name) + .filter((name): name is string => typeof name === 'string') +} + +export async function getTableColumns( + tableName: string, + dataSource: DataSource, + config: StarbaseDBConfiguration +): Promise { + const columnResult = await executeOperation( + [ + { + sql: `PRAGMA table_info(${quoteSqlIdentifier(tableName)});`, + }, + ], + dataSource, + config + ) + + return columnResult + .map((row: ExportRow) => row.name) + .filter((name): name is string => typeof name === 'string') +} + +export async function getTableDataPage( + tableName: string, + dataSource: DataSource, + config: StarbaseDBConfiguration, + limit: number, + offset: number +): Promise { + return executeOperation( + [ + { + sql: `SELECT * FROM ${quoteSqlIdentifier(tableName)} LIMIT ? OFFSET ?;`, + params: [limit, offset], + }, + ], + dataSource, + config + ) +} + +export async function* iterateTableRows(opts: { + tableName: string + dataSource: DataSource + config: StarbaseDBConfiguration + batchSize: number + firstPage?: ExportRow[] +}): AsyncGenerator { + const { tableName, dataSource, config, batchSize } = opts + let offset = 0 + let page = opts.firstPage + + while (true) { + if (!page) { + page = await getTableDataPage( + tableName, + dataSource, + config, + batchSize, + offset + ) + } + + if (!page.length) { + return + } + + for (const row of page) { + yield row + } + + offset += page.length + + if (page.length < batchSize) { + return + } + + page = undefined + } +} + +export function createTextStream( + iterator: AsyncGenerator +): ReadableStream { + const encoder = new TextEncoder() + + return new ReadableStream({ + async pull(controller) { + try { + const { value, done } = await iterator.next() + + if (done) { + controller.close() + return + } + + controller.enqueue(encoder.encode(value)) + } catch (error) { + controller.error(error) + } + }, + async cancel() { + await iterator.return?.(undefined) + }, + }) +} + +function bytesToHex(bytes: Uint8Array): string { + return Array.from(bytes, (byte) => byte.toString(16).padStart(2, '0')).join( + '' + ) +} + +export function formatSqlValue(value: unknown): string { + if (value === null || value === undefined) { + return 'NULL' + } + + if (typeof value === 'number') { + return Number.isFinite(value) ? String(value) : 'NULL' + } + + if (typeof value === 'bigint') { + return value.toString() + } + + if (typeof value === 'boolean') { + return value ? '1' : '0' + } + + if (value instanceof ArrayBuffer) { + return `X'${bytesToHex(new Uint8Array(value))}'` + } + + if (ArrayBuffer.isView(value)) { + const bytes = new Uint8Array( + value.buffer, + value.byteOffset, + value.byteLength + ) + return `X'${bytesToHex(bytes)}'` + } + + return `'${String(value).replace(/'/g, "''")}'` +} + +export function formatCsvValue(value: unknown): string { + if (value === null || value === undefined) { + return '' + } + + const output = value instanceof Date ? value.toISOString() : String(value) + + if ( + output.includes(',') || + output.includes('"') || + output.includes('\n') || + output.includes('\r') + ) { + return `"${output.replace(/"/g, '""')}"` + } + + return output +} + export function createExportResponse( - data: any, + data: BodyInit | null, fileName: string, contentType: string ): Response { - const blob = new Blob([data], { type: contentType }) - const headers = new Headers({ 'Content-Type': contentType, 'Content-Disposition': `attachment; filename="${fileName}"`, }) - return new Response(blob, { headers }) + return new Response(data, { headers }) } diff --git a/src/export/json.test.ts b/src/export/json.test.ts index 3fe4a8c..75a3f6b 100644 --- a/src/export/json.test.ts +++ b/src/export/json.test.ts @@ -1,23 +1,11 @@ import { describe, it, expect, vi, beforeEach } from 'vitest' import { exportTableToJsonRoute } from './json' -import { getTableData, createExportResponse } from './index' -import { createResponse } from '../utils' +import { executeTransaction } from '../operation' import type { DataSource } from '../types' import type { StarbaseDBConfiguration } from '../handler' -vi.mock('./index', () => ({ - getTableData: vi.fn(), - createExportResponse: vi.fn(), -})) - -vi.mock('../utils', () => ({ - createResponse: vi.fn( - (data, message, status) => - new Response(JSON.stringify({ result: data, error: message }), { - status, - headers: { 'Content-Type': 'application/json' }, - }) - ), +vi.mock('../operation', () => ({ + executeTransaction: vi.fn(), })) let mockDataSource: DataSource @@ -27,8 +15,7 @@ beforeEach(() => { vi.clearAllMocks() mockDataSource = { - source: 'external', - external: { dialect: 'sqlite' }, + source: 'internal', rpc: { executeQuery: vi.fn() }, } as any @@ -41,7 +28,7 @@ beforeEach(() => { describe('JSON Export Module', () => { it('should return a 404 response if table does not exist', async () => { - vi.mocked(getTableData).mockResolvedValue(null) + vi.mocked(executeTransaction).mockResolvedValueOnce([[]]) const response = await exportTableToJsonRoute( 'missing_table', @@ -54,18 +41,15 @@ describe('JSON Export Module', () => { expect(jsonResponse.error).toBe("Table 'missing_table' does not exist.") }) - it('should return a JSON file when table data exists', async () => { - const mockData = [ - { id: 1, name: 'Alice' }, - { id: 2, name: 'Bob' }, - ] - vi.mocked(getTableData).mockResolvedValue(mockData) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-json-content', { - headers: { 'Content-Type': 'application/json' }, - }) - ) + it('should stream a JSON file when table data exists', async () => { + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'users' }]]) + .mockResolvedValueOnce([ + [ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ], + ]) const response = await exportTableToJsonRoute( 'users', @@ -73,27 +57,22 @@ describe('JSON Export Module', () => { mockConfig ) - expect(getTableData).toHaveBeenCalledWith( - 'users', - mockDataSource, - mockConfig - ) - expect(createExportResponse).toHaveBeenCalledWith( - JSON.stringify(mockData, null, 4), - 'users_export.json', - 'application/json' - ) expect(response.headers.get('Content-Type')).toBe('application/json') + expect(response.headers.get('Content-Disposition')).toBe( + 'attachment; filename="users_export.json"' + ) + + const jsonResponse = JSON.parse(await response.text()) + expect(jsonResponse).toEqual([ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ]) }) it('should return an empty JSON array when table has no data', async () => { - vi.mocked(getTableData).mockResolvedValue([]) - - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-json-content', { - headers: { 'Content-Type': 'application/json' }, - }) - ) + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'empty_table' }]]) + .mockResolvedValueOnce([[]]) const response = await exportTableToJsonRoute( 'empty_table', @@ -101,12 +80,44 @@ describe('JSON Export Module', () => { mockConfig ) - expect(createExportResponse).toHaveBeenCalledWith( - '[]', - 'empty_table_export.json', - 'application/json' + expect(await response.text()).toBe('[]') + }) + + it('should page additional JSON rows only as the stream is read', async () => { + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'users' }]]) + .mockResolvedValueOnce([[{ id: 1, name: 'Alice' }]]) + .mockResolvedValueOnce([[{ id: 2, name: 'Bob' }]]) + .mockResolvedValueOnce([[]]) + + const response = await exportTableToJsonRoute( + 'users', + mockDataSource, + mockConfig, + { batchSize: 1 } ) - expect(response.headers.get('Content-Type')).toBe('application/json') + + expect(executeTransaction).toHaveBeenCalledTimes(2) + + const jsonResponse = JSON.parse(await response.text()) + + expect(jsonResponse).toEqual([ + { id: 1, name: 'Alice' }, + { id: 2, name: 'Bob' }, + ]) + expect(executeTransaction).toHaveBeenCalledTimes(4) + expect(vi.mocked(executeTransaction).mock.calls[2][0].queries).toEqual([ + { + sql: 'SELECT * FROM "users" LIMIT ? OFFSET ?;', + params: [1, 1], + }, + ]) + expect(vi.mocked(executeTransaction).mock.calls[3][0].queries).toEqual([ + { + sql: 'SELECT * FROM "users" LIMIT ? OFFSET ?;', + params: [1, 2], + }, + ]) }) it('should escape special characters in JSON properly', async () => { @@ -114,13 +125,10 @@ describe('JSON Export Module', () => { { id: 1, name: 'Sahithi "The Best"' }, { id: 2, description: 'New\nLine' }, ] - vi.mocked(getTableData).mockResolvedValue(specialCharsData) - vi.mocked(createExportResponse).mockReturnValue( - new Response('mocked-json-content', { - headers: { 'Content-Type': 'application/json' }, - }) - ) + vi.mocked(executeTransaction) + .mockResolvedValueOnce([[{ name: 'special_chars' }]]) + .mockResolvedValueOnce([specialCharsData]) const response = await exportTableToJsonRoute( 'special_chars', @@ -128,19 +136,16 @@ describe('JSON Export Module', () => { mockConfig ) - expect(createExportResponse).toHaveBeenCalledWith( - JSON.stringify(specialCharsData, null, 4), - 'special_chars_export.json', - 'application/json' - ) - expect(response.headers.get('Content-Type')).toBe('application/json') + expect(JSON.parse(await response.text())).toEqual(specialCharsData) }) - it('should return a 500 response when an error occurs', async () => { + it('should return a 500 response when an error occurs before streaming', async () => { const consoleErrorMock = vi .spyOn(console, 'error') .mockImplementation(() => {}) - vi.mocked(getTableData).mockRejectedValue(new Error('Database Error')) + vi.mocked(executeTransaction).mockRejectedValue( + new Error('Database Error') + ) const response = await exportTableToJsonRoute( 'users', @@ -151,5 +156,6 @@ describe('JSON Export Module', () => { expect(response.status).toBe(500) const jsonResponse = (await response.json()) as { error: string } expect(jsonResponse.error).toBe('Failed to export table to JSON') + consoleErrorMock.mockRestore() }) }) diff --git a/src/export/json.ts b/src/export/json.ts index c0ab811..d65e17e 100644 --- a/src/export/json.ts +++ b/src/export/json.ts @@ -1,17 +1,51 @@ -import { getTableData, createExportResponse } from './index' +import { + createExportResponse, + createTextStream, + iterateTableRows, + getTableDataPage, + resolveExportBatchSize, + tableExists, + type ExportOptions, + type ExportRow, +} from './index' import { createResponse } from '../utils' -import { DataSource } from '../types' -import { StarbaseDBConfiguration } from '../handler' +import type { DataSource } from '../types' +import type { StarbaseDBConfiguration } from '../handler' + +async function* createJsonExportIterator(opts: { + tableName: string + dataSource: DataSource + config: StarbaseDBConfiguration + batchSize: number + firstPage: ExportRow[] +}): AsyncGenerator { + let hasRows = false + + yield '[' + + for await (const row of iterateTableRows(opts)) { + yield `${hasRows ? ',' : ''}\n ${JSON.stringify(row)}` + hasRows = true + } + + if (hasRows) { + yield '\n' + } + + yield ']' +} export async function exportTableToJsonRoute( tableName: string, dataSource: DataSource, - config: StarbaseDBConfiguration + config: StarbaseDBConfiguration, + options: ExportOptions = {} ): Promise { try { - const data = await getTableData(tableName, dataSource, config) + const batchSize = resolveExportBatchSize(options.batchSize) + const exists = await tableExists(tableName, dataSource, config) - if (data === null) { + if (!exists) { return createResponse( undefined, `Table '${tableName}' does not exist.`, @@ -19,11 +53,24 @@ export async function exportTableToJsonRoute( ) } - // Convert the result to JSON - const jsonData = JSON.stringify(data, null, 4) + const firstPage = await getTableDataPage( + tableName, + dataSource, + config, + batchSize, + 0 + ) return createExportResponse( - jsonData, + createTextStream( + createJsonExportIterator({ + tableName, + dataSource, + config, + batchSize, + firstPage, + }) + ), `${tableName}_export.json`, 'application/json' ) diff --git a/src/handler.ts b/src/handler.ts index 3fa0085..4946e79 100644 --- a/src/handler.ts +++ b/src/handler.ts @@ -9,6 +9,7 @@ import { createResponse, QueryRequest, QueryTransactionRequest } from './utils' import { dumpDatabaseRoute } from './export/dump' import { exportTableToJsonRoute } from './export/json' import { exportTableToCsvRoute } from './export/csv' +import { resolveExportBatchSize } from './export' import { importDumpRoute } from './import/dump' import { importTableFromJsonRoute } from './import/json' import { importTableFromCsvRoute } from './import/csv' @@ -120,8 +121,10 @@ export class StarbaseDB { } if (this.getFeature('export')) { - this.app.get('/export/dump', this.isInternalSource, async () => { - return dumpDatabaseRoute(this.dataSource, this.config) + this.app.get('/export/dump', this.isInternalSource, async (c) => { + return dumpDatabaseRoute(this.dataSource, this.config, { + batchSize: resolveExportBatchSize(c.req.query('batchSize')), + }) }) this.app.get( @@ -130,10 +133,14 @@ export class StarbaseDB { this.hasTableName, async (c) => { const tableName = c.req.valid('param').tableName + const batchSize = resolveExportBatchSize( + c.req.query('batchSize') + ) return exportTableToJsonRoute( tableName, this.dataSource, - this.config + this.config, + { batchSize } ) } ) @@ -144,10 +151,14 @@ export class StarbaseDB { this.hasTableName, async (c) => { const tableName = c.req.valid('param').tableName + const batchSize = resolveExportBatchSize( + c.req.query('batchSize') + ) return exportTableToCsvRoute( tableName, this.dataSource, - this.config + this.config, + { batchSize } ) } )