From cb48895bd204ac47494c6eee2f697334bd0b1157 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 3 Jun 2026 12:09:43 +0100 Subject: [PATCH 01/11] wip: spatialite wasm --- .cargo/config.toml | 13 + .github/workflows/build.yaml | 9 + .github/workflows/publish.yaml | 9 + .github/workflows/release-packages.yml | 13 + Cargo.lock | 4 +- Cargo.toml | 5 +- ggsql-wasm/Cargo.toml | 4 +- ggsql-wasm/build-wasm.sh | 41 ++- ggsql-wasm/demo/build.mjs | 8 + ggsql-wasm/demo/package-lock.json | 12 + ggsql-wasm/demo/src/context.ts | 25 +- ggsql-wasm/demo/src/examples.ts | 23 ++ ggsql-wasm/demo/src/main.ts | 31 +- ggsql-wasm/library/package.json | 1 + ggsql-wasm/library/src/extensions.ts | 346 ++++++++++++++++++ ggsql-wasm/library/src/index.ts | 3 + ggsql-wasm/src/lib.rs | 64 +++- src/reader/sqlite.rs | 22 +- .../bindings/rust/wasm-sysroot/src/stdio.c | 9 + .../bindings/rust/wasm-sysroot/src/stdlib.c | 4 + .../bindings/rust/wasm-sysroot/src/string.c | 8 + .../bindings/rust/wasm-sysroot/src/wctype.c | 3 + 22 files changed, 617 insertions(+), 40 deletions(-) create mode 100644 ggsql-wasm/library/src/extensions.ts diff --git a/.cargo/config.toml b/.cargo/config.toml index 96805a81d..18c384acb 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -4,4 +4,17 @@ LIBSQLITE3_FLAGS = "-DSQLITE_ENABLE_MATH_FUNCTIONS" [target.wasm32-unknown-unknown] rustflags = [ "-C", "link-args=-z stack-size=16777216", + "-C", "link-args=--export-table --growable-table", + "-C", "link-args=--export=acos --export=asin --export=atan --export=atan2 --export=cos --export=exp --export=fmod --export=log --export=pow --export=sin --export=tan", + "-C", "link-args=--export=rust_sqlite_wasm_malloc --export=rust_sqlite_wasm_free --export=rust_sqlite_wasm_realloc --export=rust_sqlite_wasm_localtime --export=rust_sqlite_wasm_errno_location", + "-C", "link-args=--export=rust_sqlite_wasm_atoi --export=rust_sqlite_wasm_strtod --export=rust_sqlite_wasm_strtol --export=rust_sqlite_wasm_bsearch 
--export=rust_sqlite_wasm_qsort", + "-C", "link-args=--export=rust_sqlite_wasm_strcat --export=rust_sqlite_wasm_strchr --export=rust_sqlite_wasm_strcmp --export=rust_sqlite_wasm_strcpy --export=rust_sqlite_wasm_strlen --export=rust_sqlite_wasm_strncmp", + "-C", "link-args=--export=sqlite3_open_v2 --export=sqlite3_close --export=sqlite3_exec --export=sqlite3_prepare_v2 --export=sqlite3_step --export=sqlite3_reset --export=sqlite3_finalize", + "-C", "link-args=--export=sqlite3_bind_blob --export=sqlite3_bind_double --export=sqlite3_bind_int --export=sqlite3_bind_int64 --export=sqlite3_bind_null --export=sqlite3_bind_text", + "-C", "link-args=--export=sqlite3_column_blob --export=sqlite3_column_bytes --export=sqlite3_column_count --export=sqlite3_column_double --export=sqlite3_column_int64 --export=sqlite3_column_text --export=sqlite3_column_type", + "-C", "link-args=--export=sqlite3_value_double --export=sqlite3_value_int64 --export=sqlite3_value_type --export=sqlite3_result_double --export=sqlite3_result_int --export=sqlite3_result_null", + "-C", "link-args=--export=sqlite3_create_function --export=sqlite3_errmsg --export=sqlite3_config --export=sqlite3_initialize --export=sqlite3_last_insert_rowid --export=sqlite3_test_control", + "-C", "link-args=--export=sqlite3_malloc --export=sqlite3_free --export=sqlite3_mprintf --export=sqlite3_snprintf --export=sqlite3_vsnprintf --export=sqlite3_get_table --export=sqlite3_free_table", + "-C", "link-args=--export=sqlite3_libversion --export=sqlite3_libversion_number --export=sqlite3_uri_int64", + "-C", "link-args=--export=sqlite3_vfs_find --export=sqlite3_vfs_register --export=sqlite3_vfs_unregister", ] diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 3d762b62a..0c890eac6 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -84,3 +84,12 @@ jobs: - name: Build WASM package working-directory: ggsql-wasm run: wasm-pack build --target web --profile wasm --no-opt + + # TODO: 
drop once rustwasm/wasm-pack#1092 is resolved. + - name: Rebuild wasm bindings with --keep-lld-exports + run: | + wb=$(find "$HOME/.cache/.wasm-pack" "$HOME/Library/Caches/.wasm-pack" -name wasm-bindgen -type f 2>/dev/null | sort -V | tail -1 || true) + : "${wb:?cached wasm-bindgen not found}" + "$wb" --target web --keep-lld-exports \ + --out-dir ggsql-wasm/pkg \ + target/wasm32-unknown-unknown/wasm/ggsql_wasm.wasm diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index a8234ff95..10de20fa6 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -74,6 +74,15 @@ jobs: working-directory: ggsql-wasm run: wasm-pack build --target web --profile wasm --no-opt + # TODO: drop once rustwasm/wasm-pack#1092 is resolved. + - name: Rebuild wasm bindings with --keep-lld-exports + run: | + wb=$(find "$HOME/.cache/.wasm-pack" "$HOME/Library/Caches/.wasm-pack" -name wasm-bindgen -type f 2>/dev/null | sort -V | tail -1 || true) + : "${wb:?cached wasm-bindgen not found}" + "$wb" --target web --keep-lld-exports \ + --out-dir ggsql-wasm/pkg \ + target/wasm32-unknown-unknown/wasm/ggsql_wasm.wasm + - name: Optimise WASM binary working-directory: ggsql-wasm run: wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features diff --git a/.github/workflows/release-packages.yml b/.github/workflows/release-packages.yml index 7a22cc198..66fe41d8b 100644 --- a/.github/workflows/release-packages.yml +++ b/.github/workflows/release-packages.yml @@ -495,10 +495,23 @@ jobs: working-directory: ggsql-wasm run: wasm-pack build --target web --profile wasm --no-opt + # TODO: drop once rustwasm/wasm-pack#1092 is resolved. 
+ - name: Rebuild wasm bindings with --keep-lld-exports + run: | + wb=$(find "$HOME/.cache/.wasm-pack" "$HOME/Library/Caches/.wasm-pack" -name wasm-bindgen -type f 2>/dev/null | sort -V | tail -1 || true) + : "${wb:?cached wasm-bindgen not found}" + "$wb" --target web --keep-lld-exports \ + --out-dir ggsql-wasm/pkg \ + target/wasm32-unknown-unknown/wasm/ggsql_wasm.wasm + - name: Optimise WASM binary working-directory: ggsql-wasm run: wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features + - name: Add snippets/ to package files + working-directory: ggsql-wasm/pkg + run: npm pkg set 'files[]=snippets/' + - name: Create npm tarball working-directory: ggsql-wasm/pkg run: npm pack diff --git a/Cargo.lock b/Cargo.lock index 2dbe4d44b..1b58ff27a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4048,9 +4048,7 @@ dependencies = [ [[package]] name = "sqlite-wasm-rs" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b2c760607300407ddeaee518acf28c795661b7108c75421303dbefb237d3a36" +version = "0.5.5" dependencies = [ "cc", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index c949a0df4..5ecd7f28d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ arrow = { version = "58", default-features = false } duckdb = { version = "~1.10502", features = ["bundled", "vtab-arrow"] } parquet = { version = "58", default-features = false, features = ["arrow", "snap"] } bytes = "1" -rusqlite = { version = "0.38", features = ["bundled", "chrono"] } +rusqlite = { version = "0.38", features = ["bundled", "chrono", "load_extension"] } # ODBC toml_edit = "0.22" @@ -80,3 +80,6 @@ strip = true inherits = "release" opt-level = "z" panic = "abort" + +[patch.crates-io] +sqlite-wasm-rs = { path = "../sqlite-wasm-rs" } diff --git a/ggsql-wasm/Cargo.toml b/ggsql-wasm/Cargo.toml index 1ce69e2fd..ca67029c4 100644 --- a/ggsql-wasm/Cargo.toml +++ b/ggsql-wasm/Cargo.toml @@ -16,7 +16,7 @@ wasm-bindgen = "0.2" wasm-bindgen-futures = "0.4" 
js-sys = "0.3"
 arrow = { workspace = true }
-ggsql = { path = "../src", default-features = false, features = ["vegalite", "sqlite", "builtin-data"] }
+ggsql = { path = "../src", default-features = false, features = ["vegalite", "sqlite", "builtin-data", "spatial"] }
 serde_json = "1"
 
 [target.'cfg(not(target_arch = "wasm32"))'.dependencies]
@@ -24,7 +24,7 @@ tokio = { version = "1.35", features = ["full"] }
 
 [target.'cfg(target_arch = "wasm32")'.dependencies]
 tokio = { version = "1.35", default-features = false }
-sqlite-wasm-rs = "0.5.2"
+sqlite-wasm-rs = { version = "0.5.2", features = ["loadable-extensions"] }
 # Transitive dep feature overrides for wasm32-unknown-unknown.
 # Cargo's feature unification activates these on the transitive deps.
 # - getrandom: pulled in by arrow (via ahash/const-random), needs "js" for wasm
diff --git a/ggsql-wasm/build-wasm.sh b/ggsql-wasm/build-wasm.sh
index afd5c3c02..fc480ae89 100755
--- a/ggsql-wasm/build-wasm.sh
+++ b/ggsql-wasm/build-wasm.sh
@@ -22,24 +22,55 @@ check_wasm32_support() {
         echo "Install an LLVM/clang toolchain with wasm backend support (e.g. 'sudo apt-get install llvm' on Debian/Ubuntu)." >&2
         exit 1
     fi
+    if ! command -v wasm-pack >/dev/null 2>&1; then
+        echo "Error: wasm-pack not found. Install with: cargo install wasm-pack" >&2
+        exit 1
+    fi
 }
 
 echo "Building WASM library..."
 (cd "$SCRIPT_DIR/library" && npm install && npm run build)
 
+SQLITE_WASM_RS="${SQLITE_WASM_RS:-$REPO_ROOT/../sqlite-wasm-rs}"
+if [ -d "$SQLITE_WASM_RS/loadable_extensions" ]; then
+    echo "Building loadable extensions..."
+    make -C "$SQLITE_WASM_RS/loadable_extensions"
+fi
+
 if [ "$SKIP_BINARY" = false ]; then
-    echo "Checking wasm32 compiler support..."
+    echo "Checking wasm build prerequisites..."
     check_wasm32_support
 
     echo "Building WASM binary..."
+    rm -rf "${SCRIPT_DIR:?}/pkg"  # start clean so stale wasm-bindgen snippets don't accumulate
     (cd "$SCRIPT_DIR" && wasm-pack build --target web --profile wasm --no-opt)
 
+    # wasm-bindgen is invoked directly so we can pass --keep-lld-exports,
+    # which preserves the LLD symbols that loadable extensions import.
+    # wasm-pack cannot forward that flag (rustwasm/wasm-pack#1092).
+    # Set WASM_BINDGEN to choose the binary explicitly; otherwise the last
+    # version-sorted match in wasm-pack's cache is used.
+    echo "Re-running wasm-bindgen with --keep-lld-exports..."
+    WASM_BINDGEN="${WASM_BINDGEN:-$(find "$HOME/Library/Caches/.wasm-pack" "$HOME/.cache/.wasm-pack" -name wasm-bindgen -type f 2>/dev/null | sort -V | tail -1 || true)}"
+    if [ -z "$WASM_BINDGEN" ] || [ ! -x "$WASM_BINDGEN" ]; then
+        echo "Error: could not locate an executable wasm-bindgen (wasm-pack cache or \$WASM_BINDGEN)." >&2
+        exit 1
+    fi
+    "$WASM_BINDGEN" \
+        --target web \
+        --keep-lld-exports \
+        --out-dir "$SCRIPT_DIR/pkg" \
+        "$REPO_ROOT/target/wasm32-unknown-unknown/wasm/ggsql_wasm.wasm"
+
     if [ "$SKIP_OPT" = false ]; then
         echo "Optimising WASM binary..."
         (cd "$SCRIPT_DIR" && wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features)
     else
         echo "Skipping wasm-opt (--skip-opt)."
     fi
+
+    echo "Adding snippets/ to package files..."
+    (cd "$SCRIPT_DIR/pkg" && npm pkg set 'files[]=snippets/')
 else
     echo "Skipping WASM binary build (--skip-binary)."
 fi
@@ -47,6 +78,20 @@ fi
 echo "Building WASM demo and Quarto integration..."
 (cd "$SCRIPT_DIR/demo" && npm install && npm run build)
 
+for wasm in "$SQLITE_WASM_RS"/loadable_extensions/*/test_ext.wasm \
+            "$SQLITE_WASM_RS"/loadable_extensions/*/mod_spatialite.wasm; do
+    if [ -f "$wasm" ]; then
+        name="${wasm##*/}"
+        echo "Copying $name..."
+        cp -- "$wasm" "$SCRIPT_DIR/pkg/"
+        # Copy into demo/dist only when it exists, instead of discarding
+        # every cp error with a blanket redirect.
+        if [ -d "$SCRIPT_DIR/demo/dist" ]; then
+            cp -- "$wasm" "$SCRIPT_DIR/demo/dist/"
+        fi
+    fi
+done
+
 echo "Copying output to doc/wasm..."
 rm -rf "$REPO_ROOT/doc/wasm"
 cp -r "$SCRIPT_DIR/demo/dist" "$REPO_ROOT/doc/wasm"
diff --git a/ggsql-wasm/demo/build.mjs b/ggsql-wasm/demo/build.mjs
index 8b4cc8c2c..d41fed513 100644
--- a/ggsql-wasm/demo/build.mjs
+++ b/ggsql-wasm/demo/build.mjs
@@ -25,6 +25,14 @@ copyFileSync(
   join(__dirname, "../../ggsql-vscode/syntaxes/ggsql.tmLanguage.json"),
   join(distDir, "ggsql.tmLanguage.json"),
 );
+for (const ext of ["test_ext", "mod_spatialite"]) {
+  try {
+    copyFileSync(
+      join(__dirname, `../pkg/${ext}.wasm`),
+      join(distDir, `${ext}.wasm`),
+    );
+  } catch (_) {}
+}
 
 // Build Monaco editor web worker
 console.log("Building Monaco editor worker...");
diff --git a/ggsql-wasm/demo/package-lock.json b/ggsql-wasm/demo/package-lock.json
index 86e043c17..4a6437e5b 100644
--- a/ggsql-wasm/demo/package-lock.json
+++ b/ggsql-wasm/demo/package-lock.json
@@ -22,6 +22,18 @@
         "vscode-textmate": "^9.3.0"
       }
     },
+    "../library": {
+      "name": "ggsql-wasm-lib",
+      "version": "0.0.0",
+      "extraneous": true,
+      "dependencies": {
+        "hyparquet": "^1.25.0"
+      },
+      "devDependencies": {
+        "esbuild": "^0.27.0",
+        "typescript": "^5.9.0"
+      }
+    },
     "../pkg": {
       "name": "ggsql-wasm",
       "version": "0.3.3",
diff --git a/ggsql-wasm/demo/src/context.ts b/ggsql-wasm/demo/src/context.ts
index e895418f1..465622cdf 100644
--- a/ggsql-wasm/demo/src/context.ts
+++ b/ggsql-wasm/demo/src/context.ts
@@ -1,4 +1,8 @@
-import init, { GgsqlContext } from "ggsql-wasm";
+import init, {
+  GgsqlContext,
+  initExtensionLoader,
+  installExtension,
+} from "ggsql-wasm";
 import { WASM_BASE } from "./wasmBase";
 
 export class WasmContextManager {
@@ -8,11 +12,28 @@ export class WasmContextManager {
   async initialize(): Promise<void> {
     if (this.initialized) return;
 
-    await init(WASM_BASE + "ggsql_wasm_bg.wasm");
+    const wasmExports = await init(WASM_BASE + "ggsql_wasm_bg.wasm");
+    initExtensionLoader(wasmExports);
     this.context = new GgsqlContext();
     this.initialized = true;
   }
 
+  async installExtension(name: string, url: string): Promise<void> {
+    await 
installExtension(name, url);
+  }
+
+  loadExtension(name: string): void {
+    this.getContext().load_extension(name, undefined);
+    // SpatiaLite's spatial_ref_sys table must be populated before ST_Transform can
+    // resolve SRIDs. Mark it ready only after success so a failed init is retried.
+    if (name === "mod_spatialite" && !this.spatialMetadataReady) {
+      this.getContext().execute_sql("SELECT InitSpatialMetaData(1)");
+      this.spatialMetadataReady = true;
+    }
+  }
+
+  private spatialMetadataReady = false;
+
   private getContext(): GgsqlContext {
     if (!this.context) {
       throw new Error("Context not initialized. Call initialize() first.");
diff --git a/ggsql-wasm/demo/src/examples.ts b/ggsql-wasm/demo/src/examples.ts
index 61b9acba1..8e7dc5b2c 100644
--- a/ggsql-wasm/demo/src/examples.ts
+++ b/ggsql-wasm/demo/src/examples.ts
@@ -2,6 +2,7 @@ export interface Example {
   name: string;
   query: string;
   section: string;
+  loadExtension?: string;
 }
 
 export const examples: Example[] = [
@@ -211,4 +212,26 @@ VISUALISE
 DRAW point MAPPING bill_len AS x, bill_dep AS y, body_mass AS size
 LABEL title => 'Penguin Measurements', x => 'Bill Length (mm)', y => 'Bill Depth (mm)'`,
   },
+
+  // === Extensions ===
+  {
+    section: "Extensions",
+    name: "Wasm Extension",
+    query: `-- Loaded from test_ext.wasm via the SQLite extension API
+SELECT test_ext_hello() AS greeting`,
+    loadExtension: "test_ext",
+  },
+  {
+    section: "Extensions",
+    name: "SpatiaLite",
+    query: `SELECT
+  spatialite_version() AS spatialite,
+  geos_version() AS geos,
+  proj_version() AS proj,
+  -- PROJ: reproject London (WGS84) to Web Mercator (metres)
+  ST_AsText(ST_Transform(MakePoint(-0.1276, 51.5074, 4326), 3857)) AS london_web_mercator,
+  -- GEOS: area of a 1 km buffer around the projected point
+  Round(ST_Area(ST_Buffer(ST_Transform(MakePoint(-0.1276, 51.5074, 4326), 3857), 1000)), 1) AS buffer_area_m2`,
+    loadExtension: "mod_spatialite",
+  },
 ];
diff --git a/ggsql-wasm/demo/src/main.ts b/ggsql-wasm/demo/src/main.ts
index 30cffda44..2f8fb43a1 100644
--- a/ggsql-wasm/demo/src/main.ts
+++ b/ggsql-wasm/demo/src/main.ts
@@ -5,6 +5,7 @@ import { WasmContextManager } from "./context";
 import { EditorManager } from "./editor";
 import { TableManager } from "./tableManager";
 import { examples } from "./examples";
+import { WASM_BASE } from "./wasmBase";
 
 // State
 const contextManager = new WasmContextManager();
@@ -156,8 +157,19 @@ function initializeExamples() {
     button.className = "example-button";
     button.textContent = example.name;
     button.onclick = () => {
+      if (example.loadExtension) {
+        try {
+          contextManager.loadExtension(example.loadExtension);
+          console.log(`[ext] load_extension("${example.loadExtension}") succeeded`);
+        } catch (e: any) {
+          console.error(`[ext] load_extension("${example.loadExtension}") FAILED:`, e);
+          if (!e.toString().includes("already loaded")) {
+            showProblems([`Extension load error: ${e}`], []);
+            return;
+          }
+        }
+      }
       editorManager.setValue(example.query);
-      //executeQuery(example.query);
     };
     examplesList.appendChild(button);
   });
@@ -186,7 +198,22 @@ function initializeMobileExamples() {
   select.addEventListener("change", () => {
     const idx = parseInt(select.value, 10);
     if (!isNaN(idx) && examples[idx]) {
-      editorManager.setValue(examples[idx].query);
+      const example = examples[idx];
+      if (example.loadExtension) {
+        try {
+          contextManager.loadExtension(example.loadExtension);
+          console.log(`[ext] load_extension("${example.loadExtension}") succeeded`);
+        } catch (e: any) {
+          console.error(`[ext] load_extension("${example.loadExtension}") FAILED:`, e);
+          // Surface the failure like the desktop buttons do; an "already loaded"
+          // error is harmless, so the example is still shown in that case.
+          if (!e.toString().includes("already loaded")) {
+            showProblems([`Extension load error: ${e}`], []);
+            return;
+          }
+        }
+      }
+      editorManager.setValue(example.query);
     }
   });
 }
@@ -200,6 +227,11 @@ async function main() {
     setStatus("Loading builtin datasets...", "loading");
     await contextManager.registerBuiltinDatasets();
 
+    // Install extensions (fetch + compile, but don't load into SQLite yet)
+    setStatus("Installing extensions...", "loading");
+    await contextManager.installExtension("test_ext", WASM_BASE + "test_ext.wasm");
+    
await contextManager.installExtension("mod_spatialite", WASM_BASE + "mod_spatialite.wasm");
+
     setStatus("Initializing editor...", "loading");
     await editorManager.initialize(editorContainer, examples[0].query);
 
diff --git a/ggsql-wasm/library/package.json b/ggsql-wasm/library/package.json
index d3ddd51ce..884cb3d02 100644
--- a/ggsql-wasm/library/package.json
+++ b/ggsql-wasm/library/package.json
@@ -3,6 +3,7 @@
   "version": "0.0.0",
   "private": true,
   "type": "module",
+  "main": "dist/lib.js",
   "scripts": {
     "build": "node build.mjs",
     "dev": "node build.mjs --watch",
diff --git a/ggsql-wasm/library/src/extensions.ts b/ggsql-wasm/library/src/extensions.ts
new file mode 100644
index 000000000..9c5ee6e24
--- /dev/null
+++ b/ggsql-wasm/library/src/extensions.ts
@@ -0,0 +1,405 @@
+// Wasm Exception Handling proposal types
+declare global {
+  namespace WebAssembly {
+    interface Tag {}
+    const Tag: { new (descriptor: { parameters: ValueType[] }): Tag };
+    interface Exception {}
+    const Exception: {
+      new (tag: Tag, payload: unknown[], options?: { traceStack?: boolean }): Exception;
+    };
+  }
+}
+
+/** An instantiated extension and the shared-table index of each function it exports. */
+interface LoadedExtension {
+  instance: WebAssembly.Instance;
+  exports: Record<string, number>;
+}
+
+const registry = new Map<string, LoadedExtension>();
+let lastError: string | null = null;
+let nextHandle = 1;
+const handleMap = new Map<number, string>();
+
+let sharedMemory: WebAssembly.Memory | null = null;
+let sharedTable: WebAssembly.Table | null = null;
+let hostExports: WebAssembly.Exports | null = null;
+
+/**
+ * Remember the main module's memory, function table and exports, and publish
+ * the dlOpen/dlSym/dlClose/dlError hooks as `globalThis.__sqlite_ext`.
+ *
+ * Call this with the main module's exports before `installExtension()`.
+ *
+ * @throws If the module does not export `memory` or `__indirect_function_table`.
+ */
+export function initExtensionLoader(wasmExports: WebAssembly.Exports): void {
+  hostExports = wasmExports;
+  sharedMemory = wasmExports.memory as WebAssembly.Memory;
+  sharedTable = wasmExports.__indirect_function_table as WebAssembly.Table;
+
+  if (!sharedMemory) throw new Error("Main module does not export 'memory'");
+  if (!sharedTable) throw new Error("Main module does not export '__indirect_function_table'");
+
+  (globalThis as any).__sqlite_ext = {
+    dlOpen,
+    dlSym,
+    dlClose,
+    dlError,
+  };
+}
+
+/**
+ * Compile and instantiate an extension against the main module's memory and
+ * table, then register it under `name` so that a later dlOpen() can find it.
+ *
+ * @param name       Registry key; dlOpen() matches it against the requested
+ *                   file name with any directory and `.wasm` suffix removed.
+ * @param wasmSource URL to fetch, a `Response`, or the module bytes.
+ * @throws If the loader is not initialised or the source cannot be fetched;
+ *         compile and instantiation errors propagate unchanged.
+ */
+export async function installExtension(
+  name: string,
+  wasmSource: BufferSource | Response | string,
+): Promise<void> {
+  if (!sharedMemory || !sharedTable || !hostExports) {
+    throw new Error("Call initExtensionLoader() before installExtension()");
+  }
+
+  let bytes: ArrayBuffer;
+  if (typeof wasmSource === "string") {
+    const response = await fetch(wasmSource);
+    if (!response.ok) throw new Error(`Failed to fetch extension: ${response.status}`);
+    bytes = await response.arrayBuffer();
+  } else if (wasmSource instanceof Response) {
+    if (!wasmSource.ok) throw new Error(`Failed to fetch extension: ${wasmSource.status}`);
+    bytes = await wasmSource.arrayBuffer();
+  } else if (wasmSource instanceof ArrayBuffer) {
+    bytes = wasmSource;
+  } else {
+    // A typed array or DataView may cover only part of its backing buffer;
+    // copy exactly the viewed range rather than taking the whole buffer.
+    const view = wasmSource as ArrayBufferView;
+    bytes = view.buffer.slice(view.byteOffset, view.byteOffset + view.byteLength) as ArrayBuffer;
+  }
+
+  const extModule = await WebAssembly.compile(bytes);
+
+  // NOTE(review): the region reserved below is sized from the .wasm file
+  // length rather than the module's declared data size, and __stack_pointer
+  // starts at its top — confirm data and stack cannot collide.
+  const currentBytes = sharedMemory.buffer.byteLength;
+  const extraPages = Math.ceil(bytes.byteLength / 65536) + 1;
+  sharedMemory.grow(extraPages);
+  const memBase = currentBytes;
+
+  const stackTop = memBase + extraPages * 65536;
+  sharedMemory.grow(1);
+  // Small scratch area for __wasm_lpad_context (3 x i32 = 12 bytes)
+  const lpadContextAddr = stackTop + 65536 - 64;
+
+  const moduleExportDescs = WebAssembly.Module.exports(extModule);
+
+  // Determine how many table slots the extension needs by inspecting its
+  // element segments. The wasm binary encodes table entries for all functions
+  // that may be called indirectly — far more than just the exported ones.
+  const tableSlots = countElementSegmentEntries(new Uint8Array(bytes));
+  const tableBase = sharedTable.length;
+  sharedTable.grow(tableSlots + 64);
+
+  const imports: WebAssembly.Imports = {
+    env: {
+      memory: sharedMemory,
+      __indirect_function_table: sharedTable,
+      __memory_base: new WebAssembly.Global({ value: "i32", mutable: false }, memBase),
+      __table_base: new WebAssembly.Global({ value: "i32", mutable: false }, tableBase),
+      __stack_pointer: new WebAssembly.Global({ value: "i32", mutable: true }, stackTop),
+    },
+  };
+
+  // Know which functions the extension itself exports (before instantiation).
+  // PIC --shared modules both import AND export the same symbols — env imports
+  // are for direct calls, GOT is for indirect. We use lazy trampolines so that
+  // direct calls to self-defined symbols bounce to the extension's own export.
+  const extExportNames = new Set(
+    moduleExportDescs.filter((e) => e.kind === "function").map((e) => e.name),
+  );
+  let extInstance: WebAssembly.Instance | null = null;
+  let cppExceptionTag: WebAssembly.Tag | null = null;
+
+  const moduleImportDescs = WebAssembly.Module.imports(extModule);
+  for (const imp of moduleImportDescs) {
+    if (imp.module === "env" && imp.name in (imports.env as Record<string, unknown>)) {
+      continue;
+    }
+
+    if (imp.module === "env" && imp.kind === "function") {
+      const hostFn = hostExports[imp.name];
+      if (typeof hostFn === "function") {
+        (imports.env as Record<string, unknown>)[imp.name] = hostFn;
+      } else if (imp.name === "abort") {
+        (imports.env as Record<string, unknown>)[imp.name] = () => {
+          throw new Error("[ext] abort() called from extension");
+        };
+      } else if (imp.name === "exit") {
+        (imports.env as Record<string, unknown>)[imp.name] = (code: number) => {
+          throw new Error(`[ext] exit(${code}) called from extension`);
+        };
+      } else if (extExportNames.has(imp.name)) {
+        // Symbol defined in the extension itself — lazy trampoline that calls
+        // the extension's own export once the instance exists.
+        const sym = imp.name;
+        (imports.env as Record<string, unknown>)[sym] = (...args: unknown[]) => {
+          const fn = extInstance?.exports[sym];
+          if (typeof fn === "function") return (fn as Function)(...args);
+          return 0;
+        };
+      } else if (imp.name === "__ext_trap") {
+        const trapNames: Record<number, string> = { 1: "abort()", 2: "__assert_fail()", 3: "abort() [stubs]" };
+        (imports.env as Record<string, unknown>)[imp.name] = (code: number) => {
+          const name = code >= 100 ? `exit(${code - 100})` : (trapNames[code] ?? `trap(${code})`);
+          throw new Error(`[ext] ${name} called from extension`);
+        };
+      } else if (imp.name === "_Unwind_RaiseException") {
+        (imports.env as Record<string, unknown>)[imp.name] = (excPtr: number) => {
+          if (cppExceptionTag) {
+            throw new WebAssembly.Exception(cppExceptionTag, [excPtr], { traceStack: true });
+          }
+          throw new Error("_Unwind_RaiseException: no cpp exception tag");
+        };
+      } else if (imp.name === "_Unwind_CallPersonality") {
+        (imports.env as Record<string, unknown>)[imp.name] = (excPtr: number) => {
+          const view = new DataView(sharedMemory!.buffer);
+          // Set adjustedPtr (excPtr - 8) to point to the thrown object (excPtr + 32)
+          view.setUint32(excPtr - 8, excPtr + 32, true);
+          view.setInt32(lpadContextAddr + 8, 1, true);
+          return 6; // _URC_HANDLER_FOUND
+        };
+      } else if (imp.name === "_Unwind_DeleteException") {
+        (imports.env as Record<string, unknown>)[imp.name] = () => {};
+      } else {
+        // Unresolved import: stub it to return 0. Warn once per symbol so a
+        // genuinely missing dependency is visible without flooding the console.
+        const unresName = imp.name;
+        let warned = false;
+        (imports.env as Record<string, unknown>)[imp.name] = (...args: unknown[]) => {
+          if (!warned) {
+            warned = true;
+            console.warn(`[ext] unresolved import '${unresName}' stubbed to return 0`);
+          }
+          void args;
+          return 0;
+        };
+      }
+    }
+
+    if (imp.module === "env" && (imp.kind as string) === "tag") {
+      const tag = new WebAssembly.Tag({ parameters: ["i32"] });
+      (imports.env as Record<string, unknown>)[imp.name] = tag;
+      if (imp.name === "__cpp_exception") {
+        cppExceptionTag = tag;
+      }
+    }
+
+    if ((imp.module === "GOT.func" || imp.module === "GOT.mem") && imp.kind === "global") {
+      if (!imports[imp.module]) imports[imp.module] = {};
+      const hostFn = hostExports[imp.name];
+      if (typeof hostFn === "function") {
+        const idx = sharedTable.length;
+        sharedTable.grow(1);
+        sharedTable.set(idx, hostFn as any);
+        (imports[imp.module] as Record<string, WebAssembly.Global>)[imp.name] =
+          new WebAssembly.Global({ value: "i32", mutable: true }, idx);
+      } else if (imp.module === "GOT.mem" && imp.name === "__wasm_lpad_context") {
+        (imports[imp.module] as Record<string, WebAssembly.Global>)[imp.name] =
+          new WebAssembly.Global({ value: "i32", mutable: true }, lpadContextAddr);
+      } else {
+        (imports[imp.module] as Record<string, WebAssembly.Global>)[imp.name] =
+          new WebAssembly.Global({ value: "i32", mutable: true }, 0);
+      }
+    }
+  }
+
+  extInstance = new WebAssembly.Instance(extModule, imports);
+
+  // PIC shared modules export __wasm_apply_data_relocs which patches data
+  // segment entries (vtables, function pointers) using GOT.func/GOT.mem values.
+  const applyRelocs = extInstance.exports.__wasm_apply_data_relocs as Function | undefined;
+  if (applyRelocs) {
+    applyRelocs();
+
+    // The module's start function (__wasm_apply_global_relocs) initialises
+    // GOT.mem entries but never GOT.func: a shared library can't assign its
+    // own table indices, so that's the dynamic linker's job — which, in the
+    // browser, is us. Resolve the GOT entries the host didn't provide and the
+    // module left at 0 (its own vtable/function-pointer symbols) from the
+    // module's exports, then re-run the data relocs so vtable slots get
+    // patched with the now-correct indices.
+    let fixedAny = false;
+    for (const imp of moduleImportDescs) {
+      if (imp.module === "GOT.func" && imp.kind === "global") {
+        const g = (imports["GOT.func"] as Record<string, WebAssembly.Global>)?.[imp.name];
+        if (g && g.value === 0) {
+          const fn = extInstance.exports[imp.name];
+          if (typeof fn === "function") {
+            const idx = sharedTable.length;
+            sharedTable.grow(1);
+            sharedTable.set(idx, fn as any);
+            g.value = idx;
+            fixedAny = true;
+          }
+        }
+      }
+      if (imp.module === "GOT.mem" && imp.kind === "global") {
+        const g = (imports["GOT.mem"] as Record<string, WebAssembly.Global>)?.[imp.name];
+        if (g && g.value === 0) {
+          const exp = extInstance.exports[imp.name];
+          if (exp && typeof exp === "object" && "value" in exp) {
+            g.value = (exp as WebAssembly.Global).value + memBase;
+            fixedAny = true;
+          }
+        }
+      }
+    }
+    if (fixedAny) {
+      applyRelocs();
+    }
+  }
+
+  const callCtors = extInstance.exports.__wasm_call_ctors as Function | undefined;
+  if (callCtors) {
+    callCtors();
+  }
+
+  const extExports: Record<string, number> = {};
+  for (const exp of moduleExportDescs) {
+    if (exp.kind === "function") {
+      const fn = extInstance.exports[exp.name];
+      const idx = sharedTable.length;
+      sharedTable.grow(1);
+      sharedTable.set(idx, fn as any);
+      extExports[exp.name] = idx;
+    }
+  }
+
+  registry.set(name, {
+    instance: extInstance,
+    exports: extExports,
+  });
+}
+
+/**
+ * Decode an unsigned LEB128 integer from `data` starting at `pos`.
+ * Returns `[value, nextPos]`: the value as an unsigned 32-bit number and the
+ * position just past its last byte.
+ */
+function readLEB128(data: Uint8Array, pos: number): [number, number] {
+  let val = 0, shift = 0;
+  while (true) {
+    const b = data[pos++];
+    val |= (b & 0x7f) << shift;
+    shift += 7;
+    if (!(b & 0x80)) break;
+  }
+  return [val >>> 0, pos];
+}
+
+/**
+ * Count the entries in a module's element section (section id 9).
+ *
+ * Only segments with flags 0 are decoded; counting stops at the first segment
+ * of any other kind. Returns 256 when nothing was counted, so the caller
+ * still reserves some table space.
+ */
+function countElementSegmentEntries(wasm: Uint8Array): number {
+  let pos = 8;
+  let total = 0;
+  while (pos < wasm.length) {
+    const sid = wasm[pos++];
+    let [size, p] = readLEB128(wasm, pos);
+    pos = p;
+    const end = pos + size;
+    if (sid === 9) {
+      let [count, p2] = readLEB128(wasm, pos);
+      pos = p2;
+      for (let i = 0; i < count; i++) {
+        const flags = wasm[pos++];
+        if (flags === 0) {
+          // Offset expression: skip the immediate of `i32.const` (0x41) or
+          // `global.get` (0x23) explicitly so an immediate byte equal to 0x0b
+          // is not mistaken for the terminating `end` opcode.
+          const op = wasm[pos];
+          if (op === 0x41 || op === 0x23) {
+            [, pos] = readLEB128(wasm, pos + 1);
+          }
+          // Never scan past the section: a truncated module must not hang.
+          while (pos < end && wasm[pos] !== 0x0b) pos++;
+          pos++;
+          let [numElem, p3] = readLEB128(wasm, pos);
+          pos = p3;
+          total += numElem;
+          for (let j = 0; j < numElem && pos < end; j++) {
+            [, pos] = readLEB128(wasm, pos);
+          }
+        } else {
+          break;
+        }
+      }
+      break;
+    }
+    pos = end;
+  }
+  return total || 256;
+}
+
+/**
+ * Published as `__sqlite_ext.dlOpen`. Strips any directory and `.wasm` suffix
+ * from `filename`, and returns a fresh non-zero handle for the installed
+ * extension of that name, or 0 (with `lastError` set) if there is none.
+ */
+function dlOpen(filename: string): number {
+  lastError = null;
+  const name = filename.replace(/^.*[\\/]/, "").replace(/\.wasm$/, "");
+  if (!registry.has(name)) {
+    lastError = `Extension '${name}' not installed. Call installExtension() first.`;
+    return 0;
+  }
+  const handle = nextHandle++;
+  handleMap.set(handle, name);
+  return handle;
+}
+
+/**
+ * Published as `__sqlite_ext.dlSym`. Returns the shared-table index of
+ * `symbol` in the extension behind `handle`, or 0 with `lastError` set.
+ */
+function dlSym(handle: number, symbol: string): number {
+  lastError = null;
+  const name = handleMap.get(handle);
+  if (!name) {
+    lastError = `Invalid extension handle: ${handle}`;
+    return 0;
+  }
+  const ext = registry.get(name);
+  if (!ext) {
+    lastError = `Extension '${name}' not found in registry`;
+    return 0;
+  }
+  const idx = ext.exports[symbol];
+  if (idx === undefined) {
+    lastError = `Symbol '${symbol}' not found in extension '${name}'`;
+    return 0;
+  }
+  return idx;
+}
+
+/** Published as `__sqlite_ext.dlClose`. Forgets `handle`; the extension stays registered. */
+function dlClose(handle: number): void {
+  handleMap.delete(handle);
+}
+
+/** Published as `__sqlite_ext.dlError`. Error from the latest dlOpen/dlSym call, or null. */
+function dlError(): string | null {
+  return lastError;
+}
diff --git a/ggsql-wasm/library/src/index.ts b/ggsql-wasm/library/src/index.ts
index 3b130354f..711eab51b 100644
--- a/ggsql-wasm/library/src/index.ts
+++ b/ggsql-wasm/library/src/index.ts
@@ -2,6 +2,9 @@
 export { convert_csv } from "./csv";
 export { convert_parquet } from "./parquet";
 
+// Extension loading
+export { initExtensionLoader, installExtension } from "./extensions";
+
 // Types
 export interface ColumnDescriptor {
   name: string;
diff --git a/ggsql-wasm/src/lib.rs b/ggsql-wasm/src/lib.rs
index 8fa1b3660..19a0c03aa 100644
--- a/ggsql-wasm/src/lib.rs
+++ 
b/ggsql-wasm/src/lib.rs @@ -16,16 +16,47 @@ use std::sync::Arc; use wasm_bindgen::prelude::*; // ============================================================================ -// JS bridge declarations — CSV and Parquet parsing only +// JS bridge declarations // ============================================================================ #[wasm_bindgen(module = "/library/dist/lib.js")] extern "C" { - #[wasm_bindgen(catch)] - async fn convert_parquet(data: &[u8]) -> Result; + #[wasm_bindgen(catch, js_name = convert_parquet)] + async fn convert_parquet_js(data: &[u8]) -> Result; - #[wasm_bindgen(catch)] - fn convert_csv(data: &[u8]) -> Result; + #[wasm_bindgen(catch, js_name = convert_csv)] + fn convert_csv_js(data: &[u8]) -> Result; + + #[wasm_bindgen(catch, js_name = initExtensionLoader)] + fn init_extension_loader_js(exports: &JsValue) -> Result<(), JsValue>; + + #[wasm_bindgen(catch, js_name = installExtension)] + async fn install_extension_js(name: &str, source: JsValue) -> Result; +} + +// ============================================================================ +// Package exports — forward to the JS helpers above +// ============================================================================ + +#[wasm_bindgen(js_name = convert_csv)] +pub fn convert_csv_export(data: &[u8]) -> Result { + convert_csv_js(data) +} + +#[wasm_bindgen(js_name = convert_parquet)] +pub async fn convert_parquet_export(data: &[u8]) -> Result { + convert_parquet_js(data).await +} + +#[wasm_bindgen(js_name = initExtensionLoader)] +pub fn init_extension_loader(exports: JsValue) -> Result<(), JsValue> { + init_extension_loader_js(&exports) +} + +#[wasm_bindgen(js_name = installExtension)] +pub async fn install_extension(name: String, source: JsValue) -> Result<(), JsValue> { + install_extension_js(&name, source).await?; + Ok(()) } // ============================================================================ @@ -251,7 +282,7 @@ impl GgsqlContext { /// Register a CSV file as a table from 
raw bytes pub fn register_csv(&self, name: &str, data: &[u8]) -> Result<(), JsValue> { - let columns_js = convert_csv(data) + let columns_js = convert_csv_js(data) .map_err(|e| JsValue::from_str(&format!("CSV parse error: {:?}", e)))?; let df = columns_js_to_dataframe(columns_js)?; let reader = self.reader.borrow(); @@ -262,7 +293,7 @@ impl GgsqlContext { /// Register a Parquet file as a table from raw bytes pub async fn register_parquet(&self, name: &str, data: &[u8]) -> Result<(), JsValue> { - let columns_js = convert_parquet(data) + let columns_js = convert_parquet_js(data) .await .map_err(|e| JsValue::from_str(&format!("Parquet parse error: {:?}", e)))?; let df = columns_js_to_dataframe(columns_js)?; @@ -277,7 +308,7 @@ impl GgsqlContext { for &name in ggsql::reader::data::KNOWN_DATASETS { if let Some(bytes) = ggsql::reader::data::builtin_parquet_bytes(name) { let table_name = ggsql::naming::builtin_data_table(name); - let columns_js = convert_parquet(bytes).await.map_err(|e| { + let columns_js = convert_parquet_js(bytes).await.map_err(|e| { JsValue::from_str(&format!("Parquet error for '{}': {:?}", name, e)) })?; let df = columns_js_to_dataframe(columns_js)?; @@ -290,6 +321,23 @@ impl GgsqlContext { Ok(()) } + /// Load a previously installed SQLite extension. + /// + /// `entry_point` is the C init function name. If omitted, SQLite + /// derives it from the extension name. 
+ pub fn load_extension(&self, name: &str, entry_point: Option) -> Result<(), JsValue> { + let reader = self.reader.borrow(); + let conn = reader.connection(); + unsafe { + conn.load_extension_enable() + .map_err(|e| JsValue::from_str(&format!("Enable load_extension error: {:?}", e)))?; + let result = conn.load_extension(name, entry_point.as_deref()); + let _ = conn.load_extension_disable(); + result.map_err(|e| JsValue::from_str(&format!("Load extension error: {:?}", e)))?; + } + Ok(()) + } + /// Unregister a table pub fn unregister(&self, name: &str) -> Result<(), JsValue> { let reader = self.reader.borrow(); diff --git a/src/reader/sqlite.rs b/src/reader/sqlite.rs index 301948dce..048e8fa22 100644 --- a/src/reader/sqlite.rs +++ b/src/reader/sqlite.rs @@ -213,26 +213,6 @@ impl Default for SqliteReader { } } -/// Validate a table name -fn validate_table_name(name: &str) -> Result<()> { - if name.is_empty() { - return Err(GgsqlError::ReaderError("Table name cannot be empty".into())); - } - - let forbidden = ['\0', '\n', '\r']; - for ch in forbidden { - if name.contains(ch) { - return Err(GgsqlError::ReaderError(format!( - "Table name '{}' contains invalid character '{}'", - name, - ch.escape_default() - ))); - } - } - - Ok(()) -} - /// Map an Arrow DataType to a SQLite column type string fn arrow_type_to_sqlite(dtype: &DataType) -> &'static str { match dtype { @@ -445,7 +425,7 @@ impl Reader for SqliteReader { } fn register(&self, name: &str, df: DataFrame, replace: bool) -> Result<()> { - validate_table_name(name)?; + super::validate_table_name(name)?; if self.table_exists(name) { if replace { diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdio.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdio.c index e9031a0b8..57758debe 100644 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdio.c +++ b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdio.c @@ -106,6 +106,7 @@ static int ptr_to_str(void *ptr, char *buffer) { return 2 + len; } 
+__attribute__((weak)) char *strncpy(char *dest, const char *src, size_t n) { char *d = dest; const char *s = src; @@ -259,6 +260,7 @@ static int vsnprintf_impl(char *buffer, size_t buffsz, const char *format, va_li return total_chars; } +__attribute__((weak)) int snprintf(char *restrict buffer, size_t buffsz, const char *restrict format, ...) { if (!buffer || buffsz == 0 || !format) return -1; @@ -270,38 +272,45 @@ int snprintf(char *restrict buffer, size_t buffsz, const char *restrict format, return result; } +__attribute__((weak)) int vsnprintf(char *restrict buffer, size_t buffsz, const char *restrict format, va_list vlist) { return vsnprintf_impl(buffer, buffsz, format, vlist); } +__attribute__((weak)) int fclose(FILE *stream) { (void)stream; return 0; } +__attribute__((weak)) FILE* fdopen(int fd, const char *mode) { (void)fd; (void)mode; return 0; } +__attribute__((weak)) int fputc(int c, FILE *stream) { (void)stream; return c; } +__attribute__((weak)) int fputs(const char *restrict str, FILE *restrict stream) { (void)str; (void)stream; return 0; } +__attribute__((weak)) size_t fwrite(const void *restrict buffer, size_t size, size_t nmemb, FILE *restrict stream) { (void)buffer; (void)stream; return size * nmemb; } +__attribute__((weak)) int fprintf(FILE *restrict stream, const char *restrict format, ...) 
{ (void)stream; (void)format; diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c index 0a4510735..149ba8ac1 100644 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c +++ b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c @@ -69,6 +69,7 @@ void reset_heap(void *new_heap_start) { free_list = NULL; } +__attribute__((weak)) void *malloc(size_t size) { if (size == 0) return NULL; @@ -98,6 +99,7 @@ void *malloc(size_t size) { return result; } +__attribute__((weak)) void free(void *ptr) { if (ptr == NULL) return; @@ -114,6 +116,7 @@ void free(void *ptr) { } } +__attribute__((weak)) void *calloc(size_t count, size_t size) { void *result = malloc(count * size); if (!result) return NULL; @@ -121,6 +124,7 @@ void *calloc(size_t count, size_t size) { return result; } +__attribute__((weak)) void *realloc(void *ptr, size_t new_size) { if (ptr == NULL) { return malloc(new_size); diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/string.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/string.c index 3f1b9a0fa..1a79a39b7 100644 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/string.c +++ b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/string.c @@ -1,6 +1,7 @@ #include // Derived from musl (MIT): https://git.musl-libc.org/cgit/musl/tree/src/string/memchr.c +__attribute__((weak)) void *memchr(const void *src, int c, size_t n) { const unsigned char *s = src; c = (unsigned char)c; @@ -8,6 +9,7 @@ void *memchr(const void *src, int c, size_t n) { return n ? 
(void *)s : 0; } +__attribute__((weak)) int memcmp(const void *lhs, const void *rhs, size_t count) { const unsigned char *l = lhs; const unsigned char *r = rhs; @@ -21,6 +23,7 @@ int memcmp(const void *lhs, const void *rhs, size_t count) { return 0; } +__attribute__((weak)) void *memcpy(void *restrict dst, const void *restrict src, size_t size) { unsigned char *d = dst; const unsigned char *s = src; @@ -30,6 +33,7 @@ void *memcpy(void *restrict dst, const void *restrict src, size_t size) { return dst; } +__attribute__((weak)) void *memmove(void *dst, const void *src, size_t count) { unsigned char *d = dst; const unsigned char *s = src; @@ -47,6 +51,7 @@ void *memmove(void *dst, const void *src, size_t count) { return dst; } +__attribute__((weak)) void *memset(void *dst, int value, size_t count) { unsigned char *p = dst; while (count--) { @@ -55,6 +60,7 @@ void *memset(void *dst, int value, size_t count) { return dst; } +__attribute__((weak)) char *strchr(const char *str, int c) { while (*str != (char)c) { if (*str == '\0') { @@ -65,12 +71,14 @@ char *strchr(const char *str, int c) { return (char *)str; } +__attribute__((weak)) size_t strlen(const char *str) { const char *s = str; while (*s) s++; return s - str; } +__attribute__((weak)) int strncmp(const char *left, const char *right, size_t n) { while (n-- > 0) { if (*left != *right) { diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/wctype.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/wctype.c index 4bcc276f7..6959ac98b 100644 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/wctype.c +++ b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/wctype.c @@ -1,13 +1,16 @@ #include +__attribute__((weak)) int iswlower(wint_t wch) { return (unsigned)wch - L'a' < 26; } +__attribute__((weak)) int iswupper(wint_t wch) { return (unsigned)wch - L'A' < 26; } +__attribute__((weak)) int iswpunct(wint_t wch) { return (wch >= 33 && wch <= 47) || (wch >= 58 && wch <= 64) || From 
7e7fb31cbae089ca5c0f65b0336c5284a0ddfce3 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 4 Jun 2026 14:57:50 +0100 Subject: [PATCH 02/11] Disable geoparquet in hyparquet for wasm --- ggsql-wasm/build-wasm.sh | 9 ++-- ggsql-wasm/demo/src/examples.ts | 71 +++++++++++++++++++++++++++---- ggsql-wasm/library/src/index.ts | 7 ++- ggsql-wasm/library/src/parquet.ts | 23 +++++++++- ggsql-wasm/src/lib.rs | 17 +++++++- src/reader/sqlite.rs | 35 +++++++++++++++ 6 files changed, 146 insertions(+), 16 deletions(-) diff --git a/ggsql-wasm/build-wasm.sh b/ggsql-wasm/build-wasm.sh index fc480ae89..eb4fe3840 100755 --- a/ggsql-wasm/build-wasm.sh +++ b/ggsql-wasm/build-wasm.sh @@ -32,15 +32,16 @@ echo "Building WASM library..." (cd "$SCRIPT_DIR/library" && npm install && npm run build) SQLITE_WASM_RS="${SQLITE_WASM_RS:-$REPO_ROOT/../sqlite-wasm-rs}" -if [ -d "$SQLITE_WASM_RS/loadable_extensions" ]; then - echo "Building loadable extensions..." - make -C "$SQLITE_WASM_RS/loadable_extensions" -fi if [ "$SKIP_BINARY" = false ]; then echo "Checking wasm build prerequisites..." check_wasm32_support + if [ -d "$SQLITE_WASM_RS/loadable_extensions" ]; then + echo "Building loadable extensions..." + make -C "$SQLITE_WASM_RS/loadable_extensions" + fi + echo "Building WASM binary..." 
rm -rf "$SCRIPT_DIR/pkg" # start clean so stale wasm-bindgen snippets don't accumulate (cd "$SCRIPT_DIR" && wasm-pack build --target web --profile wasm --no-opt) diff --git a/ggsql-wasm/demo/src/examples.ts b/ggsql-wasm/demo/src/examples.ts index 8e7dc5b2c..4157348ae 100644 --- a/ggsql-wasm/demo/src/examples.ts +++ b/ggsql-wasm/demo/src/examples.ts @@ -224,14 +224,69 @@ SELECT test_ext_hello() AS greeting`, { section: "Extensions", name: "SpatiaLite", - query: `SELECT - spatialite_version() AS spatialite, - geos_version() AS geos, - proj_version() AS proj, - -- PROJ: reproject London (WGS84) to Web Mercator (metres) - ST_AsText(ST_Transform(MakePoint(-0.1276, 51.5074, 4326), 3857)) AS london_web_mercator, - -- GEOS: area of a 1 km buffer around the projected point - Round(ST_Area(ST_Buffer(ST_Transform(MakePoint(-0.1276, 51.5074, 4326), 3857), 1000)), 1) AS buffer_area_m2`, + query: `-- SpatiaLite reprojects world cities from WGS84 lon/lat (EPSG:4326) +-- to Web Mercator metres (EPSG:3857) via PROJ, then plots them as a map. +WITH cities(name, lon, lat) AS ( + VALUES + ('London', -0.1276, 51.5074), + ('New York', -74.0060, 40.7128), + ('Tokyo', 139.6917, 35.6895), + ('Sydney', 151.2093, -33.8688), + ('Cape Town', 18.4241, -33.9249), + ('Rio de Janeiro', -43.1729, -22.9068), + ('Moscow', 37.6173, 55.7558) +) +SELECT + name, + ST_X(ST_Transform(MakePoint(lon, lat, 4326), 3857)) AS x, + ST_Y(ST_Transform(MakePoint(lon, lat, 4326), 3857)) AS y +FROM cities +VISUALISE x AS x, y AS y +DRAW point SETTING size => 6 +DRAW text MAPPING name AS label SETTING vjust => 'bottom', offset => [0, -8] +LABEL + title => 'World cities in Web Mercator (EPSG:3857)', + subtitle => 'Reprojected from WGS84 with SpatiaLite ST_Transform (PROJ)', + x => 'Easting (m)', + y => 'Northing (m)'`, + loadExtension: "mod_spatialite", + }, + { + section: "Extensions", + name: "World map", + query: `-- Country outlines from the built-in ggsql:world dataset. 
SpatiaLite +-- reprojects each country (PROJ, to equal-area EPSG:6933), simplifies it +-- (GEOS, 25 km) to thin the geometry, then a numbers table explodes every +-- polygon ring into ordered vertices for the path layer (ggsql can't pass +-- recursive CTEs to SQLite, so the vertex indices come from a join). +WITH +nums(i) AS ( + SELECT 1 + d0.d + 10 * d1.d + 100 * d2.d AS i + FROM (SELECT 0 AS d UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d0, + (SELECT 0 AS d UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d1, + (SELECT 0 AS d UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d2 +), +geo AS ( + SELECT name, continent, + ST_Simplify(ST_Transform(CastToMulti(GeomFromWKB(geom, 4326)), 6933), 25000) AS g + FROM ggsql:world +), +ring AS ( + SELECT name, continent, ST_ExteriorRing(ST_GeometryN(g, nums.i)) AS r, nums.i AS pidx + FROM geo JOIN nums ON nums.i <= ST_NumGeometries(g) +) +SELECT + ring.name || '-' || ring.pidx AS ring_id, + ring.continent, + nums.i AS vidx, + ST_X(ST_PointN(ring.r, nums.i)) AS x, + ST_Y(ST_PointN(ring.r, nums.i)) AS y +FROM ring JOIN nums ON nums.i <= ST_NumPoints(ring.r) +VISUALISE x AS x, y AS y +DRAW path MAPPING continent AS color SETTING linewidth => 0.5 PARTITION BY ring_id ORDER BY vidx +LABEL + title => 'World country outlines (EPSG:6933 equal-area)', + color => 'Continent'`, loadExtension: "mod_spatialite", }, ]; diff --git a/ggsql-wasm/library/src/index.ts b/ggsql-wasm/library/src/index.ts index 711eab51b..a6de6a9fa 100644 --- a/ggsql-wasm/library/src/index.ts +++ b/ggsql-wasm/library/src/index.ts @@ -9,7 +9,9 @@ export { initExtensionLoader, installExtension } from 
"./extensions"; export interface ColumnDescriptor { name: string; type: ColumnType; - values: Float64Array | Uint8Array | string[]; + // "binary" columns carry one Uint8Array per row; all others use the typed + // forms below. + values: Float64Array | Uint8Array | string[] | Uint8Array[]; nulls: Uint8Array; } @@ -19,7 +21,8 @@ export type ColumnType = | "bool" | "date" | "datetime" - | "string"; + | "string" + | "binary"; export const EPOCH = Date.UTC(1970, 0, 1); export const MS_PER_DAY = 86400000; diff --git a/ggsql-wasm/library/src/parquet.ts b/ggsql-wasm/library/src/parquet.ts index 7789161b3..f09763961 100644 --- a/ggsql-wasm/library/src/parquet.ts +++ b/ggsql-wasm/library/src/parquet.ts @@ -22,6 +22,8 @@ export async function convert_parquet( const rows: Record[] = await parquetReadObjects({ file: asyncBuffer, + geoparquet: false, + utf8: false, }); if (rows.length === 0) return []; @@ -41,6 +43,7 @@ function inferColumnType(values: unknown[]): ColumnType { let hasNumber = false; let hasBool = false; let hasDate = false; + let hasBinary = false; let allSafeInt = true; let allMidnight = true; @@ -48,7 +51,9 @@ function inferColumnType(values: unknown[]): ColumnType { const v = values[i]; if (v === null || v === undefined) continue; - if (v instanceof Date) { + if (v instanceof Uint8Array) { + hasBinary = true; + } else if (v instanceof Date) { hasDate = true; if ( v.getUTCHours() !== 0 || @@ -71,6 +76,7 @@ function inferColumnType(values: unknown[]): ColumnType { } } + if (hasBinary) return "binary"; if (hasDate) return allMidnight ? "date" : "datetime"; if (hasBool && !hasNumber) return "bool"; if (hasNumber) return allSafeInt ? 
"i64" : "f64"; @@ -142,6 +148,21 @@ function buildColumn(name: string, rawValues: unknown[]): ColumnDescriptor { return { name, type, values, nulls }; } + if (type === "binary") { + const values: Uint8Array[] = []; + for (let i = 0; i < len; i++) { + const v = rawValues[i]; + if (v === null || v === undefined) { + values.push(new Uint8Array(0)); + nulls[i] = 0; + } else { + values.push(v as Uint8Array); + nulls[i] = 1; + } + } + return { name, type, values, nulls }; + } + // string const values: string[] = []; for (let i = 0; i < len; i++) { diff --git a/ggsql-wasm/src/lib.rs b/ggsql-wasm/src/lib.rs index 19a0c03aa..b19159f9c 100644 --- a/ggsql-wasm/src/lib.rs +++ b/ggsql-wasm/src/lib.rs @@ -1,5 +1,5 @@ use arrow::array::{ - ArrayRef, BooleanArray, Date32Array, Float64Array, Int64Array, StringArray, + ArrayRef, BinaryArray, BooleanArray, Date32Array, Float64Array, Int64Array, StringArray, TimestampMillisecondArray, }; use ggsql::array_util::value_to_string; @@ -148,6 +148,21 @@ fn columns_js_to_dataframe(columns_js: JsValue) -> Result { .collect(); Arc::new(StringArray::from(values)) } + "binary" => { + // One Uint8Array per row (e.g. WKB geometry from GeoParquet). 
+ let arr = js_sys::Array::from(&values_js); + let values: Vec>> = (0..arr.length()) + .zip(nulls.iter()) + .map(|(j, &n)| { + if n != 0 { + Some(js_sys::Uint8Array::new(&arr.get(j)).to_vec()) + } else { + None + } + }) + .collect(); + Arc::new(BinaryArray::from_iter(values.iter().map(|o| o.as_deref()))) + } "date" => { // Date32: days since Unix epoch let raw = js_sys::Float64Array::new(&values_js).to_vec(); diff --git a/src/reader/sqlite.rs b/src/reader/sqlite.rs index 048e8fa22..9375b951e 100644 --- a/src/reader/sqlite.rs +++ b/src/reader/sqlite.rs @@ -229,6 +229,7 @@ fn arrow_type_to_sqlite(dtype: &DataType) -> &'static str { DataType::Date32 => "TEXT", DataType::Timestamp(_, _) => "TEXT", DataType::Time64(_) => "TEXT", + DataType::Binary | DataType::LargeBinary => "BLOB", _ => "TEXT", } } @@ -332,6 +333,14 @@ fn array_value_to_sqlite(array: &ArrayRef, row_idx: usize) -> rusqlite::types::V .and_then(|t| to_sql_value(&t)) .unwrap_or(Value::Null) } + DataType::Binary => { + let arr = array.as_any().downcast_ref::().unwrap(); + Value::Blob(arr.value(row_idx).to_vec()) + } + DataType::LargeBinary => { + let arr = array.as_any().downcast_ref::().unwrap(); + Value::Blob(arr.value(row_idx).to_vec()) + } _ => { // Fallback: use array_util::value_to_string Value::Text(crate::array_util::value_to_string(array, row_idx)) @@ -1072,6 +1081,32 @@ mod tests { // Should fall back to String since we have mixed types } + #[test] + fn test_binary_column_stored_as_blob() { + let reader = SqliteReader::new().unwrap(); + + // Arrow Binary must reach SQLite as a BLOB (not stringified), so spatial + // functions like GeomFromWKB receive raw bytes. 
+ let blobs: ArrayRef = Arc::new(BinaryArray::from(vec![ + Some([0x01u8, 0x02, 0x03].as_slice()), + Some([0xDE, 0xAD, 0xBE, 0xEF].as_slice()), + None, + ])); + let df = DataFrame::new(vec![("b", blobs)]).unwrap(); + reader.register("blob_data", df, false).unwrap(); + + let result = reader + .execute_sql("SELECT typeof(b) AS t, hex(b) AS h FROM blob_data ORDER BY rowid") + .unwrap(); + assert_eq!(result.height(), 3); + let t = result.column("t").unwrap(); + let h = result.column("h").unwrap(); + assert_eq!(crate::array_util::value_to_string(t, 0), "blob"); + assert_eq!(crate::array_util::value_to_string(h, 0), "010203"); + assert_eq!(crate::array_util::value_to_string(h, 1), "DEADBEEF"); + assert_eq!(crate::array_util::value_to_string(t, 2), "null"); + } + #[test] fn test_date_column_roundtrip() { let reader = SqliteReader::new().unwrap(); From 73988b331b6bd10eb309f96c113413ffcdf842c4 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 10 Jun 2026 10:57:04 +0100 Subject: [PATCH 03/11] Define wasm32-unknown-unknown malloc and friends on the Rust heap --- tree-sitter-ggsql/bindings/rust/build.rs | 1 - tree-sitter-ggsql/bindings/rust/lib.rs | 72 ++++++++ .../bindings/rust/wasm-sysroot/src/stdlib.c | 167 ------------------ 3 files changed, 72 insertions(+), 168 deletions(-) delete mode 100644 tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c diff --git a/tree-sitter-ggsql/bindings/rust/build.rs b/tree-sitter-ggsql/bindings/rust/build.rs index 3b755f68c..d9e1d491d 100644 --- a/tree-sitter-ggsql/bindings/rust/build.rs +++ b/tree-sitter-ggsql/bindings/rust/build.rs @@ -137,7 +137,6 @@ fn main() { .include(&src_dir) .opt_level_str(opt_level) .file(sysroot_dir.join("src").join("stdio.c")) - .file(sysroot_dir.join("src").join("stdlib.c")) .file(sysroot_dir.join("src").join("string.c")) .file(sysroot_dir.join("src").join("wctype.c")) .compile("stdlib"); diff --git a/tree-sitter-ggsql/bindings/rust/lib.rs b/tree-sitter-ggsql/bindings/rust/lib.rs index 
41cd36f96..6cf5b96d3 100644 --- a/tree-sitter-ggsql/bindings/rust/lib.rs +++ b/tree-sitter-ggsql/bindings/rust/lib.rs @@ -19,6 +19,78 @@ pub fn language() -> Language { /// The node types and field names used by the ggsql grammar pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); +/// The C libc allocator for wasm32-unknown-unknown builds. +/// +/// The C code linked into the module (the generated parser and the +/// tree-sitter runtime) has no libc, so `malloc` and friends are defined +/// here on the Rust global allocator and the whole module shares one heap. +/// Each allocation carries a header recording its size, so the `Layout` +/// can be reconstructed on free. +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +mod wasm_alloc { + use core::ptr::{null_mut, write_bytes}; + use std::alloc::{alloc, dealloc, realloc as rust_realloc, Layout}; + + const HEADER: usize = core::mem::size_of::<usize>() * 2; + + #[no_mangle] + unsafe extern "C" fn malloc(size: usize) -> *mut u8 { + let layout = Layout::from_size_align_unchecked(size + HEADER, HEADER); + let ptr = alloc(layout); + if ptr.is_null() { + return null_mut(); + } + *ptr.cast::<usize>() = size; + ptr.add(HEADER) + } + + #[no_mangle] + unsafe extern "C" fn free(ptr: *mut u8) { + if ptr.is_null() { + return; + } + let base = ptr.sub(HEADER); + let size = *base.cast::<usize>(); + dealloc( + base, + Layout::from_size_align_unchecked(size + HEADER, HEADER), + ); + } + + #[no_mangle] + unsafe extern "C" fn realloc(ptr: *mut u8, new_size: usize) -> *mut u8 { + if ptr.is_null() { + return malloc(new_size); + } + let base = ptr.sub(HEADER); + let size = *base.cast::<usize>(); + let layout = Layout::from_size_align_unchecked(size + HEADER, HEADER); + let new = rust_realloc(base, layout, new_size + HEADER); + if new.is_null() { + return null_mut(); + } + *new.cast::<usize>() = new_size; + new.add(HEADER) + } + + #[no_mangle] + unsafe extern "C" fn calloc(count: usize, size: usize) -> *mut u8 { + let Some(total) = 
count.checked_mul(size) else { + return null_mut(); + }; + let ptr = malloc(total); + if !ptr.is_null() { + write_bytes(ptr, 0, total); + } + ptr + } + + #[no_mangle] + unsafe extern "C" fn abort() -> ! { + std::process::abort() + } +} + /// The highlighting queries for ggsql syntax pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c deleted file mode 100644 index 149ba8ac1..000000000 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c +++ /dev/null @@ -1,167 +0,0 @@ -// This file implements a very simple allocator for external scanners running -// in Wasm. Allocation is just bumping a static pointer and growing the heap -// as needed, and freeing is just adding the freed region to a free list. -// When additional memory is allocated, the free list is searched first. -// If there is not a suitable region in the free list, the heap is -// grown as necessary, and the allocation is made at the end of the heap. -// When the heap is reset, all allocated memory is considered freed. - -#include -#include -#include - -extern void tree_sitter_debug_message(const char *, size_t); - -#define PAGESIZE 0x10000 -#define MAX_HEAP_SIZE (1024 * 1024 * 1024) - -typedef struct Region { - size_t size; - struct Region *next; - char data[0]; -} Region; - -static Region *heap_end = NULL; -static Region *heap_start = NULL; -static Region *next = NULL; -static Region *free_list = NULL; - -// Get the region metadata for the given heap pointer. -static inline Region *region_for_ptr(void *ptr) { - return ((Region *)ptr) - 1; -} - -// Get the location of the next region after the given region, -// if the given region had the given size. 
-static inline Region *region_after(Region *self, size_t len) { - char *address = self->data + len; - char *aligned = (char *)((uintptr_t)(address + 3) & ~0x3); - return (Region *)aligned; -} - -static void *get_heap_end() { - return (void *)(__builtin_wasm_memory_size(0) * PAGESIZE); -} - -static int grow_heap(size_t size) { - size_t new_page_count = ((size - 1) / PAGESIZE) + 1; - return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX; -} - -// Grows the heap if necessary to fit a region at the _end_ of the heap -// ending at `region_end` by `size` bytes. -// -// Returns 0 if the heap could not be grown, 1 otherwise. -static inline int grow_heap_for_region(Region *region_end, size_t size) { - if (region_end > heap_end) { - if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) return 0; - if (!grow_heap(size)) return 0; - heap_end = get_heap_end(); - } - return 1; -} - -// Clear out the heap, and move it to the given address. -void reset_heap(void *new_heap_start) { - heap_start = new_heap_start; - next = new_heap_start; - heap_end = get_heap_end(); - free_list = NULL; -} - -__attribute__((weak)) -void *malloc(size_t size) { - if (size == 0) return NULL; - - Region *prev = NULL; - Region *curr = free_list; - while (curr != NULL) { - if (curr->size >= size) { - if (prev == NULL) { - free_list = curr->next; - } else { - prev->next = curr->next; - } - return &curr->data; - } - prev = curr; - curr = curr->next; - } - - Region *region_end = region_after(next, size); - - if (!grow_heap_for_region(region_end, size)) return NULL; - - void *result = &next->data; - next->size = size; - next = region_end; - - return result; -} - -__attribute__((weak)) -void free(void *ptr) { - if (ptr == NULL) return; - - Region *region = region_for_ptr(ptr); - Region *region_end = region_after(region, region->size); - - // When freeing the last allocated pointer, re-use that - // pointer for the next allocation. 
- if (region_end == next) { - next = region; - } else { - region->next = free_list; - free_list = region; - } -} - -__attribute__((weak)) -void *calloc(size_t count, size_t size) { - void *result = malloc(count * size); - if (!result) return NULL; - memset(result, 0, count * size); - return result; -} - -__attribute__((weak)) -void *realloc(void *ptr, size_t new_size) { - if (ptr == NULL) { - return malloc(new_size); - } - if (new_size == 0) { - free(ptr); - return NULL; - } - - - Region *region = region_for_ptr(ptr); - Region *region_end = region_after(region, region->size); - - // When reallocating the last allocated region, resize - // in place if possible, return the same pointer, and - // skip copying the data. - if (region_end == next) { - Region *new_region_end = region_after(region, new_size); - - size_t additional_size = (char *)new_region_end - (char *)heap_end; - if (!grow_heap_for_region(new_region_end, additional_size)) return NULL; - - region->size = new_size; - next = new_region_end; - return &region->data; - } - - void *result = malloc(new_size); - if (!result) return NULL; - - size_t copy_size = region->size < new_size ? 
region->size : new_size; - memcpy(result, &region->data, copy_size); - - free(ptr); - return result; -} - -__attribute__((noreturn)) void abort(void) { - __builtin_trap(); -} From 2f239397fa514dd692e5991706b633d7960c3226 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 10 Jun 2026 11:52:19 +0100 Subject: [PATCH 04/11] Extension loading refinements --- ggsql-wasm/library/build.mjs | 2 +- ggsql-wasm/library/src/extensions.ts | 224 +++++++++++++++++++++------ ggsql-wasm/library/tsconfig.json | 4 +- ggsql-wasm/src/lib.rs | 5 +- 4 files changed, 184 insertions(+), 51 deletions(-) diff --git a/ggsql-wasm/library/build.mjs b/ggsql-wasm/library/build.mjs index 4a51888cd..77df5bcd6 100644 --- a/ggsql-wasm/library/build.mjs +++ b/ggsql-wasm/library/build.mjs @@ -11,7 +11,7 @@ const buildOptions = { outfile: join(__dirname, "dist/lib.js"), format: "esm", platform: "browser", - target: "es2020", + target: "es2022", sourcemap: true, }; diff --git a/ggsql-wasm/library/src/extensions.ts b/ggsql-wasm/library/src/extensions.ts index 9c5ee6e24..a2ae399d8 100644 --- a/ggsql-wasm/library/src/extensions.ts +++ b/ggsql-wasm/library/src/extensions.ts @@ -15,6 +15,12 @@ interface LoadedExtension { exports: Record; } +const PAGE = 65536; + +// Dedicated shadow-stack size for each extension. The stack sits above the +// extension's data segment and grows downward. +const EXT_STACK_SIZE = 16 * 1024 * 1024; + const registry = new Map(); let lastError: string | null = null; let nextHandle = 1; @@ -24,6 +30,28 @@ let sharedMemory: WebAssembly.Memory | null = null; let sharedTable: WebAssembly.Table | null = null; let hostExports: WebAssembly.Exports | null = null; +// Canonical table index per function, so the same function always has the +// same "address". 
+const tableIndexCache = new Map(); + +function canonicalTableIndex(fn: Function): number { + const cached = tableIndexCache.get(fn); + if (cached !== undefined) return cached; + const idx = sharedTable!.grow(1); + sharedTable!.set(idx, fn as any); + tableIndexCache.set(fn, idx); + return idx; +} + +function cacheTableRange(start: number, end: number): void { + for (let i = start; i < end; i++) { + const fn = sharedTable!.get(i); + if (typeof fn === "function" && !tableIndexCache.has(fn)) { + tableIndexCache.set(fn, i); + } + } +} + export function initExtensionLoader(wasmExports: WebAssembly.Exports): void { hostExports = wasmExports; sharedMemory = wasmExports.memory as WebAssembly.Memory; @@ -32,6 +60,8 @@ export function initExtensionLoader(wasmExports: WebAssembly.Exports): void { if (!sharedMemory) throw new Error("Main module does not export 'memory'"); if (!sharedTable) throw new Error("Main module does not export '__indirect_function_table'"); + cacheTableRange(0, sharedTable.length); + (globalThis as any).__sqlite_ext = { dlOpen, dlSym, @@ -48,6 +78,11 @@ export async function installExtension( throw new Error("Call initExtensionLoader() before installExtension()"); } + if (registry.has(name)) { + console.warn(`[ext] extension '${name}' is already installed; skipping`); + return; + } + let bytes: ArrayBuffer; if (typeof wasmSource === "string") { const response = await fetch(wasmSource); @@ -55,30 +90,54 @@ export async function installExtension( bytes = await response.arrayBuffer(); } else if (wasmSource instanceof Response) { bytes = await wasmSource.arrayBuffer(); + } else if (ArrayBuffer.isView(wasmSource)) { + bytes = + wasmSource.byteOffset === 0 && wasmSource.byteLength === wasmSource.buffer.byteLength + ? (wasmSource.buffer as ArrayBuffer) + : (wasmSource.buffer.slice( + wasmSource.byteOffset, + wasmSource.byteOffset + wasmSource.byteLength, + ) as ArrayBuffer); } else { - bytes = wasmSource instanceof ArrayBuffer ? 
wasmSource : (wasmSource as Uint8Array).buffer; + bytes = wasmSource as ArrayBuffer; } + const wasmBytes = new Uint8Array(bytes); const extModule = await WebAssembly.compile(bytes); + // Memory layout: [data segment (dylink.0 memory_size)][stack][lpad page]. + // The dylink.0 section declares the module's data+bss size; the file size + // is only a (typically over-, possibly under-) estimate kept as a fallback. + const dylink = parseDylinkMemInfo(wasmBytes); + let dataSize: number; + if (dylink) { + dataSize = dylink.memorySize; + if (1 << dylink.memoryAlign > PAGE) { + console.warn( + `[ext] '${name}' requests 2^${dylink.memoryAlign} memory alignment; only page alignment is provided`, + ); + } + } else { + console.warn(`[ext] '${name}' has no dylink.0 section; sizing data segment from file size`); + dataSize = bytes.byteLength; + } + + const dataBytes = alignUp(dataSize, PAGE); const currentBytes = sharedMemory.buffer.byteLength; - const extraPages = Math.ceil(bytes.byteLength / 65536) + 1; - sharedMemory.grow(extraPages); + sharedMemory.grow((dataBytes + EXT_STACK_SIZE + PAGE) / PAGE); const memBase = currentBytes; - - const stackTop = memBase + extraPages * 65536; - sharedMemory.grow(1); + const stackTop = memBase + dataBytes + EXT_STACK_SIZE; // Small scratch area for __wasm_lpad_context (3 x i32 = 12 bytes) - const lpadContextAddr = stackTop + 65536 - 64; + const lpadContextAddr = stackTop + PAGE - 64; const moduleExportDescs = WebAssembly.Module.exports(extModule); - // Determine how many table slots the extension needs by inspecting its - // element segments. The wasm binary encodes table entries for all functions - // that may be called indirectly — far more than just the exported ones. - const tableSlots = countElementSegmentEntries(new Uint8Array(bytes)); + // Table slots for the module's element segments. dylink.0 states the count; + // fall back to parsing the element section. 
Later needs (GOT entries, + // dlSym exports) grow the table on demand via canonicalTableIndex. + const tableSlots = dylink?.tableSize ?? countElementSegmentEntries(wasmBytes); const tableBase = sharedTable.length; - sharedTable.grow(tableSlots + 64); + sharedTable.grow(tableSlots); const imports: WebAssembly.Imports = { env: { @@ -102,7 +161,7 @@ export async function installExtension( const moduleImportDescs = WebAssembly.Module.imports(extModule); for (const imp of moduleImportDescs) { - if (imp.module === "env" && imp.name in (imports.env as Record)) { + if (imp.module === "env" && Object.hasOwn(imports.env as object, imp.name)) { continue; } @@ -124,8 +183,10 @@ export async function installExtension( const sym = imp.name; (imports.env as Record)[sym] = (...args: unknown[]) => { const fn = extInstance?.exports[sym]; - if (typeof fn === "function") return (fn as Function)(...args); - return 0; + if (typeof fn !== "function") { + throw new Error(`[ext] self-import '${sym}' called before instantiation completed`); + } + return (fn as Function)(...args); }; } else if (imp.name === "__ext_trap") { const trapNames: Record = { 1: "abort()", 2: "__assert_fail()", 3: "abort() [stubs]" }; @@ -141,15 +202,30 @@ export async function installExtension( throw new Error("_Unwind_RaiseException: no cpp exception tag"); }; } else if (imp.name === "_Unwind_CallPersonality") { + // Minimal personality: The enclosing landing pad handles the exception + // Offsets are the libc++abi wasm32 __cxa_exception layout. 
+ const ADJUSTED_PTR_OFFSET = -8; + const THROWN_OBJECT_OFFSET = 32; + const LPAD_SELECTOR_OFFSET = 8; + const URC_HANDLER_FOUND = 6; (imports.env as Record)[imp.name] = (excPtr: number) => { const view = new DataView(sharedMemory!.buffer); - // Set adjustedPtr (excPtr - 8) to point to the thrown object (excPtr + 32) - view.setUint32(excPtr - 8, excPtr + 32, true); - view.setInt32(lpadContextAddr + 8, 1, true); - return 6; // _URC_HANDLER_FOUND + view.setUint32(excPtr + ADJUSTED_PTR_OFFSET, excPtr + THROWN_OBJECT_OFFSET, true); + view.setInt32(lpadContextAddr + LPAD_SELECTOR_OFFSET, 1, true); + return URC_HANDLER_FOUND; }; } else if (imp.name === "_Unwind_DeleteException") { - (imports.env as Record)[imp.name] = () => {}; + (imports.env as Record)[imp.name] = (excPtr: number) => { + // _Unwind_Exception holds a cleanup function pointer at offset 8 + // libc++abi points it at the routine that destroys and frees the + // exception object. + const URC_FOREIGN_EXCEPTION_CAUGHT = 1; + const cleanupIdx = new DataView(sharedMemory!.buffer).getUint32(excPtr + 8, true); + if (cleanupIdx) { + const fn = sharedTable!.get(cleanupIdx); + if (typeof fn === "function") fn(URC_FOREIGN_EXCEPTION_CAUGHT, excPtr); + } + }; } else { // Unresolved import: stub it to return 0. Warn once per symbol so a // genuinely missing dependency is visible without flooding the console. @@ -167,7 +243,9 @@ export async function installExtension( } if (imp.module === "env" && (imp.kind as string) === "tag") { - const tag = new WebAssembly.Tag({ parameters: ["i32"] }); + const params = (imp as { type?: { parameters?: WebAssembly.ValueType[] } }).type + ?.parameters ?? 
["i32"]; + const tag = new WebAssembly.Tag({ parameters: params as WebAssembly.ValueType[] }); (imports.env as Record)[imp.name] = tag; if (imp.name === "__cpp_exception") { cppExceptionTag = tag; @@ -178,22 +256,31 @@ export async function installExtension( if (!imports[imp.module]) imports[imp.module] = {}; const hostFn = hostExports[imp.name]; if (typeof hostFn === "function") { - const idx = sharedTable.length; - sharedTable.grow(1); - sharedTable.set(idx, hostFn as any); (imports[imp.module] as Record)[imp.name] = - new WebAssembly.Global({ value: "i32", mutable: true }, idx); + new WebAssembly.Global({ value: "i32", mutable: true }, canonicalTableIndex(hostFn)); } else if (imp.module === "GOT.mem" && imp.name === "__wasm_lpad_context") { (imports[imp.module] as Record)[imp.name] = new WebAssembly.Global({ value: "i32", mutable: true }, lpadContextAddr); + } else if (extExportNames.has(imp.name) || moduleExportDescs.some((e) => e.name === imp.name)) { + // Defined by the extension itself — resolved after instantiation. + (imports[imp.module] as Record)[imp.name] = + new WebAssembly.Global({ value: "i32", mutable: true }, 0); } else { + console.warn(`[ext] unresolved ${imp.module} import '${imp.name}' bound to address 0`); (imports[imp.module] as Record)[imp.name] = new WebAssembly.Global({ value: "i32", mutable: true }, 0); } } } - extInstance = new WebAssembly.Instance(extModule, imports); + // Async instantiation: Chrome disallows synchronous WebAssembly.Instance + // on the main thread for modules larger than 8MB. + extInstance = await WebAssembly.instantiate(extModule, imports); + + // The element segments just populated [tableBase, tableBase + tableSlots); + // record those indices as the canonical addresses of the extension's + // functions so GOT fixups and dlSym reuse them. 
+ cacheTableRange(tableBase, sharedTable.length); // PIC shared modules export __wasm_apply_data_relocs which patches data // segment entries (vtables, function pointers) using GOT.func/GOT.mem values. @@ -215,10 +302,7 @@ export async function installExtension( if (g && g.value === 0) { const fn = extInstance.exports[imp.name]; if (typeof fn === "function") { - const idx = sharedTable.length; - sharedTable.grow(1); - sharedTable.set(idx, fn as any); - g.value = idx; + g.value = canonicalTableIndex(fn); fixedAny = true; } } @@ -248,10 +332,7 @@ export async function installExtension( for (const exp of moduleExportDescs) { if (exp.kind === "function") { const fn = extInstance.exports[exp.name]; - const idx = sharedTable.length; - sharedTable.grow(1); - sharedTable.set(idx, fn as any); - extExports[exp.name] = idx; + extExports[exp.name] = canonicalTableIndex(fn as Function); } } @@ -261,6 +342,10 @@ export async function installExtension( }); } +function alignUp(value: number, alignment: number): number { + return Math.ceil(value / alignment) * alignment; +} + function readLEB128(data: Uint8Array, pos: number): [number, number] { let val = 0, shift = 0; while (true) { @@ -272,6 +357,55 @@ function readLEB128(data: Uint8Array, pos: number): [number, number] { return [val, pos]; } +interface DylinkMemInfo { + memorySize: number; + memoryAlign: number; + tableSize: number; + tableAlign: number; +} + +// Parse the WASM_DYLINK_MEM_INFO subsection of the dylink.0 custom section, +// which declares the memory (data + bss) and table sizes a PIC shared module +// needs from the dynamic linker. 
+function parseDylinkMemInfo(wasm: Uint8Array): DylinkMemInfo | null { + let pos = 8; + while (pos < wasm.length) { + const sid = wasm[pos++]; + let size: number; + [size, pos] = readLEB128(wasm, pos); + const end = pos + size; + if (sid === 0) { + let nlen: number, p: number; + [nlen, p] = readLEB128(wasm, pos); + const sectionName = new TextDecoder().decode(wasm.subarray(p, p + nlen)); + if (sectionName === "dylink.0") { + let q = p + nlen; + while (q < end) { + const sub = wasm[q++]; + let ssize: number; + [ssize, q] = readLEB128(wasm, q); + const send = q + ssize; + if (sub === 1) { + // WASM_DYLINK_MEM_INFO + let memorySize: number, memoryAlign: number, tableSize: number, tableAlign: number; + [memorySize, q] = readLEB128(wasm, q); + [memoryAlign, q] = readLEB128(wasm, q); + [tableSize, q] = readLEB128(wasm, q); + [tableAlign, q] = readLEB128(wasm, q); + return { memorySize, memoryAlign, tableSize, tableAlign }; + } + q = send; + } + return null; + } + } + pos = end; + } + return null; +} + +// Fallback for modules without a dylink.0 section: count the entries of the +// active element segments to size the table reservation. function countElementSegmentEntries(wasm: Uint8Array): number { let pos = 8; let total = 0; @@ -285,17 +419,17 @@ function countElementSegmentEntries(wasm: Uint8Array): number { pos = p2; for (let i = 0; i < count; i++) { const flags = wasm[pos++]; - if (flags === 0) { - while (wasm[pos] !== 0x0b) pos++; - pos++; - let [numElem, p3] = readLEB128(wasm, pos); - pos = p3; - total += numElem; - for (let j = 0; j < numElem; j++) { - [, pos] = readLEB128(wasm, pos); - } - } else { - break; + if (flags !== 0) break; + // Offset expression: (i32.const ) or (global.get ), then end. 
+ const op = wasm[pos++]; + if (op !== 0x41 && op !== 0x23) break; + [, pos] = readLEB128(wasm, pos); + if (wasm[pos++] !== 0x0b) break; + let [numElem, p3] = readLEB128(wasm, pos); + pos = p3; + total += numElem; + for (let j = 0; j < numElem; j++) { + [, pos] = readLEB128(wasm, pos); } } break; diff --git a/ggsql-wasm/library/tsconfig.json b/ggsql-wasm/library/tsconfig.json index b2699a944..b0842e053 100644 --- a/ggsql-wasm/library/tsconfig.json +++ b/ggsql-wasm/library/tsconfig.json @@ -1,8 +1,8 @@ { "compilerOptions": { - "target": "ES2020", + "target": "ES2022", "module": "ESNext", - "lib": ["ES2020", "DOM"], + "lib": ["ES2022", "DOM"], "moduleResolution": "bundler", "strict": true, "esModuleInterop": true, diff --git a/ggsql-wasm/src/lib.rs b/ggsql-wasm/src/lib.rs index b19159f9c..6c7c16c4e 100644 --- a/ggsql-wasm/src/lib.rs +++ b/ggsql-wasm/src/lib.rs @@ -346,9 +346,8 @@ impl GgsqlContext { unsafe { conn.load_extension_enable() .map_err(|e| JsValue::from_str(&format!("Enable load_extension error: {:?}", e)))?; - let result = conn.load_extension(name, entry_point.as_deref()); - let _ = conn.load_extension_disable(); - result.map_err(|e| JsValue::from_str(&format!("Load extension error: {:?}", e)))?; + conn.load_extension(name, entry_point.as_deref()) + .map_err(|e| JsValue::from_str(&format!("Load extension error: {:?}", e)))?; } Ok(()) } From c2ce7e01da66f6c97025cf436cf7317dabd4450a Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 10 Jun 2026 11:54:27 +0100 Subject: [PATCH 05/11] Fix geometry columns in sqlite reader --- src/reader/sqlite.rs | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/src/reader/sqlite.rs b/src/reader/sqlite.rs index 9375b951e..da13f1312 100644 --- a/src/reader/sqlite.rs +++ b/src/reader/sqlite.rs @@ -101,6 +101,17 @@ impl super::SqlDialect for SqliteDialect { } } + /// Geometry columns in registered tables hold raw WKB BLOBs, which + /// SpatiaLite functions reject. 
Convert those to SpatiaLite's internal + /// format; native SpatiaLite geometries (e.g. from a SpatiaLite database + /// file) pass through unchanged. + fn sql_ensure_geometry(&self, column: &str) -> String { + format!( + "CASE WHEN GeometryType({column}) IS NOT NULL THEN {column} \ + ELSE GeomFromWKB({column}, 4326) END" + ) + } + fn sql_geometry_bbox(&self, column: &str, from: &str) -> String { format!( "SELECT MIN(MbrMinX({column})) AS xmin, MIN(MbrMinY({column})) AS ymin, \ @@ -699,6 +710,19 @@ fn sqlite_values_to_array(name: &str, values: Vec) -> Re } } + // A pure BLOB column (e.g. WKB geometry) maps to Arrow Binary so geometry + // auto-detection and spatial layers receive raw bytes, not a debug string. + if has_blob && !has_text { + let vals: Vec>> = values + .into_iter() + .map(|v| match v { + Value::Blob(b) => Some(b), + _ => None, + }) + .collect(); + return Ok(Arc::new(BinaryArray::from_iter(vals.iter().map(|o| o.as_deref()))) as ArrayRef); + } + if has_text || has_blob { let vals: Vec> = values .into_iter() @@ -1105,6 +1129,17 @@ mod tests { assert_eq!(crate::array_util::value_to_string(h, 0), "010203"); assert_eq!(crate::array_util::value_to_string(h, 1), "DEADBEEF"); assert_eq!(crate::array_util::value_to_string(t, 2), "null"); + + // Reading a BLOB column back yields Arrow Binary. 
+ let back = reader + .execute_sql("SELECT b FROM blob_data ORDER BY rowid") + .unwrap(); + assert_eq!(back.column_dtype("b").unwrap(), DataType::Binary); + let col = back.column("b").unwrap(); + let arr = col.as_any().downcast_ref::().unwrap(); + assert_eq!(arr.value(0), &[0x01u8, 0x02, 0x03]); + assert_eq!(arr.value(1), &[0xDE, 0xAD, 0xBE, 0xEF]); + assert!(arr.is_null(2)); } #[test] From 62bf4b9fa5b12c4ad3bf89f1404fbdc55eb772f3 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 10 Jun 2026 12:04:08 +0100 Subject: [PATCH 06/11] Remove testing extension --- ggsql-wasm/build-wasm.sh | 3 +-- ggsql-wasm/demo/build.mjs | 2 +- ggsql-wasm/demo/src/context.ts | 12 ------------ ggsql-wasm/demo/src/main.ts | 25 +------------------------ 4 files changed, 3 insertions(+), 39 deletions(-) diff --git a/ggsql-wasm/build-wasm.sh b/ggsql-wasm/build-wasm.sh index eb4fe3840..c9358cbae 100755 --- a/ggsql-wasm/build-wasm.sh +++ b/ggsql-wasm/build-wasm.sh @@ -77,8 +77,7 @@ fi echo "Building WASM demo and Quarto integration..." (cd "$SCRIPT_DIR/demo" && npm install && npm run build) -for wasm in "$SQLITE_WASM_RS"/loadable_extensions/*/test_ext.wasm \ - "$SQLITE_WASM_RS"/loadable_extensions/*/mod_spatialite.wasm; do +for wasm in "$SQLITE_WASM_RS"/loadable_extensions/*/mod_spatialite.wasm; do if [ -f "$wasm" ]; then name="$(basename "$wasm")" echo "Copying $name..." 
diff --git a/ggsql-wasm/demo/build.mjs b/ggsql-wasm/demo/build.mjs index d41fed513..f47f999c5 100644 --- a/ggsql-wasm/demo/build.mjs +++ b/ggsql-wasm/demo/build.mjs @@ -25,7 +25,7 @@ copyFileSync( join(__dirname, "../../ggsql-vscode/syntaxes/ggsql.tmLanguage.json"), join(distDir, "ggsql.tmLanguage.json"), ); -for (const ext of ["test_ext", "mod_spatialite"]) { +for (const ext of ["mod_spatialite"]) { try { copyFileSync( join(__dirname, `../pkg/${ext}.wasm`), diff --git a/ggsql-wasm/demo/src/context.ts b/ggsql-wasm/demo/src/context.ts index 465622cdf..24b33354b 100644 --- a/ggsql-wasm/demo/src/context.ts +++ b/ggsql-wasm/demo/src/context.ts @@ -22,18 +22,6 @@ export class WasmContextManager { await installExtension(name, url); } - loadExtension(name: string): void { - this.getContext().load_extension(name, undefined); - // SpatiaLite needs its spatial_ref_sys table populated before functions - // like ST_Transform can resolve SRIDs. Initialise it once on first load. - if (name === "mod_spatialite" && !this.spatialMetadataReady) { - this.spatialMetadataReady = true; - this.getContext().execute_sql("SELECT InitSpatialMetaData(1)"); - } - } - - private spatialMetadataReady = false; - private getContext(): GgsqlContext { if (!this.context) { throw new Error("Context not initialized. 
Call initialize() first."); diff --git a/ggsql-wasm/demo/src/main.ts b/ggsql-wasm/demo/src/main.ts index 2f8fb43a1..d51ce8d53 100644 --- a/ggsql-wasm/demo/src/main.ts +++ b/ggsql-wasm/demo/src/main.ts @@ -157,18 +157,6 @@ function initializeExamples() { button.className = "example-button"; button.textContent = example.name; button.onclick = () => { - if (example.loadExtension) { - try { - contextManager.loadExtension(example.loadExtension); - console.log(`[ext] load_extension("${example.loadExtension}") succeeded`); - } catch (e: any) { - console.error(`[ext] load_extension("${example.loadExtension}") FAILED:`, e); - if (!e.toString().includes("already loaded")) { - showProblems([`Extension load error: ${e}`], []); - return; - } - } - } editorManager.setValue(example.query); }; examplesList.appendChild(button); @@ -198,17 +186,7 @@ function initializeMobileExamples() { select.addEventListener("change", () => { const idx = parseInt(select.value, 10); if (!isNaN(idx) && examples[idx]) { - const example = examples[idx]; - if (example.loadExtension) { - try { - contextManager.loadExtension(example.loadExtension); - console.log(`[ext] load_extension("${example.loadExtension}") succeeded`); - } catch (e: any) { - console.error(`[ext] load_extension("${example.loadExtension}") FAILED:`, e); - return; - } - } - editorManager.setValue(example.query); + editorManager.setValue(examples[idx].query); } }); } @@ -224,7 +202,6 @@ async function main() { // Install extensions (fetch + compile, but don't load into SQLite yet) setStatus("Installing extensions...", "loading"); - await contextManager.installExtension("test_ext", WASM_BASE + "test_ext.wasm"); await contextManager.installExtension("mod_spatialite", WASM_BASE + "mod_spatialite.wasm"); setStatus("Initializing editor...", "loading"); From e7b176ca805ae7fb399e06a40f8783f906f22832 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 10 Jun 2026 13:24:32 +0100 Subject: [PATCH 07/11] Fix graticules in sqlite --- 
src/plot/projection/coord/map.rs | 19 +++++++------------ src/reader/mod.rs | 8 ++++++++ src/reader/sqlite.rs | 13 +++++-------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/src/plot/projection/coord/map.rs b/src/plot/projection/coord/map.rs index d27b24e37..44344bdd7 100644 --- a/src/plot/projection/coord/map.rs +++ b/src/plot/projection/coord/map.rs @@ -258,15 +258,11 @@ impl BBox { dialect: &dyn SqlDialect, execute_query: &dyn Fn(&str) -> crate::Result, ) -> Option { - let envelope = format!( - "ST_MakeEnvelope({}, {}, {}, {})", - self.xmin, self.ymin, self.xmax, self.ymax - ); + let envelope = dialect.sql_make_envelope(self.xmin, self.ymin, self.xmax, self.ymax); let transformed = dialect.sql_st_transform(&envelope, &self.crs, target_crs); - let sql = format!( - "SELECT ST_XMin(g) AS xmin, ST_YMin(g) AS ymin, \ - ST_XMax(g) AS xmax, ST_YMax(g) AS ymax \ - FROM (SELECT {transformed} AS g)" + let sql = dialect.sql_geometry_bbox( + "g", + &format!("(SELECT {transformed} AS g) AS \"__ggsql_bbox__\""), ); execute_query(&sql) .ok() @@ -374,10 +370,9 @@ fn graticule_bbox( // degenerate or incomplete values. Use the clip boundary extent which // correctly represents the visible hemisphere. if let Some(wkt) = clip_boundary_wkt { - let sql = format!( - "SELECT ST_XMin(g) AS xmin, ST_YMin(g) AS ymin, \ - ST_XMax(g) AS xmax, ST_YMax(g) AS ymax \ - FROM (SELECT ST_GeomFromText('{wkt}') AS g)" + let sql = dialect.sql_geometry_bbox( + "g", + &format!("(SELECT ST_GeomFromText('{wkt}') AS g) AS \"__ggsql_bbox__\""), ); if let Ok(df) = execute_query(&sql) { if let Some(clip_bbox) = BBox::from_df(&df, "EPSG:4326") { diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 3c108c5c5..a6609d0c8 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -201,6 +201,14 @@ pub trait SqlDialect { ) } + /// SQL expression building a rectangular polygon from corner coordinates. + /// + /// Default uses the PostGIS-style `ST_MakeEnvelope`. 
Override for backends + /// with different function names (e.g. SpatiaLite uses `BuildMbr`). + fn sql_make_envelope(&self, xmin: f64, ymin: f64, xmax: f64, ymax: f64) -> String { + format!("ST_MakeEnvelope({xmin}, {ymin}, {xmax}, {ymax})") + } + /// SQL statements to run before spatial operations. /// /// Override for backends that need an extension loaded (e.g. DuckDB spatial). diff --git a/src/reader/sqlite.rs b/src/reader/sqlite.rs index da13f1312..cd35cdd7c 100644 --- a/src/reader/sqlite.rs +++ b/src/reader/sqlite.rs @@ -101,15 +101,12 @@ impl super::SqlDialect for SqliteDialect { } } - /// Geometry columns in registered tables hold raw WKB BLOBs, which - /// SpatiaLite functions reject. Convert those to SpatiaLite's internal - /// format; native SpatiaLite geometries (e.g. from a SpatiaLite database - /// file) pass through unchanged. + fn sql_make_envelope(&self, xmin: f64, ymin: f64, xmax: f64, ymax: f64) -> String { + format!("BuildMbr({xmin}, {ymin}, {xmax}, {ymax})") + } + fn sql_ensure_geometry(&self, column: &str) -> String { - format!( - "CASE WHEN GeometryType({column}) IS NOT NULL THEN {column} \ - ELSE GeomFromWKB({column}, 4326) END" - ) + format!("COALESCE(GeomFromWKB({column}, 4326), {column})") } fn sql_geometry_bbox(&self, column: &str, from: &str) -> String { From 27a2939aa3ef533c62fe68c20e6c5516be7b398a Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 10 Jun 2026 13:49:07 +0100 Subject: [PATCH 08/11] Add spatial examples --- ggsql-wasm/demo/src/examples.ts | 122 +++++++++++++------------------- ggsql-wasm/demo/src/main.ts | 50 +++++++++++-- 2 files changed, 94 insertions(+), 78 deletions(-) diff --git a/ggsql-wasm/demo/src/examples.ts b/ggsql-wasm/demo/src/examples.ts index 4157348ae..b65779477 100644 --- a/ggsql-wasm/demo/src/examples.ts +++ b/ggsql-wasm/demo/src/examples.ts @@ -2,7 +2,7 @@ export interface Example { name: string; query: string; section: string; - loadExtension?: string; + extensions?: string[]; } export const 
examples: Example[] = [ @@ -213,80 +213,60 @@ DRAW point MAPPING bill_len AS x, bill_dep AS y, body_mass AS size LABEL title => 'Penguin Measurements', x => 'Bill Length (mm)', y => 'Bill Depth (mm)'`, }, - // === Extensions === + // === Spatial === { - section: "Extensions", - name: "Wasm Extension", - query: `-- Loaded from test_ext.wasm via the SQLite extension API -SELECT test_ext_hello() AS greeting`, - loadExtension: "test_ext", + section: "Spatial", + extensions: ["mod_spatialite"], + name: "World map", + query: `-- The spatial layer draws geographic geometries. The geometry column +-- of ggsql:world is detected automatically, so no mapping is needed. +VISUALISE FROM ggsql:world +DRAW spatial`, }, { - section: "Extensions", - name: "SpatiaLite", - query: `-- SpatiaLite reprojects world cities from WGS84 lon/lat (EPSG:4326) --- to Web Mercator metres (EPSG:3857) via PROJ, then plots them as a map. -WITH cities(name, lon, lat) AS ( - VALUES - ('London', -0.1276, 51.5074), - ('New York', -74.0060, 40.7128), - ('Tokyo', 139.6917, 35.6895), - ('Sydney', 151.2093, -33.8688), - ('Cape Town', 18.4241, -33.9249), - ('Rio de Janeiro', -43.1729, -22.9068), - ('Moscow', 37.6173, 55.7558) -) -SELECT - name, - ST_X(ST_Transform(MakePoint(lon, lat, 4326), 3857)) AS x, - ST_Y(ST_Transform(MakePoint(lon, lat, 4326), 3857)) AS y -FROM cities -VISUALISE x AS x, y AS y -DRAW point SETTING size => 6 -DRAW text MAPPING name AS label SETTING vjust => 'bottom', offset => [0, -8] -LABEL - title => 'World cities in Web Mercator (EPSG:3857)', - subtitle => 'Reprojected from WGS84 with SpatiaLite ST_Transform (PROJ)', - x => 'Easting (m)', - y => 'Northing (m)'`, - loadExtension: "mod_spatialite", + section: "Spatial", + extensions: ["mod_spatialite"], + name: "Choropleth", + query: `-- Shade each country by a variable. Population is heavily skewed, +-- so a log scale makes the gradient readable. 
+VISUALISE FROM ggsql:world +DRAW spatial + MAPPING population AS fill + SETTING opacity => 1 +SCALE fill TO viridis VIA log +LABEL title => 'Population by country', fill => 'Population'`, }, { - section: "Extensions", - name: "World map", - query: `-- Country outlines from the built-in ggsql:world dataset. SpatiaLite --- reprojects each country (PROJ, to equal-area EPSG:6933), simplifies it --- (GEOS, 25 km) to thin the geometry, then a numbers table explodes every --- polygon ring into ordered vertices for the path layer (ggsql can't pass --- recursive CTEs to SQLite, so the vertex indices come from a join). -WITH -nums(i) AS ( - SELECT 1 + d0.d + 10 * d1.d + 100 * d2.d AS i - FROM (SELECT 0 AS d UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d0, - (SELECT 0 AS d UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d1, - (SELECT 0 AS d UNION ALL SELECT 1 UNION ALL SELECT 2 UNION ALL SELECT 3 UNION ALL SELECT 4 UNION ALL SELECT 5 UNION ALL SELECT 6 UNION ALL SELECT 7 UNION ALL SELECT 8 UNION ALL SELECT 9) d2 -), -geo AS ( - SELECT name, continent, - ST_Simplify(ST_Transform(CastToMulti(GeomFromWKB(geom, 4326)), 6933), 25000) AS g - FROM ggsql:world -), -ring AS ( - SELECT name, continent, ST_ExteriorRing(ST_GeometryN(g, nums.i)) AS r, nums.i AS pidx - FROM geo JOIN nums ON nums.i <= ST_NumGeometries(g) -) -SELECT - ring.name || '-' || ring.pidx AS ring_id, - ring.continent, - nums.i AS vidx, - ST_X(ST_PointN(ring.r, nums.i)) AS x, - ST_Y(ST_PointN(ring.r, nums.i)) AS y -FROM ring JOIN nums ON nums.i <= ST_NumPoints(ring.r) -VISUALISE x AS x, y AS y -DRAW path MAPPING continent AS color SETTING linewidth => 0.5 PARTITION BY ring_id ORDER BY vidx -LABEL - title => 'World country outlines (EPSG:6933 equal-area)', - color => 
'Continent'`, - loadExtension: "mod_spatialite", + section: "Spatial", + extensions: ["mod_spatialite"], + name: "Projection", + query: `-- PROJECT TO a named map projection. Robinson is a good default +-- for world maps; try mercator, mollweide, natural or eckert4. +VISUALISE continent AS fill FROM ggsql:world +DRAW spatial +PROJECT TO robinson`, + }, + { + section: "Spatial", + extensions: ["mod_spatialite"], + name: "Globe", + query: `-- The orthographic projection shows the Earth as a globe. The origin +-- setting (lon, lat) chooses which hemisphere faces the viewer. +VISUALISE continent AS fill FROM ggsql:world +DRAW spatial +PROJECT TO orthographic + SETTING origin => (133.77, -25.27)`, + }, + { + section: "Spatial", + extensions: ["mod_spatialite"], + name: "Regional map", + query: `-- Filtering the data zooms the map to that region, and a conic +-- projection like Lambert suits a single continent. +VISUALISE continent AS fill FROM ggsql:world +DRAW spatial + FILTER continent == 'Africa' +PROJECT TO lambert + SETTING origin => (20, 5)`, }, ]; diff --git a/ggsql-wasm/demo/src/main.ts b/ggsql-wasm/demo/src/main.ts index d51ce8d53..4e3da015a 100644 --- a/ggsql-wasm/demo/src/main.ts +++ b/ggsql-wasm/demo/src/main.ts @@ -74,6 +74,43 @@ function renderTable(data: SqlResult): string { return `${ths}${bodyRows}${truncationRow}
`; } +// Fetch + compile extensions only when an example needs one. +const EXTENSION_URLS: Record = { + mod_spatialite: WASM_BASE + "mod_spatialite.wasm", +}; +const extensionInstalls = new Map>(); + +function ensureExtension(name: string): Promise { + let install = extensionInstalls.get(name); + if (!install) { + const url = EXTENSION_URLS[name]; + if (!url) { + return Promise.reject(new Error(`Unknown extension '${name}'`)); + } + setStatus(`Installing ${name} extension...`, "loading"); + install = contextManager.installExtension(name, url).catch((e) => { + extensionInstalls.delete(name); + throw e; + }); + extensionInstalls.set(name, install); + } + return install; +} + +// Install the given extensions, reporting any failure to the user. +async function ensureExtensions(names: string[] | undefined): Promise { + try { + for (const name of names ?? []) { + await ensureExtension(name); + } + return true; + } catch (e: any) { + showProblems([`Extension install error: ${e}`], []); + setStatus("Extension error", "error"); + return false; + } +} + async function executeQuery(query: string) { if (!query.trim()) { showProblems([], []); @@ -156,7 +193,8 @@ function initializeExamples() { const button = document.createElement("button"); button.className = "example-button"; button.textContent = example.name; - button.onclick = () => { + button.onclick = async () => { + await ensureExtensions(example.extensions); editorManager.setValue(example.query); }; examplesList.appendChild(button); @@ -183,10 +221,12 @@ function initializeMobileExamples() { optgroup!.appendChild(option); }); - select.addEventListener("change", () => { + select.addEventListener("change", async () => { const idx = parseInt(select.value, 10); if (!isNaN(idx) && examples[idx]) { - editorManager.setValue(examples[idx].query); + const example = examples[idx]; + await ensureExtensions(example.extensions); + editorManager.setValue(example.query); } }); } @@ -200,10 +240,6 @@ async function main() { 
setStatus("Loading builtin datasets...", "loading"); await contextManager.registerBuiltinDatasets(); - // Install extensions (fetch + compile, but don't load into SQLite yet) - setStatus("Installing extensions...", "loading"); - await contextManager.installExtension("mod_spatialite", WASM_BASE + "mod_spatialite.wasm"); - setStatus("Initializing editor...", "loading"); await editorManager.initialize(editorContainer, examples[0].query); From b86fe4ac79e10fc658d18ad39cfe6025da7c76c6 Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 10 Jun 2026 14:13:27 +0100 Subject: [PATCH 09/11] Instal spatial extension if required for wasm examples --- ggsql-wasm/demo/src/quarto/main.ts | 40 ++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/ggsql-wasm/demo/src/quarto/main.ts b/ggsql-wasm/demo/src/quarto/main.ts index ce16f9420..f0170ff15 100644 --- a/ggsql-wasm/demo/src/quarto/main.ts +++ b/ggsql-wasm/demo/src/quarto/main.ts @@ -1,6 +1,7 @@ import "./styles.css"; import vegaEmbed from "vega-embed"; import { WasmContextManager } from "../context"; +import { WASM_BASE } from "../wasmBase"; import { createEditor, type EditorInstance } from "./editor"; // --------------------------------------------------------------------------- @@ -58,6 +59,43 @@ function rewriteCsvRefs(query: string): string { ); } +// --------------------------------------------------------------------------- +// Extensions +// --------------------------------------------------------------------------- + +// Doc examples activate reader-specific spatial support with an +// `INSTALL spatial;` cell (DuckDB syntax). In this runtime that line is a +// cue to install the SpatiaLite extension before running later cells. 
+const INSTALL_SPATIAL_RE = /^\s*INSTALL\s+spatial\s*;/im; + +let spatialInstall: Promise | null = null; + +function ensureSpatialExtension(ctx: WasmContextManager): Promise { + if (!spatialInstall) { + console.log("[ggsql-quarto] Installing spatial extension…"); + spatialInstall = ctx + .installExtension("mod_spatialite", WASM_BASE + "mod_spatialite.wasm") + .catch((e) => { + spatialInstall = null; + throw e; + }); + } + return spatialInstall; +} + +async function installRequestedExtensions( + ctx: WasmContextManager, + query: string +): Promise { + if (INSTALL_SPATIAL_RE.test(query)) { + try { + await ensureSpatialExtension(ctx); + } catch (e) { + console.error("[ggsql-quarto] Spatial extension install failed:", e); + } + } +} + // --------------------------------------------------------------------------- // Vega embed options // --------------------------------------------------------------------------- @@ -173,6 +211,7 @@ async function initAndExecute( console.log(`[ggsql-quarto] Executing ${total} cells…`); for (let i = 0; i < total; i++) { const cell = cells[i]; + await installRequestedExtensions(ctx, cell.query); try { if (ctx.hasVisual(cell.rewrittenQuery)) { cell.result = ctx.execute(cell.rewrittenQuery); @@ -304,6 +343,7 @@ async function executeCell( clearError(cell); const currentQuery = rewriteCsvRefs(editorInst.getValue()); + await installRequestedExtensions(ctx, currentQuery); try { if (ctx.hasVisual(currentQuery)) { From 7436fcb61e08b5f12e5f2f500bbd355f2ab8604d Mon Sep 17 00:00:00 2001 From: George Stagg Date: Wed, 10 Jun 2026 15:56:47 +0100 Subject: [PATCH 10/11] Suggestions from code review --- ggsql-wasm/library/src/extensions.ts | 14 ++++---------- src/reader/sqlite.rs | 2 +- tree-sitter-ggsql/bindings/rust/lib.rs | 15 +++++++++++++-- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/ggsql-wasm/library/src/extensions.ts b/ggsql-wasm/library/src/extensions.ts index a2ae399d8..3e01e53c3 100644 --- 
a/ggsql-wasm/library/src/extensions.ts +++ b/ggsql-wasm/library/src/extensions.ts @@ -227,17 +227,11 @@ export async function installExtension( } }; } else { - // Unresolved import: stub it to return 0. Warn once per symbol so a - // genuinely missing dependency is visible without flooding the console. + // Unresolved import: stub it to fail fast when called. const unresName = imp.name; - let warned = false; - (imports.env as Record)[imp.name] = (...args: unknown[]) => { - if (!warned) { - warned = true; - console.warn(`[ext] unresolved import '${unresName}' stubbed to return 0`); - } - void args; - return 0; + console.warn(`[ext] unresolved import '${unresName}' will throw if called`); + (imports.env as Record)[imp.name] = () => { + throw new Error(`[ext] call to unresolved import '${unresName}'`); }; } } diff --git a/src/reader/sqlite.rs b/src/reader/sqlite.rs index cd35cdd7c..8f1e1f9f6 100644 --- a/src/reader/sqlite.rs +++ b/src/reader/sqlite.rs @@ -709,7 +709,7 @@ fn sqlite_values_to_array(name: &str, values: Vec) -> Re // A pure BLOB column (e.g. WKB geometry) maps to Arrow Binary so geometry // auto-detection and spatial layers receive raw bytes, not a debug string. 
- if has_blob && !has_text { + if has_blob && !has_text && !has_int && !has_real { let vals: Vec>> = values .into_iter() .map(|v| match v { diff --git a/tree-sitter-ggsql/bindings/rust/lib.rs b/tree-sitter-ggsql/bindings/rust/lib.rs index 6cf5b96d3..8eed77ea7 100644 --- a/tree-sitter-ggsql/bindings/rust/lib.rs +++ b/tree-sitter-ggsql/bindings/rust/lib.rs @@ -35,7 +35,12 @@ mod wasm_alloc { #[no_mangle] unsafe extern "C" fn malloc(size: usize) -> *mut u8 { - let layout = Layout::from_size_align_unchecked(size + HEADER, HEADER); + let Some(total) = size.checked_add(HEADER) else { + return null_mut(); + }; + let Ok(layout) = Layout::from_size_align(total, HEADER) else { + return null_mut(); + }; let ptr = alloc(layout); if ptr.is_null() { return null_mut(); @@ -62,10 +67,16 @@ mod wasm_alloc { if ptr.is_null() { return malloc(new_size); } + let Some(new_total) = new_size.checked_add(HEADER) else { + return null_mut(); + }; + if Layout::from_size_align(new_total, HEADER).is_err() { + return null_mut(); + } let base = ptr.sub(HEADER); let size = *base.cast::(); let layout = Layout::from_size_align_unchecked(size + HEADER, HEADER); - let new = rust_realloc(base, layout, new_size + HEADER); + let new = rust_realloc(base, layout, new_total); if new.is_null() { return null_mut(); } From 53ef541bb1fe64f3d071346bab060e100e9343bb Mon Sep 17 00:00:00 2001 From: George Stagg Date: Thu, 11 Jun 2026 13:17:06 +0100 Subject: [PATCH 11/11] Download spatialite module binary --- .github/workflows/build.yaml | 9 ------- .github/workflows/publish.yaml | 28 +------------------- .github/workflows/release-packages.yml | 24 +---------------- Cargo.lock | 1 + Cargo.toml | 2 +- ggsql-wasm/CLAUDE.md | 6 +++-- ggsql-wasm/build-wasm.sh | 36 ++++++++++++++------------ 7 files changed, 28 insertions(+), 78 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 0c890eac6..3d762b62a 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ 
-84,12 +84,3 @@ jobs: - name: Build WASM package working-directory: ggsql-wasm run: wasm-pack build --target web --profile wasm --no-opt - - # TODO: drop once rustwasm/wasm-pack#1092 is resolved. - - name: Rebuild wasm bindings with --keep-lld-exports - run: | - wb=$(find "$HOME/.cache/.wasm-pack" "$HOME/Library/Caches/.wasm-pack" -name wasm-bindgen -type f 2>/dev/null | sort -V | tail -1 || true) - : "${wb:?cached wasm-bindgen not found}" - "$wb" --target web --keep-lld-exports \ - --out-dir ggsql-wasm/pkg \ - target/wasm32-unknown-unknown/wasm/ggsql_wasm.wasm diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index 10de20fa6..66e089c08 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -66,34 +66,8 @@ jobs: - name: Install Jupyter kernel run: ./target/release/ggsql-jupyter --install - - name: Build WASM library - working-directory: ggsql-wasm/library - run: npm install && npm run build - - name: Build WASM package - working-directory: ggsql-wasm - run: wasm-pack build --target web --profile wasm --no-opt - - # TODO: drop once rustwasm/wasm-pack#1092 is resolved. - - name: Rebuild wasm bindings with --keep-lld-exports - run: | - wb=$(find "$HOME/.cache/.wasm-pack" "$HOME/Library/Caches/.wasm-pack" -name wasm-bindgen -type f 2>/dev/null | sort -V | tail -1 || true) - : "${wb:?cached wasm-bindgen not found}" - "$wb" --target web --keep-lld-exports \ - --out-dir ggsql-wasm/pkg \ - target/wasm32-unknown-unknown/wasm/ggsql_wasm.wasm - - - name: Optimise WASM binary - working-directory: ggsql-wasm - run: wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features - - - name: Build WASM demo - working-directory: ggsql-wasm/demo - run: npm install && npm run build - - - name: Copying output to doc/wasm... 
- working-directory: ggsql-wasm/demo - run: cp -r dist ../../doc/wasm + run: ./ggsql-wasm/build-wasm.sh - name: Copy CHANGELOG.md to root run: cp CHANGELOG.md doc/ diff --git a/.github/workflows/release-packages.yml b/.github/workflows/release-packages.yml index 66fe41d8b..5019de1a4 100644 --- a/.github/workflows/release-packages.yml +++ b/.github/workflows/release-packages.yml @@ -487,30 +487,8 @@ jobs: - name: Install wasm-opt run: cargo install wasm-opt - - name: Build WASM library - working-directory: ggsql-wasm/library - run: npm install && npm run build - - name: Build WASM package - working-directory: ggsql-wasm - run: wasm-pack build --target web --profile wasm --no-opt - - # TODO: drop once rustwasm/wasm-pack#1092 is resolved. - - name: Rebuild wasm bindings with --keep-lld-exports - run: | - wb=$(find "$HOME/.cache/.wasm-pack" "$HOME/Library/Caches/.wasm-pack" -name wasm-bindgen -type f 2>/dev/null | sort -V | tail -1 || true) - : "${wb:?cached wasm-bindgen not found}" - "$wb" --target web --keep-lld-exports \ - --out-dir ggsql-wasm/pkg \ - target/wasm32-unknown-unknown/wasm/ggsql_wasm.wasm - - - name: Optimise WASM binary - working-directory: ggsql-wasm - run: wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features - - - name: Add snippets/ to package files - working-directory: ggsql-wasm/pkg - run: npm pkg set 'files[]=snippets/' + run: ./ggsql-wasm/build-wasm.sh - name: Create npm tarball working-directory: ggsql-wasm/pkg diff --git a/Cargo.lock b/Cargo.lock index 1b58ff27a..0acea443d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4049,6 +4049,7 @@ dependencies = [ [[package]] name = "sqlite-wasm-rs" version = "0.5.5" +source = "git+https://github.com/ggsql-dev/sqlite-wasm-rs.git?branch=loadable-extensions#119e2c7e5b0d0c136dc931a8df089c2914113817" dependencies = [ "cc", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index 5ecd7f28d..e7a1e9596 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -82,4 +82,4 @@ opt-level = "z" panic = "abort" 
[patch.crates-io] -sqlite-wasm-rs = { path = "../sqlite-wasm-rs" } +sqlite-wasm-rs = { git = "https://github.com/ggsql-dev/sqlite-wasm-rs.git", branch = "loadable-extensions" } diff --git a/ggsql-wasm/CLAUDE.md b/ggsql-wasm/CLAUDE.md index 458b3c89d..b40b22679 100644 --- a/ggsql-wasm/CLAUDE.md +++ b/ggsql-wasm/CLAUDE.md @@ -22,6 +22,7 @@ ggsql-wasm/ │ └── src/ UI code (editor + Vega-Lite preview) └── pkg/ wasm-pack output (committed; consumed by library/ and demo/) ├── ggsql_wasm_bg.wasm + ├── mod_spatialite.wasm ├── ggsql_wasm.js, .d.ts └── package.json ``` @@ -49,8 +50,9 @@ This sequentially: 1. `npm install && npm run build` in `library/` — produces the typed JS wrapper. 2. `wasm-pack build --target web --profile wasm --no-opt` — compiles `src/lib.rs` to `pkg/`. The `wasm` profile is defined in the workspace `Cargo.toml` (release-style, `opt-level = "z"`, LTO, `panic = "abort"`). 3. `wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz` — shrinks the binary further. 4. Places `mod_spatialite.wasm` in `pkg/`: by default the prebuilt binary is downloaded from the [ggsql-dev/sqlite-wasm-rs releases](https://github.com/ggsql-dev/sqlite-wasm-rs/releases) and cached under `target/wasm-extensions/$SPATIALITE_TAG/` at the repository root, so later builds reuse it; set `SPATIALITE_WASM` to the path of a locally built binary to skip the download. 5. `npm install && npm run build` in `demo/` — bundles the playground UI (copies extension wasm from `pkg/` into `dist/`). 6. Copies `demo/dist/` to `doc/wasm/` at the repository root so Quarto can serve it under the docs site. Flags: diff --git a/ggsql-wasm/build-wasm.sh b/ggsql-wasm/build-wasm.sh index c9358cbae..b8ffeb387 100755 --- a/ggsql-wasm/build-wasm.sh +++ b/ggsql-wasm/build-wasm.sh @@ -31,17 +31,10 @@ check_wasm32_support() { echo "Building WASM library..."
(cd "$SCRIPT_DIR/library" && npm install && npm run build) -SQLITE_WASM_RS="${SQLITE_WASM_RS:-$REPO_ROOT/../sqlite-wasm-rs}" - if [ "$SKIP_BINARY" = false ]; then echo "Checking wasm build prerequisites..." check_wasm32_support - if [ -d "$SQLITE_WASM_RS/loadable_extensions" ]; then - echo "Building loadable extensions..." - make -C "$SQLITE_WASM_RS/loadable_extensions" - fi - echo "Building WASM binary..." rm -rf "$SCRIPT_DIR/pkg" # start clean so stale wasm-bindgen snippets don't accumulate (cd "$SCRIPT_DIR" && wasm-pack build --target web --profile wasm --no-opt) @@ -74,17 +67,28 @@ else echo "Skipping WASM binary build (--skip-binary)." fi -echo "Building WASM demo and Quarto integration..." -(cd "$SCRIPT_DIR/demo" && npm install && npm run build) +SPATIALITE_TAG="spatialite-5.1.0-wasm" +SPATIALITE_URL="https://github.com/ggsql-dev/sqlite-wasm-rs/releases/download/$SPATIALITE_TAG/mod_spatialite.wasm" -for wasm in "$SQLITE_WASM_RS"/loadable_extensions/*/mod_spatialite.wasm; do - if [ -f "$wasm" ]; then - name="$(basename "$wasm")" - echo "Copying $name..." - cp "$wasm" "$SCRIPT_DIR/pkg/" - cp "$wasm" "$SCRIPT_DIR/demo/dist/" 2>/dev/null || true +# SPATIALITE_WASM overrides the download with a locally built binary. +if [ -n "${SPATIALITE_WASM:-}" ]; then + echo "Using local mod_spatialite.wasm: $SPATIALITE_WASM" + cp "$SPATIALITE_WASM" "$SCRIPT_DIR/pkg/mod_spatialite.wasm" +else + CACHED="$REPO_ROOT/target/wasm-extensions/$SPATIALITE_TAG/mod_spatialite.wasm" + if [ ! -f "$CACHED" ]; then + echo "Downloading mod_spatialite.wasm ($SPATIALITE_TAG)..." + mkdir -p "$(dirname "$CACHED")" + curl -sSfL -o "$CACHED.tmp" "$SPATIALITE_URL" + mv "$CACHED.tmp" "$CACHED" + else + echo "Using cached mod_spatialite.wasm: $CACHED" fi -done + cp "$CACHED" "$SCRIPT_DIR/pkg/mod_spatialite.wasm" +fi + +echo "Building WASM demo and Quarto integration..." +(cd "$SCRIPT_DIR/demo" && npm install && npm run build) echo "Copying output to doc/wasm..." rm -rf "$REPO_ROOT/doc/wasm"