diff --git a/.cargo/config.toml b/.cargo/config.toml index 96805a81d..18c384acb 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -4,4 +4,17 @@ LIBSQLITE3_FLAGS = "-DSQLITE_ENABLE_MATH_FUNCTIONS" [target.wasm32-unknown-unknown] rustflags = [ "-C", "link-args=-z stack-size=16777216", + "-C", "link-args=--export-table --growable-table", + "-C", "link-args=--export=acos --export=asin --export=atan --export=atan2 --export=cos --export=exp --export=fmod --export=log --export=pow --export=sin --export=tan", + "-C", "link-args=--export=rust_sqlite_wasm_malloc --export=rust_sqlite_wasm_free --export=rust_sqlite_wasm_realloc --export=rust_sqlite_wasm_localtime --export=rust_sqlite_wasm_errno_location", + "-C", "link-args=--export=rust_sqlite_wasm_atoi --export=rust_sqlite_wasm_strtod --export=rust_sqlite_wasm_strtol --export=rust_sqlite_wasm_bsearch --export=rust_sqlite_wasm_qsort", + "-C", "link-args=--export=rust_sqlite_wasm_strcat --export=rust_sqlite_wasm_strchr --export=rust_sqlite_wasm_strcmp --export=rust_sqlite_wasm_strcpy --export=rust_sqlite_wasm_strlen --export=rust_sqlite_wasm_strncmp", + "-C", "link-args=--export=sqlite3_open_v2 --export=sqlite3_close --export=sqlite3_exec --export=sqlite3_prepare_v2 --export=sqlite3_step --export=sqlite3_reset --export=sqlite3_finalize", + "-C", "link-args=--export=sqlite3_bind_blob --export=sqlite3_bind_double --export=sqlite3_bind_int --export=sqlite3_bind_int64 --export=sqlite3_bind_null --export=sqlite3_bind_text", + "-C", "link-args=--export=sqlite3_column_blob --export=sqlite3_column_bytes --export=sqlite3_column_count --export=sqlite3_column_double --export=sqlite3_column_int64 --export=sqlite3_column_text --export=sqlite3_column_type", + "-C", "link-args=--export=sqlite3_value_double --export=sqlite3_value_int64 --export=sqlite3_value_type --export=sqlite3_result_double --export=sqlite3_result_int --export=sqlite3_result_null", + "-C", "link-args=--export=sqlite3_create_function --export=sqlite3_errmsg 
--export=sqlite3_config --export=sqlite3_initialize --export=sqlite3_last_insert_rowid --export=sqlite3_test_control", + "-C", "link-args=--export=sqlite3_malloc --export=sqlite3_free --export=sqlite3_mprintf --export=sqlite3_snprintf --export=sqlite3_vsnprintf --export=sqlite3_get_table --export=sqlite3_free_table", + "-C", "link-args=--export=sqlite3_libversion --export=sqlite3_libversion_number --export=sqlite3_uri_int64", + "-C", "link-args=--export=sqlite3_vfs_find --export=sqlite3_vfs_register --export=sqlite3_vfs_unregister", ] diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml index a8234ff95..66e089c08 100644 --- a/.github/workflows/publish.yaml +++ b/.github/workflows/publish.yaml @@ -66,25 +66,8 @@ jobs: - name: Install Jupyter kernel run: ./target/release/ggsql-jupyter --install - - name: Build WASM library - working-directory: ggsql-wasm/library - run: npm install && npm run build - - name: Build WASM package - working-directory: ggsql-wasm - run: wasm-pack build --target web --profile wasm --no-opt - - - name: Optimise WASM binary - working-directory: ggsql-wasm - run: wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features - - - name: Build WASM demo - working-directory: ggsql-wasm/demo - run: npm install && npm run build - - - name: Copying output to doc/wasm... 
- working-directory: ggsql-wasm/demo - run: cp -r dist ../../doc/wasm + run: ./ggsql-wasm/build-wasm.sh - name: Copy CHANGELOG.md to root run: cp CHANGELOG.md doc/ diff --git a/.github/workflows/release-packages.yml b/.github/workflows/release-packages.yml index 7a22cc198..5019de1a4 100644 --- a/.github/workflows/release-packages.yml +++ b/.github/workflows/release-packages.yml @@ -487,17 +487,8 @@ jobs: - name: Install wasm-opt run: cargo install wasm-opt - - name: Build WASM library - working-directory: ggsql-wasm/library - run: npm install && npm run build - - name: Build WASM package - working-directory: ggsql-wasm - run: wasm-pack build --target web --profile wasm --no-opt - - - name: Optimise WASM binary - working-directory: ggsql-wasm - run: wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features + run: ./ggsql-wasm/build-wasm.sh - name: Create npm tarball working-directory: ggsql-wasm/pkg diff --git a/Cargo.lock b/Cargo.lock index 2dbe4d44b..0acea443d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4048,9 +4048,8 @@ dependencies = [ [[package]] name = "sqlite-wasm-rs" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b2c760607300407ddeaee518acf28c795661b7108c75421303dbefb237d3a36" +version = "0.5.5" +source = "git+https://github.com/ggsql-dev/sqlite-wasm-rs.git?branch=loadable-extensions#119e2c7e5b0d0c136dc931a8df089c2914113817" dependencies = [ "cc", "js-sys", diff --git a/Cargo.toml b/Cargo.toml index c949a0df4..e7a1e9596 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,7 @@ arrow = { version = "58", default-features = false } duckdb = { version = "~1.10502", features = ["bundled", "vtab-arrow"] } parquet = { version = "58", default-features = false, features = ["arrow", "snap"] } bytes = "1" -rusqlite = { version = "0.38", features = ["bundled", "chrono"] } +rusqlite = { version = "0.38", features = ["bundled", "chrono", "load_extension"] } # ODBC toml_edit = "0.22" @@ -80,3 +80,6 @@ 
strip = true inherits = "release" opt-level = "z" panic = "abort" + +[patch.crates-io] +sqlite-wasm-rs = { git = "https://github.com/ggsql-dev/sqlite-wasm-rs.git", branch = "loadable-extensions" } diff --git a/ggsql-wasm/CLAUDE.md b/ggsql-wasm/CLAUDE.md index 458b3c89d..b40b22679 100644 --- a/ggsql-wasm/CLAUDE.md +++ b/ggsql-wasm/CLAUDE.md @@ -22,6 +22,7 @@ ggsql-wasm/ │ └── src/ UI code (editor + Vega-Lite preview) └── pkg/ wasm-pack output (committed; consumed by library/ and demo/) ├── ggsql_wasm_bg.wasm + ├── mod_spatialite.wasm ├── ggsql_wasm.js, .d.ts └── package.json ``` @@ -49,8 +50,9 @@ This sequentially: 1. `npm install && npm run build` in `library/` — produces the typed JS wrapper. 2. `wasm-pack build --target web --profile wasm --no-opt` — compiles `src/lib.rs` to `pkg/`. The `wasm` profile is defined in the workspace `Cargo.toml` (release-style, `opt-level = "z"`, LTO, `panic = "abort"`). 3. `wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz` — shrinks the binary further. -4. `npm install && npm run build` in `demo/` — bundles the playground UI. -5. Copies `demo/dist/` to `/doc/wasm/` so Quarto can serve it under the docs site. +4. Downloads the prebuilt `mod_spatialite.wasm` from the [ggsql-dev/sqlite-wasm-rs releases](https://github.com/ggsql-dev/sqlite-wasm-rs/releases) into `pkg/`, caching it under `/target/wasm-extensions/`. +5. `npm install && npm run build` in `demo/` — bundles the playground UI (copies extension wasm from `pkg/` into `dist/`). +6. Copies `demo/dist/` to `/doc/wasm/` so Quarto can serve it under the docs site. 
Flags: diff --git a/ggsql-wasm/Cargo.toml b/ggsql-wasm/Cargo.toml index 1ce69e2fd..ca67029c4 100644 --- a/ggsql-wasm/Cargo.toml +++ b/ggsql-wasm/Cargo.toml @@ -16,7 +16,7 @@ wasm-bindgen = "0.2" wasm-bindgen-futures = "0.4" js-sys = "0.3" arrow = { workspace = true } -ggsql = { path = "../src", default-features = false, features = ["vegalite", "sqlite", "builtin-data"] } +ggsql = { path = "../src", default-features = false, features = ["vegalite", "sqlite", "builtin-data", "spatial"] } serde_json = "1" [target.'cfg(not(target_arch = "wasm32"))'.dependencies] @@ -24,7 +24,7 @@ tokio = { version = "1.35", features = ["full"] } [target.'cfg(target_arch = "wasm32")'.dependencies] tokio = { version = "1.35", default-features = false } -sqlite-wasm-rs = "0.5.2" +sqlite-wasm-rs = { version = "0.5.2", features = ["loadable-extensions"] } # Transitive dep feature overrides for wasm32-unknown-unknown. # Cargo's feature unification activates these on the transitive deps. # - getrandom: pulled in by arrow (via ahash/const-random), needs "js" for wasm diff --git a/ggsql-wasm/build-wasm.sh b/ggsql-wasm/build-wasm.sh index afd5c3c02..b8ffeb387 100755 --- a/ggsql-wasm/build-wasm.sh +++ b/ggsql-wasm/build-wasm.sh @@ -22,28 +22,71 @@ check_wasm32_support() { echo "Install an LLVM/clang toolchain with wasm backend support (e.g. 'sudo apt-get install llvm' on Debian/Ubuntu)." >&2 exit 1 fi + if ! command -v wasm-pack >/dev/null 2>&1; then + echo "Error: wasm-pack not found. Install with: cargo install wasm-pack" >&2 + exit 1 + fi } echo "Building WASM library..." (cd "$SCRIPT_DIR/library" && npm install && npm run build) if [ "$SKIP_BINARY" = false ]; then - echo "Checking wasm32 compiler support..." + echo "Checking wasm build prerequisites..." check_wasm32_support echo "Building WASM binary..." 
+ rm -rf "$SCRIPT_DIR/pkg" # start clean so stale wasm-bindgen snippets don't accumulate (cd "$SCRIPT_DIR" && wasm-pack build --target web --profile wasm --no-opt) + # wasm-bindgen is invoked directly so we can pass --keep-lld-exports, + # which preserves the LLD symbols that loadable extensions import. + # wasm-pack cannot forward that flag (rustwasm/wasm-pack#1092). + echo "Re-running wasm-bindgen with --keep-lld-exports..." + WASM_BINDGEN="$(find "$HOME/Library/Caches/.wasm-pack" "$HOME/.cache/.wasm-pack" -name wasm-bindgen -type f 2>/dev/null | sort -V | tail -1 || true)" + if [ -z "$WASM_BINDGEN" ]; then + echo "Error: could not locate wasm-pack's cached wasm-bindgen." >&2 + exit 1 + fi + "$WASM_BINDGEN" \ + --target web \ + --keep-lld-exports \ + --out-dir "$SCRIPT_DIR/pkg" \ + "$REPO_ROOT/target/wasm32-unknown-unknown/wasm/ggsql_wasm.wasm" + if [ "$SKIP_OPT" = false ]; then echo "Optimising WASM binary..." (cd "$SCRIPT_DIR" && wasm-opt pkg/ggsql_wasm_bg.wasm -o pkg/ggsql_wasm_bg.wasm -Oz --all-features) else echo "Skipping wasm-opt (--skip-opt)." fi + + echo "Adding snippets/ to package files..." + (cd "$SCRIPT_DIR/pkg" && npm pkg set 'files[]=snippets/') else echo "Skipping WASM binary build (--skip-binary)." fi +SPATIALITE_TAG="spatialite-5.1.0-wasm" +SPATIALITE_URL="https://github.com/ggsql-dev/sqlite-wasm-rs/releases/download/$SPATIALITE_TAG/mod_spatialite.wasm" + +# SPATIALITE_WASM overrides the download with a locally built binary. +if [ -n "${SPATIALITE_WASM:-}" ]; then + echo "Using local mod_spatialite.wasm: $SPATIALITE_WASM" + cp "$SPATIALITE_WASM" "$SCRIPT_DIR/pkg/mod_spatialite.wasm" +else + CACHED="$REPO_ROOT/target/wasm-extensions/$SPATIALITE_TAG/mod_spatialite.wasm" + if [ ! -f "$CACHED" ]; then + echo "Downloading mod_spatialite.wasm ($SPATIALITE_TAG)..." 
+ mkdir -p "$(dirname "$CACHED")" + curl -sSfL -o "$CACHED.tmp" "$SPATIALITE_URL" + mv "$CACHED.tmp" "$CACHED" + else + echo "Using cached mod_spatialite.wasm: $CACHED" + fi + cp "$CACHED" "$SCRIPT_DIR/pkg/mod_spatialite.wasm" +fi + echo "Building WASM demo and Quarto integration..." (cd "$SCRIPT_DIR/demo" && npm install && npm run build) diff --git a/ggsql-wasm/demo/build.mjs b/ggsql-wasm/demo/build.mjs index 8b4cc8c2c..f47f999c5 100644 --- a/ggsql-wasm/demo/build.mjs +++ b/ggsql-wasm/demo/build.mjs @@ -25,6 +25,17 @@ copyFileSync( join(__dirname, "../../ggsql-vscode/syntaxes/ggsql.tmLanguage.json"), join(distDir, "ggsql.tmLanguage.json"), ); +for (const ext of ["mod_spatialite"]) { + try { + copyFileSync( + join(__dirname, `../pkg/${ext}.wasm`), + join(distDir, `${ext}.wasm`), + ); + } catch (err) { + // Best-effort copy: keep the build going, but report the skipped file instead of hiding the failure. + console.warn(`Skipping ${ext}.wasm: ${err.message}`); + } +} // Build Monaco editor web worker console.log("Building Monaco editor worker..."); diff --git a/ggsql-wasm/demo/package-lock.json b/ggsql-wasm/demo/package-lock.json index 86e043c17..4a6437e5b 100644 --- a/ggsql-wasm/demo/package-lock.json +++ b/ggsql-wasm/demo/package-lock.json @@ -22,6 +22,18 @@ "vscode-textmate": "^9.3.0" } }, + "../library": { + "name": "ggsql-wasm-lib", + "version": "0.0.0", + "extraneous": true, + "dependencies": { + "hyparquet": "^1.25.0" + }, + "devDependencies": { + "esbuild": "^0.27.0", + "typescript": "^5.9.0" + } + }, "../pkg": { "name": "ggsql-wasm", "version": "0.3.3", diff --git a/ggsql-wasm/demo/src/context.ts b/ggsql-wasm/demo/src/context.ts index e895418f1..24b33354b 100644 --- a/ggsql-wasm/demo/src/context.ts +++ b/ggsql-wasm/demo/src/context.ts @@ -1,4 +1,8 @@ -import init, { GgsqlContext } from "ggsql-wasm"; +import init, { + GgsqlContext, + initExtensionLoader, + installExtension, +} from "ggsql-wasm"; import { WASM_BASE } from "./wasmBase"; export class WasmContextManager { @@ -8,11 +12,16 @@ export class WasmContextManager { async initialize(): Promise<void> { if (this.initialized) return; - await init(WASM_BASE + 
"ggsql_wasm_bg.wasm"); + const wasmExports = await init(WASM_BASE + "ggsql_wasm_bg.wasm"); + initExtensionLoader(wasmExports); this.context = new GgsqlContext(); this.initialized = true; } + async installExtension(name: string, url: string): Promise<void> { // thin wrapper over the module-level installExtension() imported from "ggsql-wasm" + await installExtension(name, url); + } + private getContext(): GgsqlContext { if (!this.context) { throw new Error("Context not initialized. Call initialize() first."); diff --git a/ggsql-wasm/demo/src/examples.ts b/ggsql-wasm/demo/src/examples.ts index 61b9acba1..b65779477 100644 --- a/ggsql-wasm/demo/src/examples.ts +++ b/ggsql-wasm/demo/src/examples.ts @@ -2,6 +2,7 @@ export interface Example { name: string; query: string; section: string; + extensions?: string[]; } export const examples: Example[] = [ @@ -211,4 +212,61 @@ VISUALISE DRAW point MAPPING bill_len AS x, bill_dep AS y, body_mass AS size LABEL title => 'Penguin Measurements', x => 'Bill Length (mm)', y => 'Bill Depth (mm)'`, }, + + // === Spatial === + { + section: "Spatial", + extensions: ["mod_spatialite"], + name: "World map", + query: `-- The spatial layer draws geographic geometries. The geometry column +-- of ggsql:world is detected automatically, so no mapping is needed. +VISUALISE FROM ggsql:world +DRAW spatial`, + }, + { + section: "Spatial", + extensions: ["mod_spatialite"], + name: "Choropleth", + query: `-- Shade each country by a variable. Population is heavily skewed, +-- so a log scale makes the gradient readable. +VISUALISE FROM ggsql:world +DRAW spatial + MAPPING population AS fill + SETTING opacity => 1 +SCALE fill TO viridis VIA log +LABEL title => 'Population by country', fill => 'Population'`, + }, + { + section: "Spatial", + extensions: ["mod_spatialite"], + name: "Projection", + query: `-- PROJECT TO a named map projection. Robinson is a good default +-- for world maps; try mercator, mollweide, natural or eckert4. 
+VISUALISE continent AS fill FROM ggsql:world +DRAW spatial +PROJECT TO robinson`, + }, + { + section: "Spatial", + extensions: ["mod_spatialite"], + name: "Globe", + query: `-- The orthographic projection shows the Earth as a globe. The origin +-- setting (lon, lat) chooses which hemisphere faces the viewer. +VISUALISE continent AS fill FROM ggsql:world +DRAW spatial +PROJECT TO orthographic + SETTING origin => (133.77, -25.27)`, + }, + { + section: "Spatial", + extensions: ["mod_spatialite"], + name: "Regional map", + query: `-- Filtering the data zooms the map to that region, and a conic +-- projection like Lambert suits a single continent. +VISUALISE continent AS fill FROM ggsql:world +DRAW spatial + FILTER continent == 'Africa' +PROJECT TO lambert + SETTING origin => (20, 5)`, + }, ]; diff --git a/ggsql-wasm/demo/src/main.ts b/ggsql-wasm/demo/src/main.ts index 30cffda44..4e3da015a 100644 --- a/ggsql-wasm/demo/src/main.ts +++ b/ggsql-wasm/demo/src/main.ts @@ -5,6 +5,7 @@ import { WasmContextManager } from "./context"; import { EditorManager } from "./editor"; import { TableManager } from "./tableManager"; import { examples } from "./examples"; +import { WASM_BASE } from "./wasmBase"; // State const contextManager = new WasmContextManager(); @@ -73,6 +74,43 @@ function renderTable(data: SqlResult): string { return `${ths}${bodyRows}${truncationRow}
`; } +// Fetch + compile extensions only when an example needs one. +const EXTENSION_URLS: Record<string, string> = { // extension name -> URL of its .wasm binary + mod_spatialite: WASM_BASE + "mod_spatialite.wasm", +}; +const extensionInstalls = new Map<string, Promise<void>>(); // one shared install promise per extension name + +function ensureExtension(name: string): Promise<void> { + let install = extensionInstalls.get(name); + if (!install) { + const url = EXTENSION_URLS[name]; + if (!url) { + return Promise.reject(new Error(`Unknown extension '${name}'`)); + } + setStatus(`Installing ${name} extension...`, "loading"); + install = contextManager.installExtension(name, url).catch((e) => { + extensionInstalls.delete(name); // forget the failed attempt so a later call can retry + throw e; + }); + extensionInstalls.set(name, install); + } + return install; +} + +// Install the given extensions, reporting any failure to the user. +async function ensureExtensions(names: string[] | undefined): Promise<boolean> { // resolves true when every install succeeded, false after reporting a failure + try { + for (const name of names ?? []) { + await ensureExtension(name); + } + return true; + } catch (e: any) { + showProblems([`Extension install error: ${e}`], []); + setStatus("Extension error", "error"); + return false; + } +} + async function executeQuery(query: string) { if (!query.trim()) { showProblems([], []); @@ -155,9 +193,9 @@ function initializeExamples() { const button = document.createElement("button"); button.className = "example-button"; button.textContent = example.name; - button.onclick = () => { + button.onclick = async () => { + await ensureExtensions(example.extensions); editorManager.setValue(example.query); - //executeQuery(example.query); }; examplesList.appendChild(button); }); @@ -183,10 +221,12 @@ function initializeMobileExamples() { optgroup!.appendChild(option); }); - select.addEventListener("change", () => { + select.addEventListener("change", async () => { const idx = parseInt(select.value, 10); if (!isNaN(idx) && examples[idx]) { - editorManager.setValue(examples[idx].query); + const example = examples[idx]; + await ensureExtensions(example.extensions); + editorManager.setValue(example.query); } }); } diff --git 
a/ggsql-wasm/demo/src/quarto/main.ts b/ggsql-wasm/demo/src/quarto/main.ts index ce16f9420..f0170ff15 100644 --- a/ggsql-wasm/demo/src/quarto/main.ts +++ b/ggsql-wasm/demo/src/quarto/main.ts @@ -1,6 +1,7 @@ import "./styles.css"; import vegaEmbed from "vega-embed"; import { WasmContextManager } from "../context"; +import { WASM_BASE } from "../wasmBase"; import { createEditor, type EditorInstance } from "./editor"; // --------------------------------------------------------------------------- @@ -58,6 +59,43 @@ function rewriteCsvRefs(query: string): string { ); } +// --------------------------------------------------------------------------- +// Extensions +// --------------------------------------------------------------------------- + +// Doc examples activate reader-specific spatial support with an +// `INSTALL spatial;` cell (DuckDB syntax). In this runtime that line is a +// cue to install the SpatiaLite extension before running later cells. +const INSTALL_SPATIAL_RE = /^\s*INSTALL\s+spatial\s*;/im; + +let spatialInstall: Promise<void> | null = null; // shared install promise; reset to null on failure so a later cell can retry + +function ensureSpatialExtension(ctx: WasmContextManager): Promise<void> { + if (!spatialInstall) { + console.log("[ggsql-quarto] Installing spatial extension…"); + spatialInstall = ctx + .installExtension("mod_spatialite", WASM_BASE + "mod_spatialite.wasm") + .catch((e) => { + spatialInstall = null; + throw e; + }); + } + return spatialInstall; +} + +async function installRequestedExtensions( + ctx: WasmContextManager, + query: string +): Promise<void> { + if (INSTALL_SPATIAL_RE.test(query)) { + try { + await ensureSpatialExtension(ctx); + } catch (e) { + console.error("[ggsql-quarto] Spatial extension install failed:", e); // logged only: the cell still runs afterwards + } + } +} + // --------------------------------------------------------------------------- // Vega embed options // --------------------------------------------------------------------------- @@ -173,6 +211,7 @@ async function initAndExecute( console.log(`[ggsql-quarto] Executing ${total} cells…`); for (let 
i = 0; i < total; i++) { const cell = cells[i]; + await installRequestedExtensions(ctx, cell.query); try { if (ctx.hasVisual(cell.rewrittenQuery)) { cell.result = ctx.execute(cell.rewrittenQuery); @@ -304,6 +343,7 @@ async function executeCell( clearError(cell); const currentQuery = rewriteCsvRefs(editorInst.getValue()); + await installRequestedExtensions(ctx, currentQuery); try { if (ctx.hasVisual(currentQuery)) { diff --git a/ggsql-wasm/library/build.mjs b/ggsql-wasm/library/build.mjs index 4a51888cd..77df5bcd6 100644 --- a/ggsql-wasm/library/build.mjs +++ b/ggsql-wasm/library/build.mjs @@ -11,7 +11,7 @@ const buildOptions = { outfile: join(__dirname, "dist/lib.js"), format: "esm", platform: "browser", - target: "es2020", + target: "es2022", sourcemap: true, }; diff --git a/ggsql-wasm/library/package.json b/ggsql-wasm/library/package.json index d3ddd51ce..884cb3d02 100644 --- a/ggsql-wasm/library/package.json +++ b/ggsql-wasm/library/package.json @@ -3,6 +3,7 @@ "version": "0.0.0", "private": true, "type": "module", + "main": "dist/lib.js", "scripts": { "build": "node build.mjs", "dev": "node build.mjs --watch", diff --git a/ggsql-wasm/library/src/extensions.ts b/ggsql-wasm/library/src/extensions.ts new file mode 100644 index 000000000..3e01e53c3 --- /dev/null +++ b/ggsql-wasm/library/src/extensions.ts @@ -0,0 +1,474 @@ +// Wasm Exception Handling proposal types +declare global { + namespace WebAssembly { + interface Tag {} + const Tag: { new (descriptor: { parameters: ValueType[] }): Tag }; + interface Exception {} + const Exception: { + new (tag: Tag, payload: unknown[], options?: { traceStack?: boolean }): Exception; + }; + } +} + +interface LoadedExtension { + instance: WebAssembly.Instance; + exports: Record<string, number>; // exported function name -> its index in the shared function table +} + +const PAGE = 65536; // WebAssembly page size in bytes + +// Dedicated shadow-stack size for each extension. The stack sits above the +// extension's data segment and grows downward. 
+const EXT_STACK_SIZE = 16 * 1024 * 1024; + +const registry = new Map<string, LoadedExtension>(); // installed extensions keyed by the name passed to installExtension() +let lastError: string | null = null; +let nextHandle = 1; +const handleMap = new Map(); // NOTE(review): type arguments not recoverable from this hunk; its users are not visible here +let sharedMemory: WebAssembly.Memory | null = null; +let sharedTable: WebAssembly.Table | null = null; +let hostExports: WebAssembly.Exports | null = null; + +// Canonical table index per function, so the same function always has the +// same "address". +const tableIndexCache = new Map<Function, number>(); + +function canonicalTableIndex(fn: Function): number { + const cached = tableIndexCache.get(fn); + if (cached !== undefined) return cached; + const idx = sharedTable!.grow(1); // grow() returns the previous length, i.e. the index of the new slot + sharedTable!.set(idx, fn as any); + tableIndexCache.set(fn, idx); + return idx; +} + +function cacheTableRange(start: number, end: number): void { // records slots [start, end) without overwriting an index already cached for a function + for (let i = start; i < end; i++) { + const fn = sharedTable!.get(i); + if (typeof fn === "function" && !tableIndexCache.has(fn)) { + tableIndexCache.set(fn, i); + } + } +} + +export function initExtensionLoader(wasmExports: WebAssembly.Exports): void { + hostExports = wasmExports; + sharedMemory = wasmExports.memory as WebAssembly.Memory; + sharedTable = wasmExports.__indirect_function_table as WebAssembly.Table; + + if (!sharedMemory) throw new Error("Main module does not export 'memory'"); + if (!sharedTable) throw new Error("Main module does not export '__indirect_function_table'"); + + cacheTableRange(0, sharedTable.length); + + (globalThis as any).__sqlite_ext = { + dlOpen, + dlSym, + dlClose, + dlError, + }; +} + +export async function installExtension( + name: string, + wasmSource: BufferSource | Response | string, +): Promise<void> { + if (!sharedMemory || !sharedTable || !hostExports) { + throw new Error("Call initExtensionLoader() before installExtension()"); + } + + if (registry.has(name)) { + console.warn(`[ext] extension '${name}' is already installed; skipping`); + return; + } + + let bytes: ArrayBuffer; + if (typeof wasmSource === "string") { + const response = await fetch(wasmSource); + 
if (!response.ok) throw new Error(`Failed to fetch extension: ${response.status}`); + bytes = await response.arrayBuffer(); + } else if (wasmSource instanceof Response) { + bytes = await wasmSource.arrayBuffer(); + } else if (ArrayBuffer.isView(wasmSource)) { + bytes = + wasmSource.byteOffset === 0 && wasmSource.byteLength === wasmSource.buffer.byteLength + ? (wasmSource.buffer as ArrayBuffer) + : (wasmSource.buffer.slice( + wasmSource.byteOffset, + wasmSource.byteOffset + wasmSource.byteLength, + ) as ArrayBuffer); + } else { + bytes = wasmSource as ArrayBuffer; + } + + const wasmBytes = new Uint8Array(bytes); + const extModule = await WebAssembly.compile(bytes); + + // Memory layout: [data segment (dylink.0 memory_size)][stack][lpad page]. + // The dylink.0 section declares the module's data+bss size; the file size + // is only a (typically over-, possibly under-) estimate kept as a fallback. + const dylink = parseDylinkMemInfo(wasmBytes); + let dataSize: number; + if (dylink) { + dataSize = dylink.memorySize; + if (1 << dylink.memoryAlign > PAGE) { + console.warn( + `[ext] '${name}' requests 2^${dylink.memoryAlign} memory alignment; only page alignment is provided`, + ); + } + } else { + console.warn(`[ext] '${name}' has no dylink.0 section; sizing data segment from file size`); + dataSize = bytes.byteLength; + } + + const dataBytes = alignUp(dataSize, PAGE); + const currentBytes = sharedMemory.buffer.byteLength; + sharedMemory.grow((dataBytes + EXT_STACK_SIZE + PAGE) / PAGE); // every term is a multiple of PAGE, so the page count is an integer + const memBase = currentBytes; // the extension's region starts where the memory previously ended + const stackTop = memBase + dataBytes + EXT_STACK_SIZE; + // Small scratch area for __wasm_lpad_context (3 x i32 = 12 bytes) + const lpadContextAddr = stackTop + PAGE - 64; + + const moduleExportDescs = WebAssembly.Module.exports(extModule); + + // Table slots for the module's element segments. dylink.0 states the count; + // fall back to parsing the element section. 
Later needs (GOT entries, + // dlSym exports) grow the table on demand via canonicalTableIndex. + const tableSlots = dylink?.tableSize ?? countElementSegmentEntries(wasmBytes); + const tableBase = sharedTable.length; + sharedTable.grow(tableSlots); + + const imports: WebAssembly.Imports = { + env: { + memory: sharedMemory, + __indirect_function_table: sharedTable, + __memory_base: new WebAssembly.Global({ value: "i32", mutable: false }, memBase), + __table_base: new WebAssembly.Global({ value: "i32", mutable: false }, tableBase), + __stack_pointer: new WebAssembly.Global({ value: "i32", mutable: true }, stackTop), + }, + }; + + // Know which functions the extension itself exports (before instantiation). + // PIC --shared modules both import AND export the same symbols — env imports + // are for direct calls, GOT is for indirect. We use lazy trampolines so that + // direct calls to self-defined symbols bounce to the extension's own export. + const extExportNames = new Set( + moduleExportDescs.filter((e) => e.kind === "function").map((e) => e.name), + ); + let extInstance: WebAssembly.Instance | null = null; + let cppExceptionTag: WebAssembly.Tag | null = null; + + const moduleImportDescs = WebAssembly.Module.imports(extModule); + for (const imp of moduleImportDescs) { + if (imp.module === "env" && Object.hasOwn(imports.env as object, imp.name)) { + continue; + } + + if (imp.module === "env" && imp.kind === "function") { + const hostFn = hostExports[imp.name]; + if (typeof hostFn === "function") { + (imports.env as Record<string, unknown>)[imp.name] = hostFn; + } else if (imp.name === "abort") { + (imports.env as Record<string, unknown>)[imp.name] = () => { + throw new Error("[ext] abort() called from extension"); + }; + } else if (imp.name === "exit") { + (imports.env as Record<string, unknown>)[imp.name] = (code: number) => { + throw new Error(`[ext] exit(${code}) called from extension`); + }; + } else if (extExportNames.has(imp.name)) { + // Symbol defined in the extension itself — lazy trampoline that calls + // 
the extension's own export once the instance exists. + const sym = imp.name; + (imports.env as Record<string, unknown>)[sym] = (...args: unknown[]) => { + const fn = extInstance?.exports[sym]; + if (typeof fn !== "function") { + throw new Error(`[ext] self-import '${sym}' called before instantiation completed`); + } + return (fn as Function)(...args); + }; + } else if (imp.name === "__ext_trap") { + const trapNames: Record<number, string> = { 1: "abort()", 2: "__assert_fail()", 3: "abort() [stubs]" }; + (imports.env as Record<string, unknown>)[imp.name] = (code: number) => { + const name = code >= 100 ? `exit(${code - 100})` : (trapNames[code] ?? `trap(${code})`); + throw new Error(`[ext] ${name} called from extension`); + }; + } else if (imp.name === "_Unwind_RaiseException") { + (imports.env as Record<string, unknown>)[imp.name] = (excPtr: number) => { + if (cppExceptionTag) { + throw new WebAssembly.Exception(cppExceptionTag, [excPtr], { traceStack: true }); + } + throw new Error("_Unwind_RaiseException: no cpp exception tag"); + }; + } else if (imp.name === "_Unwind_CallPersonality") { + // Minimal personality: The enclosing landing pad handles the exception + // Offsets are the libc++abi wasm32 __cxa_exception layout. + const ADJUSTED_PTR_OFFSET = -8; + const THROWN_OBJECT_OFFSET = 32; + const LPAD_SELECTOR_OFFSET = 8; + const URC_HANDLER_FOUND = 6; + (imports.env as Record<string, unknown>)[imp.name] = (excPtr: number) => { + const view = new DataView(sharedMemory!.buffer); + view.setUint32(excPtr + ADJUSTED_PTR_OFFSET, excPtr + THROWN_OBJECT_OFFSET, true); + view.setInt32(lpadContextAddr + LPAD_SELECTOR_OFFSET, 1, true); + return URC_HANDLER_FOUND; + }; + } else if (imp.name === "_Unwind_DeleteException") { + (imports.env as Record<string, unknown>)[imp.name] = (excPtr: number) => { + // _Unwind_Exception holds a cleanup function pointer at offset 8 + // libc++abi points it at the routine that destroys and frees the + // exception object. 
+ const URC_FOREIGN_EXCEPTION_CAUGHT = 1; + const cleanupIdx = new DataView(sharedMemory!.buffer).getUint32(excPtr + 8, true); + if (cleanupIdx) { + const fn = sharedTable!.get(cleanupIdx); + if (typeof fn === "function") fn(URC_FOREIGN_EXCEPTION_CAUGHT, excPtr); + } + }; + } else { + // Unresolved import: stub it to fail fast when called. + const unresName = imp.name; + console.warn(`[ext] unresolved import '${unresName}' will throw if called`); + (imports.env as Record<string, unknown>)[imp.name] = () => { + throw new Error(`[ext] call to unresolved import '${unresName}'`); + }; + } + } + + if (imp.module === "env" && (imp.kind as string) === "tag") { + const params = (imp as { type?: { parameters?: WebAssembly.ValueType[] } }).type + ?.parameters ?? ["i32"]; + const tag = new WebAssembly.Tag({ parameters: params as WebAssembly.ValueType[] }); + (imports.env as Record<string, unknown>)[imp.name] = tag; + if (imp.name === "__cpp_exception") { + cppExceptionTag = tag; + } + } + + if ((imp.module === "GOT.func" || imp.module === "GOT.mem") && imp.kind === "global") { + if (!imports[imp.module]) imports[imp.module] = {}; + const hostFn = hostExports[imp.name]; + if (typeof hostFn === "function") { + (imports[imp.module] as Record<string, WebAssembly.Global>)[imp.name] = + new WebAssembly.Global({ value: "i32", mutable: true }, canonicalTableIndex(hostFn)); + } else if (imp.module === "GOT.mem" && imp.name === "__wasm_lpad_context") { + (imports[imp.module] as Record<string, WebAssembly.Global>)[imp.name] = + new WebAssembly.Global({ value: "i32", mutable: true }, lpadContextAddr); + } else if (extExportNames.has(imp.name) || moduleExportDescs.some((e) => e.name === imp.name)) { + // Defined by the extension itself — resolved after instantiation. 
+ (imports[imp.module] as Record<string, WebAssembly.Global>)[imp.name] = + new WebAssembly.Global({ value: "i32", mutable: true }, 0); + } else { + console.warn(`[ext] unresolved ${imp.module} import '${imp.name}' bound to address 0`); + (imports[imp.module] as Record<string, WebAssembly.Global>)[imp.name] = + new WebAssembly.Global({ value: "i32", mutable: true }, 0); + } + } + } + + // Async instantiation: Chrome disallows synchronous WebAssembly.Instance + // on the main thread for modules larger than 8MB. + extInstance = await WebAssembly.instantiate(extModule, imports); + + // The element segments just populated [tableBase, tableBase + tableSlots); + // record those indices as the canonical addresses of the extension's + // functions so GOT fixups and dlSym reuse them. + cacheTableRange(tableBase, sharedTable.length); + + // PIC shared modules export __wasm_apply_data_relocs which patches data + // segment entries (vtables, function pointers) using GOT.func/GOT.mem values. + const applyRelocs = extInstance.exports.__wasm_apply_data_relocs as Function | undefined; + if (applyRelocs) { + applyRelocs(); + + // The module's start function (__wasm_apply_global_relocs) initialises + // GOT.mem entries but never GOT.func: a shared library can't assign its + // own table indices, so that's the dynamic linker's job — which, in the + // browser, is us. Resolve the GOT entries the host didn't provide and the + // module left at 0 (its own vtable/function-pointer symbols) from the + // module's exports, then re-run the data relocs so vtable slots get + // patched with the now-correct indices. 
+ let fixedAny = false; + for (const imp of moduleImportDescs) { + if (imp.module === "GOT.func" && imp.kind === "global") { + const g = (imports["GOT.func"] as Record)?.[imp.name]; + if (g && g.value === 0) { + const fn = extInstance.exports[imp.name]; + if (typeof fn === "function") { + g.value = canonicalTableIndex(fn); + fixedAny = true; + } + } + } + if (imp.module === "GOT.mem" && imp.kind === "global") { + const g = (imports["GOT.mem"] as Record)?.[imp.name]; + if (g && g.value === 0) { + const exp = extInstance.exports[imp.name]; + if (exp && typeof exp === "object" && "value" in exp) { + g.value = (exp as WebAssembly.Global).value + memBase; + fixedAny = true; + } + } + } + } + if (fixedAny) { + applyRelocs(); + } + } + + const callCtors = extInstance.exports.__wasm_call_ctors as Function | undefined; + if (callCtors) { + callCtors(); + } + + const extExports: Record = {}; + for (const exp of moduleExportDescs) { + if (exp.kind === "function") { + const fn = extInstance.exports[exp.name]; + extExports[exp.name] = canonicalTableIndex(fn as Function); + } + } + + registry.set(name, { + instance: extInstance, + exports: extExports, + }); +} + +function alignUp(value: number, alignment: number): number { + return Math.ceil(value / alignment) * alignment; +} + +function readLEB128(data: Uint8Array, pos: number): [number, number] { + let val = 0, shift = 0; + while (true) { + const b = data[pos++]; + val |= (b & 0x7f) << shift; + shift += 7; + if (!(b & 0x80)) break; + } + return [val, pos]; +} + +interface DylinkMemInfo { + memorySize: number; + memoryAlign: number; + tableSize: number; + tableAlign: number; +} + +// Parse the WASM_DYLINK_MEM_INFO subsection of the dylink.0 custom section, +// which declares the memory (data + bss) and table sizes a PIC shared module +// needs from the dynamic linker. 
+function parseDylinkMemInfo(wasm: Uint8Array): DylinkMemInfo | null { + let pos = 8; + while (pos < wasm.length) { + const sid = wasm[pos++]; + let size: number; + [size, pos] = readLEB128(wasm, pos); + const end = pos + size; + if (sid === 0) { + let nlen: number, p: number; + [nlen, p] = readLEB128(wasm, pos); + const sectionName = new TextDecoder().decode(wasm.subarray(p, p + nlen)); + if (sectionName === "dylink.0") { + let q = p + nlen; + while (q < end) { + const sub = wasm[q++]; + let ssize: number; + [ssize, q] = readLEB128(wasm, q); + const send = q + ssize; + if (sub === 1) { + // WASM_DYLINK_MEM_INFO + let memorySize: number, memoryAlign: number, tableSize: number, tableAlign: number; + [memorySize, q] = readLEB128(wasm, q); + [memoryAlign, q] = readLEB128(wasm, q); + [tableSize, q] = readLEB128(wasm, q); + [tableAlign, q] = readLEB128(wasm, q); + return { memorySize, memoryAlign, tableSize, tableAlign }; + } + q = send; + } + return null; + } + } + pos = end; + } + return null; +} + +// Fallback for modules without a dylink.0 section: count the entries of the +// active element segments to size the table reservation. +function countElementSegmentEntries(wasm: Uint8Array): number { + let pos = 8; + let total = 0; + while (pos < wasm.length) { + const sid = wasm[pos++]; + let [size, p] = readLEB128(wasm, pos); + pos = p; + const end = pos + size; + if (sid === 9) { + let [count, p2] = readLEB128(wasm, pos); + pos = p2; + for (let i = 0; i < count; i++) { + const flags = wasm[pos++]; + if (flags !== 0) break; + // Offset expression: (i32.const ) or (global.get ), then end. 
+ const op = wasm[pos++]; + if (op !== 0x41 && op !== 0x23) break; + [, pos] = readLEB128(wasm, pos); + if (wasm[pos++] !== 0x0b) break; + let [numElem, p3] = readLEB128(wasm, pos); + pos = p3; + total += numElem; + for (let j = 0; j < numElem; j++) { + [, pos] = readLEB128(wasm, pos); + } + } + break; + } + pos = end; + } + return total || 256; +} + +function dlOpen(filename: string): number { + lastError = null; + const name = filename.replace(/^.*[\\/]/, "").replace(/\.wasm$/, ""); + if (!registry.has(name)) { + lastError = `Extension '${name}' not installed. Call installExtension() first.`; + return 0; + } + const handle = nextHandle++; + handleMap.set(handle, name); + return handle; +} + +function dlSym(handle: number, symbol: string): number { + lastError = null; + const name = handleMap.get(handle); + if (!name) { + lastError = `Invalid extension handle: ${handle}`; + return 0; + } + const ext = registry.get(name); + if (!ext) { + lastError = `Extension '${name}' not found in registry`; + return 0; + } + const idx = ext.exports[symbol]; + if (idx === undefined) { + lastError = `Symbol '${symbol}' not found in extension '${name}'`; + return 0; + } + return idx; +} + +function dlClose(handle: number): void { + handleMap.delete(handle); +} + +function dlError(): string | null { + return lastError; +} diff --git a/ggsql-wasm/library/src/index.ts b/ggsql-wasm/library/src/index.ts index 3b130354f..a6de6a9fa 100644 --- a/ggsql-wasm/library/src/index.ts +++ b/ggsql-wasm/library/src/index.ts @@ -2,11 +2,16 @@ export { convert_csv } from "./csv"; export { convert_parquet } from "./parquet"; +// Extension loading +export { initExtensionLoader, installExtension } from "./extensions"; + // Types export interface ColumnDescriptor { name: string; type: ColumnType; - values: Float64Array | Uint8Array | string[]; + // "binary" columns carry one Uint8Array per row; all others use the typed + // forms below. 
+ values: Float64Array | Uint8Array | string[] | Uint8Array[]; nulls: Uint8Array; } @@ -16,7 +21,8 @@ export type ColumnType = | "bool" | "date" | "datetime" - | "string"; + | "string" + | "binary"; export const EPOCH = Date.UTC(1970, 0, 1); export const MS_PER_DAY = 86400000; diff --git a/ggsql-wasm/library/src/parquet.ts b/ggsql-wasm/library/src/parquet.ts index 7789161b3..f09763961 100644 --- a/ggsql-wasm/library/src/parquet.ts +++ b/ggsql-wasm/library/src/parquet.ts @@ -22,6 +22,8 @@ export async function convert_parquet( const rows: Record[] = await parquetReadObjects({ file: asyncBuffer, + geoparquet: false, + utf8: false, }); if (rows.length === 0) return []; @@ -41,6 +43,7 @@ function inferColumnType(values: unknown[]): ColumnType { let hasNumber = false; let hasBool = false; let hasDate = false; + let hasBinary = false; let allSafeInt = true; let allMidnight = true; @@ -48,7 +51,9 @@ function inferColumnType(values: unknown[]): ColumnType { const v = values[i]; if (v === null || v === undefined) continue; - if (v instanceof Date) { + if (v instanceof Uint8Array) { + hasBinary = true; + } else if (v instanceof Date) { hasDate = true; if ( v.getUTCHours() !== 0 || @@ -71,6 +76,7 @@ function inferColumnType(values: unknown[]): ColumnType { } } + if (hasBinary) return "binary"; if (hasDate) return allMidnight ? "date" : "datetime"; if (hasBool && !hasNumber) return "bool"; if (hasNumber) return allSafeInt ? 
"i64" : "f64"; @@ -142,6 +148,21 @@ function buildColumn(name: string, rawValues: unknown[]): ColumnDescriptor { return { name, type, values, nulls }; } + if (type === "binary") { + const values: Uint8Array[] = []; + for (let i = 0; i < len; i++) { + const v = rawValues[i]; + if (v === null || v === undefined) { + values.push(new Uint8Array(0)); + nulls[i] = 0; + } else { + values.push(v as Uint8Array); + nulls[i] = 1; + } + } + return { name, type, values, nulls }; + } + // string const values: string[] = []; for (let i = 0; i < len; i++) { diff --git a/ggsql-wasm/library/tsconfig.json b/ggsql-wasm/library/tsconfig.json index b2699a944..b0842e053 100644 --- a/ggsql-wasm/library/tsconfig.json +++ b/ggsql-wasm/library/tsconfig.json @@ -1,8 +1,8 @@ { "compilerOptions": { - "target": "ES2020", + "target": "ES2022", "module": "ESNext", - "lib": ["ES2020", "DOM"], + "lib": ["ES2022", "DOM"], "moduleResolution": "bundler", "strict": true, "esModuleInterop": true, diff --git a/ggsql-wasm/src/lib.rs b/ggsql-wasm/src/lib.rs index 8fa1b3660..6c7c16c4e 100644 --- a/ggsql-wasm/src/lib.rs +++ b/ggsql-wasm/src/lib.rs @@ -1,5 +1,5 @@ use arrow::array::{ - ArrayRef, BooleanArray, Date32Array, Float64Array, Int64Array, StringArray, + ArrayRef, BinaryArray, BooleanArray, Date32Array, Float64Array, Int64Array, StringArray, TimestampMillisecondArray, }; use ggsql::array_util::value_to_string; @@ -16,16 +16,47 @@ use std::sync::Arc; use wasm_bindgen::prelude::*; // ============================================================================ -// JS bridge declarations — CSV and Parquet parsing only +// JS bridge declarations // ============================================================================ #[wasm_bindgen(module = "/library/dist/lib.js")] extern "C" { - #[wasm_bindgen(catch)] - async fn convert_parquet(data: &[u8]) -> Result; + #[wasm_bindgen(catch, js_name = convert_parquet)] + async fn convert_parquet_js(data: &[u8]) -> Result; - #[wasm_bindgen(catch)] - fn 
convert_csv(data: &[u8]) -> Result; + #[wasm_bindgen(catch, js_name = convert_csv)] + fn convert_csv_js(data: &[u8]) -> Result; + + #[wasm_bindgen(catch, js_name = initExtensionLoader)] + fn init_extension_loader_js(exports: &JsValue) -> Result<(), JsValue>; + + #[wasm_bindgen(catch, js_name = installExtension)] + async fn install_extension_js(name: &str, source: JsValue) -> Result; +} + +// ============================================================================ +// Package exports — forward to the JS helpers above +// ============================================================================ + +#[wasm_bindgen(js_name = convert_csv)] +pub fn convert_csv_export(data: &[u8]) -> Result { + convert_csv_js(data) +} + +#[wasm_bindgen(js_name = convert_parquet)] +pub async fn convert_parquet_export(data: &[u8]) -> Result { + convert_parquet_js(data).await +} + +#[wasm_bindgen(js_name = initExtensionLoader)] +pub fn init_extension_loader(exports: JsValue) -> Result<(), JsValue> { + init_extension_loader_js(&exports) +} + +#[wasm_bindgen(js_name = installExtension)] +pub async fn install_extension(name: String, source: JsValue) -> Result<(), JsValue> { + install_extension_js(&name, source).await?; + Ok(()) } // ============================================================================ @@ -117,6 +148,21 @@ fn columns_js_to_dataframe(columns_js: JsValue) -> Result { .collect(); Arc::new(StringArray::from(values)) } + "binary" => { + // One Uint8Array per row (e.g. WKB geometry from GeoParquet). 
+ let arr = js_sys::Array::from(&values_js); + let values: Vec>> = (0..arr.length()) + .zip(nulls.iter()) + .map(|(j, &n)| { + if n != 0 { + Some(js_sys::Uint8Array::new(&arr.get(j)).to_vec()) + } else { + None + } + }) + .collect(); + Arc::new(BinaryArray::from_iter(values.iter().map(|o| o.as_deref()))) + } "date" => { // Date32: days since Unix epoch let raw = js_sys::Float64Array::new(&values_js).to_vec(); @@ -251,7 +297,7 @@ impl GgsqlContext { /// Register a CSV file as a table from raw bytes pub fn register_csv(&self, name: &str, data: &[u8]) -> Result<(), JsValue> { - let columns_js = convert_csv(data) + let columns_js = convert_csv_js(data) .map_err(|e| JsValue::from_str(&format!("CSV parse error: {:?}", e)))?; let df = columns_js_to_dataframe(columns_js)?; let reader = self.reader.borrow(); @@ -262,7 +308,7 @@ impl GgsqlContext { /// Register a Parquet file as a table from raw bytes pub async fn register_parquet(&self, name: &str, data: &[u8]) -> Result<(), JsValue> { - let columns_js = convert_parquet(data) + let columns_js = convert_parquet_js(data) .await .map_err(|e| JsValue::from_str(&format!("Parquet parse error: {:?}", e)))?; let df = columns_js_to_dataframe(columns_js)?; @@ -277,7 +323,7 @@ impl GgsqlContext { for &name in ggsql::reader::data::KNOWN_DATASETS { if let Some(bytes) = ggsql::reader::data::builtin_parquet_bytes(name) { let table_name = ggsql::naming::builtin_data_table(name); - let columns_js = convert_parquet(bytes).await.map_err(|e| { + let columns_js = convert_parquet_js(bytes).await.map_err(|e| { JsValue::from_str(&format!("Parquet error for '{}': {:?}", name, e)) })?; let df = columns_js_to_dataframe(columns_js)?; @@ -290,6 +336,22 @@ impl GgsqlContext { Ok(()) } + /// Load a previously installed SQLite extension. + /// + /// `entry_point` is the C init function name. If omitted, SQLite + /// derives it from the extension name. 
+ pub fn load_extension(&self, name: &str, entry_point: Option) -> Result<(), JsValue> { + let reader = self.reader.borrow(); + let conn = reader.connection(); + unsafe { + conn.load_extension_enable() + .map_err(|e| JsValue::from_str(&format!("Enable load_extension error: {:?}", e)))?; + conn.load_extension(name, entry_point.as_deref()) + .map_err(|e| JsValue::from_str(&format!("Load extension error: {:?}", e)))?; + } + Ok(()) + } + /// Unregister a table pub fn unregister(&self, name: &str) -> Result<(), JsValue> { let reader = self.reader.borrow(); diff --git a/src/plot/projection/coord/map.rs b/src/plot/projection/coord/map.rs index d27b24e37..44344bdd7 100644 --- a/src/plot/projection/coord/map.rs +++ b/src/plot/projection/coord/map.rs @@ -258,15 +258,11 @@ impl BBox { dialect: &dyn SqlDialect, execute_query: &dyn Fn(&str) -> crate::Result, ) -> Option { - let envelope = format!( - "ST_MakeEnvelope({}, {}, {}, {})", - self.xmin, self.ymin, self.xmax, self.ymax - ); + let envelope = dialect.sql_make_envelope(self.xmin, self.ymin, self.xmax, self.ymax); let transformed = dialect.sql_st_transform(&envelope, &self.crs, target_crs); - let sql = format!( - "SELECT ST_XMin(g) AS xmin, ST_YMin(g) AS ymin, \ - ST_XMax(g) AS xmax, ST_YMax(g) AS ymax \ - FROM (SELECT {transformed} AS g)" + let sql = dialect.sql_geometry_bbox( + "g", + &format!("(SELECT {transformed} AS g) AS \"__ggsql_bbox__\""), ); execute_query(&sql) .ok() @@ -374,10 +370,9 @@ fn graticule_bbox( // degenerate or incomplete values. Use the clip boundary extent which // correctly represents the visible hemisphere. 
if let Some(wkt) = clip_boundary_wkt { - let sql = format!( - "SELECT ST_XMin(g) AS xmin, ST_YMin(g) AS ymin, \ - ST_XMax(g) AS xmax, ST_YMax(g) AS ymax \ - FROM (SELECT ST_GeomFromText('{wkt}') AS g)" + let sql = dialect.sql_geometry_bbox( + "g", + &format!("(SELECT ST_GeomFromText('{wkt}') AS g) AS \"__ggsql_bbox__\""), ); if let Ok(df) = execute_query(&sql) { if let Some(clip_bbox) = BBox::from_df(&df, "EPSG:4326") { diff --git a/src/reader/mod.rs b/src/reader/mod.rs index 3c108c5c5..a6609d0c8 100644 --- a/src/reader/mod.rs +++ b/src/reader/mod.rs @@ -201,6 +201,14 @@ pub trait SqlDialect { ) } + /// SQL expression building a rectangular polygon from corner coordinates. + /// + /// Default uses the PostGIS-style `ST_MakeEnvelope`. Override for backends + /// with different function names (e.g. SpatiaLite uses `BuildMbr`). + fn sql_make_envelope(&self, xmin: f64, ymin: f64, xmax: f64, ymax: f64) -> String { + format!("ST_MakeEnvelope({xmin}, {ymin}, {xmax}, {ymax})") + } + /// SQL statements to run before spatial operations. /// /// Override for backends that need an extension loaded (e.g. DuckDB spatial). 
diff --git a/src/reader/sqlite.rs b/src/reader/sqlite.rs index 301948dce..8f1e1f9f6 100644 --- a/src/reader/sqlite.rs +++ b/src/reader/sqlite.rs @@ -101,6 +101,14 @@ impl super::SqlDialect for SqliteDialect { } } + fn sql_make_envelope(&self, xmin: f64, ymin: f64, xmax: f64, ymax: f64) -> String { + format!("BuildMbr({xmin}, {ymin}, {xmax}, {ymax})") + } + + fn sql_ensure_geometry(&self, column: &str) -> String { + format!("COALESCE(GeomFromWKB({column}, 4326), {column})") + } + fn sql_geometry_bbox(&self, column: &str, from: &str) -> String { format!( "SELECT MIN(MbrMinX({column})) AS xmin, MIN(MbrMinY({column})) AS ymin, \ @@ -213,26 +221,6 @@ impl Default for SqliteReader { } } -/// Validate a table name -fn validate_table_name(name: &str) -> Result<()> { - if name.is_empty() { - return Err(GgsqlError::ReaderError("Table name cannot be empty".into())); - } - - let forbidden = ['\0', '\n', '\r']; - for ch in forbidden { - if name.contains(ch) { - return Err(GgsqlError::ReaderError(format!( - "Table name '{}' contains invalid character '{}'", - name, - ch.escape_default() - ))); - } - } - - Ok(()) -} - /// Map an Arrow DataType to a SQLite column type string fn arrow_type_to_sqlite(dtype: &DataType) -> &'static str { match dtype { @@ -249,6 +237,7 @@ fn arrow_type_to_sqlite(dtype: &DataType) -> &'static str { DataType::Date32 => "TEXT", DataType::Timestamp(_, _) => "TEXT", DataType::Time64(_) => "TEXT", + DataType::Binary | DataType::LargeBinary => "BLOB", _ => "TEXT", } } @@ -352,6 +341,14 @@ fn array_value_to_sqlite(array: &ArrayRef, row_idx: usize) -> rusqlite::types::V .and_then(|t| to_sql_value(&t)) .unwrap_or(Value::Null) } + DataType::Binary => { + let arr = array.as_any().downcast_ref::().unwrap(); + Value::Blob(arr.value(row_idx).to_vec()) + } + DataType::LargeBinary => { + let arr = array.as_any().downcast_ref::().unwrap(); + Value::Blob(arr.value(row_idx).to_vec()) + } _ => { // Fallback: use array_util::value_to_string 
Value::Text(crate::array_util::value_to_string(array, row_idx)) @@ -445,7 +442,7 @@ impl Reader for SqliteReader { } fn register(&self, name: &str, df: DataFrame, replace: bool) -> Result<()> { - validate_table_name(name)?; + super::validate_table_name(name)?; if self.table_exists(name) { if replace { @@ -710,6 +707,19 @@ fn sqlite_values_to_array(name: &str, values: Vec) -> Re } } + // A pure BLOB column (e.g. WKB geometry) maps to Arrow Binary so geometry + // auto-detection and spatial layers receive raw bytes, not a debug string. + if has_blob && !has_text && !has_int && !has_real { + let vals: Vec>> = values + .into_iter() + .map(|v| match v { + Value::Blob(b) => Some(b), + _ => None, + }) + .collect(); + return Ok(Arc::new(BinaryArray::from_iter(vals.iter().map(|o| o.as_deref()))) as ArrayRef); + } + if has_text || has_blob { let vals: Vec> = values .into_iter() @@ -1092,6 +1102,43 @@ mod tests { // Should fall back to String since we have mixed types } + #[test] + fn test_binary_column_stored_as_blob() { + let reader = SqliteReader::new().unwrap(); + + // Arrow Binary must reach SQLite as a BLOB (not stringified), so spatial + // functions like GeomFromWKB receive raw bytes. 
+ let blobs: ArrayRef = Arc::new(BinaryArray::from(vec![ + Some([0x01u8, 0x02, 0x03].as_slice()), + Some([0xDE, 0xAD, 0xBE, 0xEF].as_slice()), + None, + ])); + let df = DataFrame::new(vec![("b", blobs)]).unwrap(); + reader.register("blob_data", df, false).unwrap(); + + let result = reader + .execute_sql("SELECT typeof(b) AS t, hex(b) AS h FROM blob_data ORDER BY rowid") + .unwrap(); + assert_eq!(result.height(), 3); + let t = result.column("t").unwrap(); + let h = result.column("h").unwrap(); + assert_eq!(crate::array_util::value_to_string(t, 0), "blob"); + assert_eq!(crate::array_util::value_to_string(h, 0), "010203"); + assert_eq!(crate::array_util::value_to_string(h, 1), "DEADBEEF"); + assert_eq!(crate::array_util::value_to_string(t, 2), "null"); + + // Reading a BLOB column back yields Arrow Binary. + let back = reader + .execute_sql("SELECT b FROM blob_data ORDER BY rowid") + .unwrap(); + assert_eq!(back.column_dtype("b").unwrap(), DataType::Binary); + let col = back.column("b").unwrap(); + let arr = col.as_any().downcast_ref::().unwrap(); + assert_eq!(arr.value(0), &[0x01u8, 0x02, 0x03]); + assert_eq!(arr.value(1), &[0xDE, 0xAD, 0xBE, 0xEF]); + assert!(arr.is_null(2)); + } + #[test] fn test_date_column_roundtrip() { let reader = SqliteReader::new().unwrap(); diff --git a/tree-sitter-ggsql/bindings/rust/build.rs b/tree-sitter-ggsql/bindings/rust/build.rs index 3b755f68c..d9e1d491d 100644 --- a/tree-sitter-ggsql/bindings/rust/build.rs +++ b/tree-sitter-ggsql/bindings/rust/build.rs @@ -137,7 +137,6 @@ fn main() { .include(&src_dir) .opt_level_str(opt_level) .file(sysroot_dir.join("src").join("stdio.c")) - .file(sysroot_dir.join("src").join("stdlib.c")) .file(sysroot_dir.join("src").join("string.c")) .file(sysroot_dir.join("src").join("wctype.c")) .compile("stdlib"); diff --git a/tree-sitter-ggsql/bindings/rust/lib.rs b/tree-sitter-ggsql/bindings/rust/lib.rs index 41cd36f96..8eed77ea7 100644 --- a/tree-sitter-ggsql/bindings/rust/lib.rs +++ 
b/tree-sitter-ggsql/bindings/rust/lib.rs @@ -19,6 +19,89 @@ pub fn language() -> Language { /// The node types and field names used by the ggsql grammar pub const NODE_TYPES: &str = include_str!("../../src/node-types.json"); +/// The C libc allocator for wasm32-unknown-unknown builds. +/// +/// The C code linked into the module (the generated parser and the +/// tree-sitter runtime) has no libc, so `malloc` and friends are defined +/// here on the Rust global allocator and the whole module shares one heap. +/// Each allocation carries a header recording its size, so the `Layout` +/// can be reconstructed on free. +#[cfg(all(target_arch = "wasm32", target_os = "unknown"))] +mod wasm_alloc { + use core::ptr::{null_mut, write_bytes}; + use std::alloc::{alloc, dealloc, realloc as rust_realloc, Layout}; + + const HEADER: usize = core::mem::size_of::<usize>() * 2; + + #[no_mangle] + unsafe extern "C" fn malloc(size: usize) -> *mut u8 { + let Some(total) = size.checked_add(HEADER) else { + return null_mut(); + }; + let Ok(layout) = Layout::from_size_align(total, HEADER) else { + return null_mut(); + }; + let ptr = alloc(layout); + if ptr.is_null() { + return null_mut(); + } + *ptr.cast::<usize>() = size; + ptr.add(HEADER) + } + + #[no_mangle] + unsafe extern "C" fn free(ptr: *mut u8) { + if ptr.is_null() { + return; + } + let base = ptr.sub(HEADER); + let size = *base.cast::<usize>(); + dealloc( + base, + Layout::from_size_align_unchecked(size + HEADER, HEADER), + ); + } + + #[no_mangle] + unsafe extern "C" fn realloc(ptr: *mut u8, new_size: usize) -> *mut u8 { + if ptr.is_null() { + return malloc(new_size); + } + let Some(new_total) = new_size.checked_add(HEADER) else { + return null_mut(); + }; + if Layout::from_size_align(new_total, HEADER).is_err() { + return null_mut(); + } + let base = ptr.sub(HEADER); + let size = *base.cast::<usize>(); + let layout = Layout::from_size_align_unchecked(size + HEADER, HEADER); + let new = rust_realloc(base, layout, new_total); + if new.is_null() { + return
null_mut(); + } + *new.cast::<usize>() = new_size; + new.add(HEADER) + } + + #[no_mangle] + unsafe extern "C" fn calloc(count: usize, size: usize) -> *mut u8 { + let Some(total) = count.checked_mul(size) else { + return null_mut(); + }; + let ptr = malloc(total); + if !ptr.is_null() { + write_bytes(ptr, 0, total); + } + ptr + } + + #[no_mangle] + unsafe extern "C" fn abort() -> ! { + std::process::abort() + } +} + /// The highlighting queries for ggsql syntax pub const HIGHLIGHTS_QUERY: &str = include_str!("../../queries/highlights.scm"); diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdio.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdio.c index e9031a0b8..57758debe 100644 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdio.c +++ b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdio.c @@ -106,6 +106,7 @@ static int ptr_to_str(void *ptr, char *buffer) { return 2 + len; } +__attribute__((weak)) char *strncpy(char *dest, const char *src, size_t n) { char *d = dest; const char *s = src; @@ -259,6 +260,7 @@ static int vsnprintf_impl(char *buffer, size_t buffsz, const char *format, va_li return total_chars; } +__attribute__((weak)) int snprintf(char *restrict buffer, size_t buffsz, const char *restrict format, ...)
{ if (!buffer || buffsz == 0 || !format) return -1; @@ -270,38 +272,45 @@ int snprintf(char *restrict buffer, size_t buffsz, const char *restrict format, return result; } +__attribute__((weak)) int vsnprintf(char *restrict buffer, size_t buffsz, const char *restrict format, va_list vlist) { return vsnprintf_impl(buffer, buffsz, format, vlist); } +__attribute__((weak)) int fclose(FILE *stream) { (void)stream; return 0; } +__attribute__((weak)) FILE* fdopen(int fd, const char *mode) { (void)fd; (void)mode; return 0; } +__attribute__((weak)) int fputc(int c, FILE *stream) { (void)stream; return c; } +__attribute__((weak)) int fputs(const char *restrict str, FILE *restrict stream) { (void)str; (void)stream; return 0; } +__attribute__((weak)) size_t fwrite(const void *restrict buffer, size_t size, size_t nmemb, FILE *restrict stream) { (void)buffer; (void)stream; return size * nmemb; } +__attribute__((weak)) int fprintf(FILE *restrict stream, const char *restrict format, ...) { (void)stream; (void)format; diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c deleted file mode 100644 index 0a4510735..000000000 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/stdlib.c +++ /dev/null @@ -1,163 +0,0 @@ -// This file implements a very simple allocator for external scanners running -// in Wasm. Allocation is just bumping a static pointer and growing the heap -// as needed, and freeing is just adding the freed region to a free list. -// When additional memory is allocated, the free list is searched first. -// If there is not a suitable region in the free list, the heap is -// grown as necessary, and the allocation is made at the end of the heap. -// When the heap is reset, all allocated memory is considered freed. 
- -#include -#include -#include - -extern void tree_sitter_debug_message(const char *, size_t); - -#define PAGESIZE 0x10000 -#define MAX_HEAP_SIZE (1024 * 1024 * 1024) - -typedef struct Region { - size_t size; - struct Region *next; - char data[0]; -} Region; - -static Region *heap_end = NULL; -static Region *heap_start = NULL; -static Region *next = NULL; -static Region *free_list = NULL; - -// Get the region metadata for the given heap pointer. -static inline Region *region_for_ptr(void *ptr) { - return ((Region *)ptr) - 1; -} - -// Get the location of the next region after the given region, -// if the given region had the given size. -static inline Region *region_after(Region *self, size_t len) { - char *address = self->data + len; - char *aligned = (char *)((uintptr_t)(address + 3) & ~0x3); - return (Region *)aligned; -} - -static void *get_heap_end() { - return (void *)(__builtin_wasm_memory_size(0) * PAGESIZE); -} - -static int grow_heap(size_t size) { - size_t new_page_count = ((size - 1) / PAGESIZE) + 1; - return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX; -} - -// Grows the heap if necessary to fit a region at the _end_ of the heap -// ending at `region_end` by `size` bytes. -// -// Returns 0 if the heap could not be grown, 1 otherwise. -static inline int grow_heap_for_region(Region *region_end, size_t size) { - if (region_end > heap_end) { - if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) return 0; - if (!grow_heap(size)) return 0; - heap_end = get_heap_end(); - } - return 1; -} - -// Clear out the heap, and move it to the given address. 
-void reset_heap(void *new_heap_start) { - heap_start = new_heap_start; - next = new_heap_start; - heap_end = get_heap_end(); - free_list = NULL; -} - -void *malloc(size_t size) { - if (size == 0) return NULL; - - Region *prev = NULL; - Region *curr = free_list; - while (curr != NULL) { - if (curr->size >= size) { - if (prev == NULL) { - free_list = curr->next; - } else { - prev->next = curr->next; - } - return &curr->data; - } - prev = curr; - curr = curr->next; - } - - Region *region_end = region_after(next, size); - - if (!grow_heap_for_region(region_end, size)) return NULL; - - void *result = &next->data; - next->size = size; - next = region_end; - - return result; -} - -void free(void *ptr) { - if (ptr == NULL) return; - - Region *region = region_for_ptr(ptr); - Region *region_end = region_after(region, region->size); - - // When freeing the last allocated pointer, re-use that - // pointer for the next allocation. - if (region_end == next) { - next = region; - } else { - region->next = free_list; - free_list = region; - } -} - -void *calloc(size_t count, size_t size) { - void *result = malloc(count * size); - if (!result) return NULL; - memset(result, 0, count * size); - return result; -} - -void *realloc(void *ptr, size_t new_size) { - if (ptr == NULL) { - return malloc(new_size); - } - if (new_size == 0) { - free(ptr); - return NULL; - } - - - Region *region = region_for_ptr(ptr); - Region *region_end = region_after(region, region->size); - - // When reallocating the last allocated region, resize - // in place if possible, return the same pointer, and - // skip copying the data. 
- if (region_end == next) { - Region *new_region_end = region_after(region, new_size); - - size_t additional_size = (char *)new_region_end - (char *)heap_end; - if (!grow_heap_for_region(new_region_end, additional_size)) return NULL; - - region->size = new_size; - next = new_region_end; - return &region->data; - } - - void *result = malloc(new_size); - if (!result) return NULL; - - size_t copy_size = region->size < new_size ? region->size : new_size; - memcpy(result, &region->data, copy_size); - - free(ptr); - return result; -} - -__attribute__((noreturn)) void abort(void) { - __builtin_trap(); -} diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/string.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/string.c index 3f1b9a0fa..1a79a39b7 100644 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/string.c +++ b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/string.c @@ -1,6 +1,7 @@ #include // Derived from musl (MIT): https://git.musl-libc.org/cgit/musl/tree/src/string/memchr.c +__attribute__((weak)) void *memchr(const void *src, int c, size_t n) { const unsigned char *s = src; c = (unsigned char)c; @@ -8,6 +9,7 @@ void *memchr(const void *src, int c, size_t n) { return n ?
(void *)s : 0; } +__attribute__((weak)) int memcmp(const void *lhs, const void *rhs, size_t count) { const unsigned char *l = lhs; const unsigned char *r = rhs; @@ -21,6 +23,7 @@ int memcmp(const void *lhs, const void *rhs, size_t count) { return 0; } +__attribute__((weak)) void *memcpy(void *restrict dst, const void *restrict src, size_t size) { unsigned char *d = dst; const unsigned char *s = src; @@ -30,6 +33,7 @@ void *memcpy(void *restrict dst, const void *restrict src, size_t size) { return dst; } +__attribute__((weak)) void *memmove(void *dst, const void *src, size_t count) { unsigned char *d = dst; const unsigned char *s = src; @@ -47,6 +51,7 @@ void *memmove(void *dst, const void *src, size_t count) { return dst; } +__attribute__((weak)) void *memset(void *dst, int value, size_t count) { unsigned char *p = dst; while (count--) { @@ -55,6 +60,7 @@ void *memset(void *dst, int value, size_t count) { return dst; } +__attribute__((weak)) char *strchr(const char *str, int c) { while (*str != (char)c) { if (*str == '\0') { @@ -65,12 +71,14 @@ char *strchr(const char *str, int c) { return (char *)str; } +__attribute__((weak)) size_t strlen(const char *str) { const char *s = str; while (*s) s++; return s - str; } +__attribute__((weak)) int strncmp(const char *left, const char *right, size_t n) { while (n-- > 0) { if (*left != *right) { diff --git a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/wctype.c b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/wctype.c index 4bcc276f7..6959ac98b 100644 --- a/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/wctype.c +++ b/tree-sitter-ggsql/bindings/rust/wasm-sysroot/src/wctype.c @@ -1,13 +1,16 @@ #include +__attribute__((weak)) int iswlower(wint_t wch) { return (unsigned)wch - L'a' < 26; } +__attribute__((weak)) int iswupper(wint_t wch) { return (unsigned)wch - L'A' < 26; } +__attribute__((weak)) int iswpunct(wint_t wch) { return (wch >= 33 && wch <= 47) || (wch >= 58 && wch <= 64) ||