From bd6d2910dcc1d31242262b272492100c82be9038 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 4 Mar 2026 00:34:42 -0700 Subject: [PATCH 1/2] =?UTF-8?q?perf:=20use=20single=20transaction=20for=20?= =?UTF-8?q?AST=20node=20insertion=20(3.6s=20=E2=86=92=20350ms)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit buildAstNodes was calling tx(rows) per-file inside the loop, creating 172 separate SQLite transactions each with a BEGIN/COMMIT/fsync cycle. All other phases (CFG, dataflow, complexity) correctly used a single wrapping transaction. Collect all rows across files first, then insert in one tx(allRows) call. Benchmarked: astMs drops from ~3600ms to ~350ms (native) and ~547ms (WASM). Native per-file build time: 24.9 → 8.5 ms/file. Impact: 1 function changed, 12 affected --- src/ast.js | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/ast.js b/src/ast.js index c88d73a..59acdf0 100644 --- a/src/ast.js +++ b/src/ast.js @@ -165,13 +165,12 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { } }); - let totalInserted = 0; + const allRows = []; for (const [relPath, symbols] of fileSymbols) { - const rows = []; const defs = symbols.definitions || []; - // Pre-load all node IDs for this file into a map + // Pre-load all node IDs for this file into a map (read-only, fast) const nodeIdMap = new Map(); for (const row of bulkGetNodeIds.all(relPath)) { nodeIdMap.set(`${row.name}|${row.kind}|${row.line}`, row.id); @@ -186,7 +185,7 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { parentNodeId = nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; } - rows.push({ + allRows.push({ file: relPath, line: call.line, kind: 'call', @@ -205,7 +204,7 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { // WASM path: walk the tree-sitter AST 
const astRows = []; walkAst(symbols._tree.rootNode, defs, relPath, astRows, nodeIdMap); - rows.push(...astRows); + allRows.push(...astRows); } else if (symbols.astNodes?.length) { // Native path: use pre-extracted AST nodes from Rust for (const n of symbols.astNodes) { @@ -215,7 +214,7 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { parentNodeId = nodeIdMap.get(`${parentDef.name}|${parentDef.kind}|${parentDef.line}`) || null; } - rows.push({ + allRows.push({ file: relPath, line: n.line, kind: n.kind, @@ -227,14 +226,13 @@ export async function buildAstNodes(db, fileSymbols, _rootDir, _engineOpts) { } } } + } - if (rows.length > 0) { - tx(rows); - totalInserted += rows.length; - } + if (allRows.length > 0) { + tx(allRows); } - debug(`AST extraction: ${totalInserted} nodes stored`); + debug(`AST extraction: ${allRows.length} nodes stored`); } /** From ef23a817136114fdf1af5415877491c40f94b296 Mon Sep 17 00:00:00 2001 From: carlos-alm <127798846+carlos-alm@users.noreply.github.com> Date: Wed, 4 Mar 2026 00:45:10 -0700 Subject: [PATCH 2/2] chore: bump version to 3.0.3 --- CHANGELOG.md | 6 ++++++ crates/codegraph-core/Cargo.toml | 2 +- package-lock.json | 4 ++-- package.json | 2 +- 4 files changed, 10 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fd9c0e0..4573de3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,12 @@ All notable changes to this project will be documented in this file. See [commit-and-tag-version](https://github.com/absolute-version/commit-and-tag-version) for commit guidelines. 
+## [3.0.3](https://github.com/optave/codegraph/compare/v3.0.2...v3.0.3) (2026-03-04) + +### Performance + +* **ast:** use single transaction for AST node insertion — astMs drops from ~3600ms to ~350ms (native) and ~547ms (WASM), reducing overall native build from 24.9 to 8.5 ms/file ([#333](https://github.com/optave/codegraph/pull/333)) + ## [3.0.2](https://github.com/optave/codegraph/compare/v3.0.1...v3.0.2) (2026-03-04) **Dataflow goes multi-language, build performance recovery, and native engine parity fixes.** This patch extends dataflow analysis from JS/TS-only to all 11 supported languages, recovers build performance lost after CFG/dataflow became default-on, fixes language-aware identifier collection in dataflow, and closes a native engine scoping bug for constants. diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index d58c746..2f0a110 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "codegraph-core" -version = "3.0.2" +version = "3.0.3" edition = "2021" license = "Apache-2.0" diff --git a/package-lock.json b/package-lock.json index d1081c5..fe1e288 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@optave/codegraph", - "version": "3.0.2", + "version": "3.0.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@optave/codegraph", - "version": "3.0.2", + "version": "3.0.3", "license": "Apache-2.0", "dependencies": { "better-sqlite3": "^12.6.2", diff --git a/package.json b/package.json index 80562f6..144220d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@optave/codegraph", - "version": "3.0.2", + "version": "3.0.3", "description": "Local code graph CLI — parse codebases with tree-sitter, build dependency graphs, query them", "type": "module", "main": "src/index.js",