diff --git a/CHANGELOG.md b/CHANGELOG.md index c9703a785..1701a5d9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,10 @@ and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## [Unreleased] +### New Features + +- **CodeGraph now indexes Elixir** (`.ex` / `.exs`) — modules (including nested), `def`/`defp`/`defmacro`/`defmacrop`/`defguard`/`defdelegate` with public/private visibility, multi-clause functions folded into one symbol, `alias`/`import`/`require`/`use` dependencies (including multi-alias `alias A.{B, C}` expansion), `defprotocol`/`defimpl` (with `implements` edges), `defstruct`/`defexception`, and call edges (qualified `Mod.fun` and local calls, including inside `if`/`case`/`with`/pipe bodies). Because tree-sitter-elixir parses every construct as a generic `call`, extraction dispatches on the macro identifier rather than node types; Phoenix/OTP codebases get the full explore / impact / callers surface. + ## [1.0.0] - 2026-06-12 diff --git a/README.md b/README.md index 09f1f4209..5674b0d51 100644 --- a/README.md +++ b/README.md @@ -236,7 +236,7 @@ CodeGraph cuts **tokens, tool calls, and wall-clock time on every repo** — acr | **Full-Text Search** | Find code by name instantly across your entire codebase, powered by FTS5 | | **Impact Analysis** | Trace callers, callees, and the full impact radius of any symbol before making changes | | **Always Fresh** | File watcher uses native OS events (FSEvents/inotify/ReadDirectoryChangesW) with debounced auto-sync — the graph stays current as you code, zero config | -| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, R, Svelte, Vue, Astro, Liquid, Pascal/Delphi | +| **20+ Languages** | TypeScript, JavaScript, Python, Go, Rust, Java, C#, PHP, Ruby, C, C++, Objective-C, Swift, Kotlin, Scala, Dart, Lua, Luau, R, Elixir, Svelte, Vue, Astro, Liquid, Pascal/Delphi | | **Framework-aware Routes** | 
Recognizes web-framework routing files and links URL patterns to their handlers across 17 frameworks | | **Mixed iOS / React Native / Expo** | Closes cross-language flows that static parsing misses: Swift ↔ ObjC bridging, React Native legacy bridge + TurboModules + Fabric view components, native → JS event emitters, Expo Modules | | **100% Local** | No data leaves your machine. No API keys. No external services. SQLite database only | @@ -673,6 +673,7 @@ is written): | Lua | `.lua` | Full support (functions, methods with receivers, local variables, `require` imports, call edges) | | R | `.R` `.r` | Full support (functions in every assignment form, S4/R5/R6 classes with methods, `library`/`require` imports, `source()` file references, call edges) | | Luau | `.luau` | Full support (everything in Lua, plus `type`/`export type` aliases, typed signatures, and Roblox instance-path `require`) | +| Elixir | `.ex`, `.exs` | Full support (modules incl. nested, `def`/`defp`/`defmacro`/`defguard`/`defdelegate` with visibility, multi-clause folding, `alias`/`import`/`require`/`use` deps incl. 
multi-alias `A.{B, C}`, `defprotocol`/`defimpl` with implements edges, `defstruct`/`defexception`, call edges) |
 
 ## Measured cross-file coverage
 
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index df825f529..4d90cec25 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -7161,3 +7161,232 @@ GeomPoint <- ggproto("GeomPoint", Geom,
     });
   });
 });
+
+describe('Elixir Extraction', () => {
+  describe('Language detection', () => {
+    it('should detect .ex and .exs files', () => {
+      expect(detectLanguage('lib/my_app/accounts.ex')).toBe('elixir');
+      expect(detectLanguage('test/accounts_test.exs')).toBe('elixir');
+    });
+  });
+
+  describe('Module extraction', () => {
+    it('should extract a defmodule as a module node', () => {
+      const code = `
+defmodule MyApp.Accounts do
+  @moduledoc "Accounts context"
+end
+`;
+      const result = extractFromSource('lib/accounts.ex', code);
+      const mod = result.nodes.find((n) => n.kind === 'module');
+      expect(mod).toMatchObject({ kind: 'module', name: 'MyApp.Accounts', language: 'elixir' });
+    });
+
+    it('should extract nested modules', () => {
+      const code = `
+defmodule A do
+  defmodule B do
+  end
+end
+`;
+      const result = extractFromSource('lib/a.ex', code);
+      const names = result.nodes.filter((n) => n.kind === 'module').map((n) => n.name);
+      expect(names).toContain('A');
+      expect(names).toContain('B');
+    });
+  });
+
+  describe('Function extraction', () => {
+    it('should extract def as a public function', () => {
+      const code = `
+defmodule M do
+  def get_user(id), do: id
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'get_user');
+      expect(fn).toBeDefined();
+      expect(fn?.visibility).toBe('public');
+    });
+
+    it('should extract defp as a private function', () => {
+      const code = `
+defmodule M do
+  defp helper(x), do: x
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'helper');
+      expect(fn).toBeDefined();
+      expect(fn?.visibility).toBe('private');
+    });
+
+    it('should extract defmacro', () => {
+      const code = `
+defmodule M do
+  defmacro mymacro(x) do
+    quote do: unquote(x)
+  end
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'mymacro');
+      expect(fn).toBeDefined();
+    });
+
+    it('should extract a function with a guard', () => {
+      const code = `
+defmodule M do
+  def bar(x) when is_integer(x), do: x
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'bar');
+      expect(fn).toBeDefined();
+    });
+
+    it('should dedupe multiple clauses of the same name/arity into one node', () => {
+      const code = `
+defmodule M do
+  def get(id) when is_integer(id), do: id
+  def get(_), do: nil
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const gets = result.nodes.filter((n) => n.kind === 'function' && n.name === 'get');
+      expect(gets.length).toBe(1);
+    });
+
+    it('should name an operator definition after the operator, not its left operand', () => {
+      const code = `
+defmodule M do
+  def left <> right, do: [left, right]
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const names = result.nodes.filter((n) => n.kind === 'function').map((n) => n.name);
+      expect(names).toContain('<>');
+      expect(names).not.toContain('left');
+    });
+  });
+
+  describe('Imports / dependencies', () => {
+    it('should extract alias', () => {
+      const code = `
+defmodule M do
+  alias MyApp.Repo
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const imp = result.nodes.find((n) => n.kind === 'import');
+      expect(imp?.name).toBe('MyApp.Repo');
+    });
+
+    it('should expand multi-alias into one import each', () => {
+      const code = `
+defmodule M do
+  alias MyApp.Accounts.{User, Profile}
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const names = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name);
+      expect(names).toContain('MyApp.Accounts.User');
+      expect(names).toContain('MyApp.Accounts.Profile');
+    });
+
+    it('should extract import, require, and use', () => {
+      const code = `
+defmodule M do
+  import Ecto.Query
+  require Logger
+  use GenServer
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const names = result.nodes.filter((n) => n.kind === 'import').map((n) => n.name);
+      expect(names).toContain('Ecto.Query');
+      expect(names).toContain('Logger');
+      expect(names).toContain('GenServer');
+    });
+  });
+
+  describe('Structural macros', () => {
+    it('should extract defprotocol as an interface and its callbacks', () => {
+      const code = `
+defprotocol Sizeable do
+  def size(data)
+end
+`;
+      const result = extractFromSource('lib/sizeable.ex', code);
+      const proto = result.nodes.find((n) => n.kind === 'interface' && n.name === 'Sizeable');
+      expect(proto).toBeDefined();
+      const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'size');
+      expect(fn).toBeDefined();
+    });
+
+    it('should extract defstruct as a struct node', () => {
+      const code = `
+defmodule User do
+  defstruct [:id, :name]
+end
+`;
+      const result = extractFromSource('lib/user.ex', code);
+      const st = result.nodes.find((n) => n.kind === 'struct');
+      expect(st).toBeDefined();
+    });
+
+    it('should extract defimpl with an implements reference', () => {
+      const code = `
+defimpl Sizeable, for: List do
+  def size(list), do: length(list)
+end
+`;
+      const result = extractFromSource('lib/sizeable_list.ex', code);
+      const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'size');
+      expect(fn).toBeDefined();
+      const impl = result.unresolvedReferences.find(
+        (r) => r.referenceKind === 'implements' && r.referenceName === 'Sizeable'
+      );
+      expect(impl).toBeDefined();
+    });
+
+    it('should default a defimpl without for: to the enclosing module', () => {
+      const code = `
+defmodule User do
+  defimpl Sizeable do
+    def size(_user), do: 1
+  end
+end
+`;
+      const result = extractFromSource('lib/user.ex', code);
+      const impl = result.nodes.find((n) => n.kind === 'module' && n.name === 'Sizeable.User');
+      expect(impl).toBeDefined();
+    });
+
+    it('should extract defdelegate as a function', () => {
+      const code = `
+defmodule M do
+  defdelegate len(list), to: List, as: :length
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const fn = result.nodes.find((n) => n.kind === 'function' && n.name === 'len');
+      expect(fn).toBeDefined();
+    });
+  });
+
+  describe('Call edges', () => {
+    it('should record a qualified call inside a function body', () => {
+      const code = `
+defmodule M do
+  def clean(name), do: String.trim(name)
+end
+`;
+      const result = extractFromSource('lib/m.ex', code);
+      const call = result.unresolvedReferences.find(
+        (r) => r.referenceKind === 'calls' && r.referenceName === 'String.trim'
+      );
+      expect(call).toBeDefined();
+    });
+  });
+});
diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts
index ef6307a92..e8ccda4a7 100644
--- a/src/extraction/grammars.ts
+++ b/src/extraction/grammars.ts
@@ -39,6 +39,7 @@ const WASM_GRAMMAR_FILES: Record = {
   r: 'tree-sitter-r.wasm',
   luau: 'tree-sitter-luau.wasm',
   objc: 'tree-sitter-objc.wasm',
+  elixir: 'tree-sitter-elixir.wasm',
 };
 
 /**
@@ -96,6 +97,9 @@ export const EXTENSION_MAP: Record = {
   '.vue': 'vue',
   '.astro': 'astro',
   '.r': 'r',
+  // Elixir source (.ex) and scripts (.exs)
+  '.ex': 'elixir',
+  '.exs': 'elixir',
   '.pas': 'pascal',
   '.dpr': 'pascal',
   '.dpk': 'pascal',
@@ -404,6 +408,7 @@ export function getLanguageDisplayName(language: Language): string {
     go: 'Go',
     rust: 'Rust',
     r: 'R',
+    elixir: 'Elixir',
     java: 'Java',
     c: 'C',
     cpp: 'C++',
diff --git a/src/extraction/languages/elixir.ts b/src/extraction/languages/elixir.ts
new file mode 100644
index 000000000..7a3341ec3
--- /dev/null
+++ b/src/extraction/languages/elixir.ts
@@ -0,0 +1,311 @@
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText } from '../tree-sitter-helpers';
+import type { LanguageExtractor, ExtractorContext } from '../tree-sitter-types';
+
+/**
+ * Elixir extraction.
+ *
+ * tree-sitter-elixir is metaprogramming-first: there are almost no dedicated
+ * declaration node types. `defmodule`, `def`, `defp`, `alias`, `import`, `if`,
+ * `case` … all parse as the SAME shape — a `call` node:
+ *
+ *   (call target: (identifier "<macro>") (arguments …) (do_block …)?)
+ *
+ * So the whole language is handled through the `visitNode` hook, which inspects
+ * each `call`'s target identifier and dispatches on the macro name. Real
+ * function calls (target not a known macro) become `calls` edges; control-flow
+ * special forms (`if`/`case`/…) are descended into but not recorded as calls.
+ */
+
+/** Macros that introduce a callable definition. */
+const DEF_MACROS = new Set([
+  'def', 'defp', 'defmacro', 'defmacrop', 'defguard', 'defguardp', 'defdelegate',
+]);
+/** The subset of DEF_MACROS that are private. */
+const PRIVATE_DEFS = new Set(['defp', 'defmacrop', 'defguardp']);
+/** Dependency macros — each records an `import` node (module dependency). */
+const DEP_MACROS = new Set(['alias', 'import', 'require', 'use']);
+/** Macros that define the enclosing module's data shape. */
+const STRUCT_MACROS = new Set(['defstruct', 'defexception']);
+/**
+ * Kernel special forms / control-flow macros. They parse as `call`s but are not
+ * meaningful call edges; we still descend into their bodies for nested calls.
+ */
+const SKIP_CALL = new Set([
+  'if', 'unless', 'case', 'cond', 'with', 'for', 'try', 'receive',
+  'quote', 'unquote', 'unquote_splicing', 'fn', 'super',
+]);
+
+/** The `target` of a call: the macro/function identifier (or a `dot` for `Mod.fun`). */
+function callTarget(node: SyntaxNode): SyntaxNode | null {
+  return node.childForFieldName('target') ?? node.namedChild(0);
+}
+
+/** The `arguments` node of a call, if any. */
+function callArgs(node: SyntaxNode): SyntaxNode | null {
+  return (
+    node.childForFieldName('arguments') ??
+    node.namedChildren.find((c) => c.type === 'arguments') ??
+    null
+  );
+}
+
+/** The trailing `do … end` block of a call, if present. */
+function doBlock(node: SyntaxNode): SyntaxNode | null {
+  return node.namedChildren.find((c) => c.type === 'do_block') ?? null;
+}
+
+/**
+ * Extract a definition's simple name from a def-macro's arguments. Handles:
+ *   def foo(a)            → call(target: foo)
+ *   def foo(a) when guard → binary_operator(when){ left: call(target: foo) }
+ *   def foo               → identifier(foo)   (zero-arg, no parens)
+ *   def a <> b            → binary_operator(<>)   (operator definition → "<>")
+ *
+ * Returns null when no name can be determined.
+ */
+function defName(args: SyntaxNode | null): string | null {
+  if (!args) return null;
+  let head = args.namedChild(0);
+  if (!head) return null;
+  if (head.type === 'binary_operator') {
+    const op = head.childForFieldName('operator')?.text;
+    // `when` guard wraps the head: take its left operand. A node without an
+    // `operator` field is treated as a guard too (the previous behaviour).
+    if (op === undefined || op === 'when') {
+      head = head.childForFieldName('left') ?? head;
+    }
+  }
+  // Operator definition (`def a <> b`): the symbol being defined is the
+  // operator itself, not its left operand.
+  if (head.type === 'binary_operator') {
+    const op = head.childForFieldName('operator')?.text;
+    return op && op !== 'when' ? op : null;
+  }
+  if (head.type === 'call') {
+    // The target's own text is the name (a plain identifier in the usual case).
+    return callTarget(head)?.text ?? null;
+  }
+  if (head.type === 'identifier') {
+    return head.text;
+  }
+  return null;
+}
+
+/**
+ * Predict the qualifiedName the core will assign (scope node names joined by
+ * `::`). Used to fold a multi-clause function — Elixir lists each clause as its
+ * own `def`, but they are one symbol; a second node with the same qualifiedName
+ * only creates resolver ambiguity.
+ */
+function qualifiedNameFor(ctx: ExtractorContext, name: string): string {
+  const parts: string[] = [];
+  for (const id of ctx.nodeStack) {
+    const n = ctx.nodes.find((x) => x.id === id);
+    if (n && n.kind !== 'file') parts.push(n.name);
+  }
+  parts.push(name);
+  return parts.join('::');
+}
+
+/** Visit every body a def can carry — a `do … end` block and/or a `do:` keyword. */
+function visitDefBody(node: SyntaxNode, ctx: ExtractorContext): void {
+  const block = doBlock(node);
+  if (block) {
+    for (let i = 0; i < block.namedChildCount; i++) {
+      const child = block.namedChild(i);
+      if (child) ctx.visitNode(child);
+    }
+  }
+  const args = callArgs(node);
+  const kw = args?.namedChildren.find((c) => c.type === 'keywords');
+  if (kw) {
+    for (let i = 0; i < kw.namedChildCount; i++) {
+      const pair = kw.namedChild(i);
+      if (pair?.type === 'pair') {
+        const val = pair.childForFieldName('value') ?? pair.namedChild(1);
+        if (val) ctx.visitNode(val);
+      }
+    }
+  }
+}
+
+/**
+ * LanguageExtractor for Elixir. All node-type lists are empty; extraction is
+ * done entirely by the `visitNode` hook, which returns `true` when it has
+ * handled a node (and its children) itself and `false` to let the core descend.
+ */
+export const elixirExtractor: LanguageExtractor = {
+  // Elixir has no dedicated declaration node types — everything is dispatched
+  // through visitNode below. `callTypes` is empty because the hook records
+  // calls itself (the core's extractCall keys off a `function` field Elixir
+  // lacks).
+  functionTypes: [],
+  classTypes: [],
+  methodTypes: [],
+  interfaceTypes: [],
+  structTypes: [],
+  enumTypes: [],
+  typeAliasTypes: [],
+  importTypes: [],
+  callTypes: [],
+  variableTypes: [],
+  nameField: 'target',
+  bodyField: 'do_block',
+  paramsField: 'arguments',
+
+  visitNode: (node, ctx) => {
+    // Module attributes: `@doc …`, `@spec …`, `@moduledoc …`, `@my_attr …`
+    // parse as `unary_operator` over a call. Not symbols in v1 — consume so the
+    // inner call isn't mis-recorded as a call to "doc"/"spec".
+    if (node.type === 'unary_operator') {
+      const txt = getNodeText(node, ctx.source);
+      if (txt.trimStart().startsWith('@')) return true;
+      return false; // -x / !flag / ^pin — let the core descend for nested calls
+    }
+
+    if (node.type !== 'call') return false;
+
+    const target = callTarget(node);
+    if (!target) return false;
+    // A qualified target (`Mod.fun`) is always a real call, never a macro.
+    const macro = target.type === 'identifier' ? target.text : null;
+    const args = callArgs(node);
+
+    // --- defmodule / defprotocol ---
+    if (macro === 'defmodule' || macro === 'defprotocol') {
+      // Module names are normally an `alias` (`MyApp.Accounts`); fall back to the
+      // first argument's text (atom / dynamic names) so the node is not unnamed.
+      const nameNode =
+        args?.namedChildren.find((c) => c.type === 'alias') ?? args?.namedChild(0);
+      const name = nameNode ? getNodeText(nameNode, ctx.source) : '';
+      const kind = macro === 'defprotocol' ? 'interface' : 'module';
+      const created = ctx.createNode(kind, name, node);
+      const block = doBlock(node);
+      if (created && block) {
+        ctx.pushScope(created.id);
+        for (let i = 0; i < block.namedChildCount; i++) {
+          const child = block.namedChild(i);
+          if (child) ctx.visitNode(child);
+        }
+        ctx.popScope();
+      }
+      return true;
+    }
+
+    // --- defimpl Protocol, for: Type ---
+    if (macro === 'defimpl') {
+      const aliases = args?.namedChildren.filter((c) => c.type === 'alias') ?? [];
+      const protocol = aliases[0] ? getNodeText(aliases[0], ctx.source) : '';
+      // `for:` target type, if present
+      const kw = args?.namedChildren.find((c) => c.type === 'keywords');
+      const forPair = kw?.namedChildren.find(
+        (p) => p.type === 'pair' && (p.childForFieldName('key')?.text ?? '').replace(/:$/, '') === 'for'
+      );
+      const forVal = forPair?.childForFieldName('value');
+      // Without `for:`, a defimpl nested in a module targets that enclosing
+      // module; a top-level one has no inferable target, so keep 'Any'.
+      const enclosingId = ctx.nodeStack[ctx.nodeStack.length - 1];
+      const enclosing = enclosingId ? ctx.nodes.find((n) => n.id === enclosingId) : undefined;
+      const implicitFor = enclosing?.kind === 'module' ? enclosing.name : 'Any';
+      const forType = forVal ? getNodeText(forVal, ctx.source) : implicitFor;
+      const created = ctx.createNode('module', `${protocol}.${forType}`, node);
+      if (created) {
+        ctx.addUnresolvedReference({
+          fromNodeId: created.id,
+          referenceName: protocol,
+          referenceKind: 'implements',
+          filePath: ctx.filePath,
+          line: node.startPosition.row + 1,
+          column: node.startPosition.column,
+        });
+        const block = doBlock(node);
+        if (block) {
+          ctx.pushScope(created.id);
+          for (let i = 0; i < block.namedChildCount; i++) {
+            const child = block.namedChild(i);
+            if (child) ctx.visitNode(child);
+          }
+          ctx.popScope();
+        }
+      }
+      return true;
+    }
+
+    // --- def / defp / defmacro / defguard / defdelegate ---
+    if (macro && DEF_MACROS.has(macro)) {
+      const name = defName(args);
+      if (name) {
+        // Fold additional clauses of the same function into the first node, but
+        // still walk each clause body so all its calls are recorded.
+        // NOTE: the key is the name only, so `get/1` and `get/2` also fold together.
+        const qn = qualifiedNameFor(ctx, name);
+        const existing = ctx.nodes.find((n) => n.kind === 'function' && n.qualifiedName === qn);
+        const fnNode = existing ?? ctx.createNode('function', name, node, {
+          visibility: PRIVATE_DEFS.has(macro) ? 'private' : 'public',
+          signature: (getNodeText(node, ctx.source).split('\n')[0] ?? '').trim(),
+        });
+        if (fnNode) {
+          ctx.pushScope(fnNode.id);
+          visitDefBody(node, ctx);
+          ctx.popScope();
+        }
+      }
+      return true;
+    }
+
+    // --- defstruct / defexception ---
+    if (macro && STRUCT_MACROS.has(macro)) {
+      const parentId = ctx.nodeStack[ctx.nodeStack.length - 1];
+      const parent = parentId ? ctx.nodes.find((n) => n.id === parentId) : undefined;
+      ctx.createNode('struct', parent?.name ?? macro, node);
+      return true;
+    }
+
+    // --- alias / import / require / use ---
+    if (macro && DEP_MACROS.has(macro)) {
+      const signature = (getNodeText(node, ctx.source).split('\n')[0] ?? '').trim();
+      const first = args?.namedChild(0);
+      if (first?.type === 'alias') {
+        ctx.createNode('import', getNodeText(first, ctx.source), node, { signature });
+      } else if (first?.type === 'dot') {
+        // Multi-alias: `alias A.B.{X, Y}` → dot(left: alias "A.B", right: tuple)
+        const base = getNodeText(first.childForFieldName('left') ?? first, ctx.source);
+        const tuple = first.childForFieldName('right') ?? first.namedChildren.find((c) => c.type === 'tuple');
+        if (tuple) {
+          for (let i = 0; i < tuple.namedChildCount; i++) {
+            const member = tuple.namedChild(i);
+            if (member?.type === 'alias') {
+              ctx.createNode('import', `${base}.${getNodeText(member, ctx.source)}`, node, { signature });
+            }
+          }
+        }
+      }
+      return true;
+    }
+
+    // --- real function call ---
+    let callee: string;
+    if (target.type === 'identifier') callee = target.text;
+    else callee = getNodeText(target, ctx.source); // dot → "Mod.fun"
+
+    const callerId = ctx.nodeStack[ctx.nodeStack.length - 1];
+    if (callerId && !SKIP_CALL.has(callee)) {
+      ctx.addUnresolvedReference({
+        fromNodeId: callerId,
+        referenceName: callee,
+        referenceKind: 'calls',
+        filePath: ctx.filePath,
+        line: node.startPosition.row + 1,
+        column: node.startPosition.column,
+      });
+    }
+    // Descend into every child except the target so nested calls in arguments,
+    // do-blocks (control-flow macros), and keyword bodies are still recorded.
+    for (let i = 0; i < node.namedChildCount; i++) {
+      const child = node.namedChild(i);
+      if (child && child !== target) ctx.visitNode(child);
+    }
+    return true;
+  },
+};
diff --git a/src/extraction/languages/index.ts b/src/extraction/languages/index.ts
index 9d4a949a5..7144898b0 100644
--- a/src/extraction/languages/index.ts
+++ b/src/extraction/languages/index.ts
@@ -27,6 +27,7 @@ import { luaExtractor } from './lua';
 import { rExtractor } from './r';
 import { luauExtractor } from './luau';
 import { objcExtractor } from './objc';
+import { elixirExtractor } from './elixir';
 
 export const EXTRACTORS: Partial> = {
   typescript: typescriptExtractor,
@@ -51,4 +52,5 @@ export const EXTRACTORS: Partial> = {
   r: rExtractor,
   luau: luauExtractor,
   objc: objcExtractor,
+  elixir: elixirExtractor,
 };
diff --git a/src/types.ts b/src/types.ts
index 656bb1090..b3d24b069 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -91,6 +91,7 @@ export const LANGUAGES = [
   'luau',
   'objc',
   'r',
+  'elixir',
   'yaml',
   'twig',
   'xml',