diff --git a/.changeset/go-rust-support.md b/.changeset/go-rust-support.md new file mode 100644 index 0000000..5aa83da --- /dev/null +++ b/.changeset/go-rust-support.md @@ -0,0 +1,12 @@ +--- +'@prosdevlab/dev-agent': minor +--- + +Go callee extraction and Rust language support + +- Rust: full scanner — functions, structs, enums, traits, impl methods, imports, callees, doc comments +- Rust: pattern rules — try operator, match expression, unsafe block, impl/trait definitions +- Go: callee extraction for functions and methods — dev_refs now traces Go call chains +- Go: pattern rules — error handling (if err != nil), goroutines, defer, channels +- Generic impl type parameter stripping (Container<T>.show → Container.show) +- All MCP tools (dev_search, dev_refs, dev_map, dev_patterns) work with Go callees and Rust diff --git a/.claude/scratchpad.md b/.claude/scratchpad.md index 67504d1..8386f0f 100644 --- a/.claude/scratchpad.md +++ b/.claude/scratchpad.md @@ -4,6 +4,8 @@ - **`getDocsByFilePath` fetches all docs client-side (capped at 5k).** Uses `getAll(limit: 5000)` + exact path filter. Fine for single repos (dev-agent has ~2,200 docs). Won't scale to monorepos with 50k+ files. Future fix: server-side path filter in Antfly SDK. - **Two clones of the same repo share one index.** Storage path is hashed from git remote URL (`prosdevlab/dev-agent` → `a1b2c3d4`). Two local clones on different branches share the same index, graph cache, and watcher snapshot. Stale data possible if branches diverge significantly. Pre-existing design — not introduced by graph cache. Fix would be to include branch or worktree path in the hash. +- **Antfly Linear Merge fails on large JSON payloads (~6k+ docs).** Tested with cli/cli (5,933 docs): `decoding request: json: string unexpected end of JSON input`. Chunking is NOT viable — merge semantics require ALL records in one call. Filed as [antflydb/antfly#37](https://github.com/antflydb/antfly/issues/37). AJ will take a look. 
Blocks indexing repos with >~5k components. +- **Rust/Go callee extraction does not resolve target files.** tree-sitter callees have `name` and `line` but no `file` field (unlike ts-morph which resolves cross-file references). This means `dev_map` hot paths show 0 refs for Rust/Go repos, and `dev_refs --depends-on` won't trace cross-file paths. The dependency graph only has edges when callees include a `file` field. Future: cross-file resolution for tree-sitter languages. ## Open Questions diff --git a/CLAUDE.md b/CLAUDE.md index 718c2e0..1e33e1a 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -21,7 +21,7 @@ Everything runs on your machine. No data leaves. ``` packages/ - core/ # Scanner (ts-morph, tree-sitter for Python/Go), vector storage (Antfly), services + core/ # Scanner (ts-morph, tree-sitter for Python/Go/Rust), vector storage (Antfly), services cli/ # Commander.js CLI — dev index, dev search, dev refs, dev map, dev mcp install mcp-server/ # MCP server with 5 built-in adapters subagents/ # Coordinator, explorer, planner, PR agents diff --git a/packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts b/packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts index 56cb6cc..cde13c8 100644 --- a/packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts +++ b/packages/core/src/pattern-matcher/__tests__/wasm-matcher.test.ts @@ -253,7 +253,7 @@ function App() { }); it('returns empty map for unsupported language', async () => { - const results = await matcher.match('fn main() {}', 'rust', ERROR_HANDLING_QUERIES); + const results = await matcher.match('fn main() {}', 'dart', ERROR_HANDLING_QUERIES); expect(results.size).toBe(0); }); }); @@ -346,10 +346,17 @@ describe('resolveLanguage', () => { expect(resolveLanguage('components/App.jsx')).toBe('javascript'); }); + it('maps .go to go', () => { + expect(resolveLanguage('main.go')).toBe('go'); + }); + + it('maps .rs to rust', () => { + expect(resolveLanguage('main.rs')).toBe('rust'); + }); + 
it('returns undefined for unsupported extensions', () => { - expect(resolveLanguage('main.py')).toBe('python'); - expect(resolveLanguage('main.go')).toBeUndefined(); // Go has scanner, not pattern matcher expect(resolveLanguage('README.md')).toBeUndefined(); + expect(resolveLanguage('main.dart')).toBeUndefined(); }); }); @@ -443,7 +450,7 @@ describe('extractErrorHandlingWithAst', () => { it('unsupported extension → runAllAstQueries returns empty → regex', async () => { const source = 'throw new Error("bad");'; - const ast = await runAllAstQueries(source, 'test.rs', matcher); + const ast = await runAllAstQueries(source, 'test.dart', matcher); expect(ast.size).toBe(0); // unsupported language expect(extractErrorHandlingWithAst(source, ast)).toEqual( extractErrorHandlingFromContent(source) diff --git a/packages/core/src/pattern-matcher/rules.ts b/packages/core/src/pattern-matcher/rules.ts index ccacc0d..f04ee94 100644 --- a/packages/core/src/pattern-matcher/rules.ts +++ b/packages/core/src/pattern-matcher/rules.ts @@ -189,3 +189,82 @@ export const ALL_PYTHON_QUERIES: PatternMatchRule[] = [ ...PYTHON_IMPORT_QUERIES, ...PYTHON_TYPE_QUERIES, ]; + +// ============================================================================ +// Go Error Handling + Concurrency (4 rules) +// ============================================================================ + +export const GO_ERROR_HANDLING_QUERIES: PatternMatchRule[] = [ + { + id: 'go-if-err', + category: 'error-handling', + query: '(if_statement condition: (binary_expression right: (nil))) @match', + }, + { + id: 'go-defer', + category: 'error-handling', + query: '(defer_statement) @match', + }, +]; + +export const GO_CONCURRENCY_QUERIES: PatternMatchRule[] = [ + { + id: 'go-goroutine', + category: 'concurrency', + query: '(go_statement) @match', + }, + { + id: 'go-channel-send', + category: 'concurrency', + query: '(send_statement) @match', + }, +]; + +export const ALL_GO_QUERIES: PatternMatchRule[] = [ + 
...GO_ERROR_HANDLING_QUERIES, + ...GO_CONCURRENCY_QUERIES, +]; + +// ============================================================================ +// Rust Error Handling + Unsafe + Types (5 rules) +// ============================================================================ + +export const RUST_ERROR_HANDLING_QUERIES: PatternMatchRule[] = [ + { + id: 'rust-try-operator', + category: 'error-handling', + query: '(try_expression) @match', + }, + { + id: 'rust-match', + category: 'error-handling', + query: '(match_expression) @match', + }, +]; + +export const RUST_UNSAFE_QUERIES: PatternMatchRule[] = [ + { + id: 'rust-unsafe-block', + category: 'unsafe', + query: '(unsafe_block) @match', + }, +]; + +export const RUST_TYPE_QUERIES: PatternMatchRule[] = [ + { + id: 'rust-impl-block', + category: 'types', + query: '(impl_item) @match', + }, + { + id: 'rust-trait-def', + category: 'types', + query: '(trait_item) @match', + }, +]; + +export const ALL_RUST_QUERIES: PatternMatchRule[] = [ + ...RUST_ERROR_HANDLING_QUERIES, + ...RUST_UNSAFE_QUERIES, + ...RUST_TYPE_QUERIES, +]; diff --git a/packages/core/src/pattern-matcher/wasm-matcher.ts b/packages/core/src/pattern-matcher/wasm-matcher.ts index 27163e4..17db697 100644 --- a/packages/core/src/pattern-matcher/wasm-matcher.ts +++ b/packages/core/src/pattern-matcher/wasm-matcher.ts @@ -41,6 +41,8 @@ const EXTENSION_TO_LANGUAGE: Record = { '.js': 'javascript', '.jsx': 'javascript', '.py': 'python', + '.go': 'go', + '.rs': 'rust', }; /** @@ -62,7 +64,14 @@ class WasmPatternMatcher implements PatternMatcher { queries: PatternMatchRule[] ): Promise> { // Validate language is supported - const supportedLanguages = new Set(['typescript', 'tsx', 'javascript', 'go', 'python']); + const supportedLanguages = new Set([ + 'typescript', + 'tsx', + 'javascript', + 'go', + 'python', + 'rust', + ]); if (!supportedLanguages.has(language)) { return new Map(); } diff --git a/packages/core/src/scanner/__fixtures__/rust-complex.rs 
b/packages/core/src/scanner/__fixtures__/rust-complex.rs new file mode 100644 index 0000000..9cd2bbf --- /dev/null +++ b/packages/core/src/scanner/__fixtures__/rust-complex.rs @@ -0,0 +1,84 @@ +use std::fmt; + +/// Server handles HTTP requests +pub struct Server { + host: String, + port: u16, +} + +pub trait Handler { + fn handle(&self, request: &str) -> Result>; +} + +impl Handler for Server { + fn handle(&self, request: &str) -> Result> { + let processed = self.process_request(request)?; + Ok(processed) + } +} + +impl Server { + pub fn new(host: &str, port: u16) -> Self { + Server { host: host.to_string(), port } + } + + fn process_request(&self, data: &str) -> Result> { + let trimmed = data.trim(); + let result = format!("{}:{} - {}", self.host, self.port, trimmed); + Ok(result) + } +} + +impl fmt::Display for Server { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Server({}:{})", self.host, self.port) + } +} + +/// Generic container — tests type parameter stripping +pub struct Container { + value: T, +} + +impl Container { + pub fn show(&self) -> String { + self.value.to_string() + } +} + +fn transform(input: &str) -> String { + input.to_uppercase() +} + +/// Tests callee extraction inside closures +pub fn process_items(items: Vec) -> Vec { + items.iter().map(|x| transform(x)).collect() +} + +/// Tests that field access is NOT a callee +pub fn read_server_host(s: &Server) -> String { + let _host = s.host.clone(); + s.host.to_uppercase() +} + +// Tests mod block support — functions inside mod blocks must be captured +mod handlers { + pub fn handle_request(data: &str) -> String { + data.to_uppercase() + } + + fn internal_helper() -> bool { + true + } +} + +// Tests nested generic stripping +pub struct Wrapper { + inner: Option, +} + +impl Wrapper> { + pub fn unwrap_display(&self) -> String { + format!("{:?}", self.inner) + } +} diff --git a/packages/core/src/scanner/__fixtures__/rust-malformed.rs 
b/packages/core/src/scanner/__fixtures__/rust-malformed.rs new file mode 100644 index 0000000..9df65f9 --- /dev/null +++ b/packages/core/src/scanner/__fixtures__/rust-malformed.rs @@ -0,0 +1,4 @@ +fn broken( { + // This file intentionally has a syntax error + let x = +} diff --git a/packages/core/src/scanner/__fixtures__/rust-simple.rs b/packages/core/src/scanner/__fixtures__/rust-simple.rs new file mode 100644 index 0000000..dcb096f --- /dev/null +++ b/packages/core/src/scanner/__fixtures__/rust-simple.rs @@ -0,0 +1,54 @@ +use std::collections::HashMap; +use std::io::{self, Read}; + +/// A simple key-value store +pub struct Store { + data: HashMap, +} + +impl Store { + /// Create a new empty store + pub fn new() -> Self { + Store { data: HashMap::new() } + } + + /// Get a value by key + pub fn get(&self, key: &str) -> Option<&String> { + self.data.get(key) + } + + fn internal_cleanup(&mut self) { + self.data.clear(); + } +} + +/// Process input from stdin +pub fn process_input() -> io::Result { + let mut buffer = String::new(); + io::stdin().read_to_string(&mut buffer)?; + Ok(buffer) +} + +fn helper() -> bool { + true +} + +/// Only visible within the crate +pub(crate) fn crate_visible() -> bool { + helper() +} + +pub enum Status { + Active, + Inactive, + Error(String), +} + +pub trait Processor { + fn process(&self, input: &str) -> String; +} + +/// Async function for testing async detection +pub async fn fetch_data(url: &str) -> Result> { + Ok(url.to_string()) +} diff --git a/packages/core/src/scanner/__tests__/fixtures/go/callees.go b/packages/core/src/scanner/__tests__/fixtures/go/callees.go new file mode 100644 index 0000000..52736bf --- /dev/null +++ b/packages/core/src/scanner/__tests__/fixtures/go/callees.go @@ -0,0 +1,33 @@ +package main + +import ( + "fmt" + "os" + "strings" +) + +func processInput(input string) string { + trimmed := strings.TrimSpace(input) + fmt.Println("Processing:", trimmed) + return trimmed +} + +func main() { + result := 
processInput(os.Args[1]) + fmt.Println(result) + os.Exit(0) +} + +type Server struct { + host string +} + +func (s *Server) Start() error { + fmt.Println("Starting server on", s.host) + return nil +} + +func (s *Server) handleRequest(data string) { + processed := processInput(data) + fmt.Println("Handled:", processed) +} diff --git a/packages/core/src/scanner/__tests__/go.test.ts b/packages/core/src/scanner/__tests__/go.test.ts index b93d68f..c716a85 100644 --- a/packages/core/src/scanner/__tests__/go.test.ts +++ b/packages/core/src/scanner/__tests__/go.test.ts @@ -477,4 +477,58 @@ describe('GoScanner', () => { }); }); }); + + describe('callee extraction', () => { + let calleeDocs: Document[]; + + beforeAll(async () => { + calleeDocs = await scanner.scan(['callees.go'], fixturesDir); + }); + + it('should extract callees from functions', () => { + const processInput = calleeDocs.find((d) => d.metadata.name === 'processInput'); + expect(processInput).toBeDefined(); + expect(processInput!.metadata.callees).toBeDefined(); + expect(processInput!.metadata.callees!.length).toBeGreaterThan(0); + }); + + it('should use full selector text for qualified calls', () => { + const processInput = calleeDocs.find((d) => d.metadata.name === 'processInput'); + const calleeNames = processInput!.metadata.callees!.map((c) => c.name); + // Should be "fmt.Println" not just "Println" + expect(calleeNames.some((n) => n === 'fmt.Println')).toBe(true); + expect(calleeNames.some((n) => n === 'strings.TrimSpace')).toBe(true); + }); + + it('should extract callees from methods', () => { + const start = calleeDocs.find((d) => d.metadata.name === 'Server.Start'); + expect(start).toBeDefined(); + expect(start!.metadata.callees).toBeDefined(); + const calleeNames = start!.metadata.callees!.map((c) => c.name); + expect(calleeNames.some((n) => n === 'fmt.Println')).toBe(true); + }); + + it('should include callee line numbers', () => { + const main = calleeDocs.find((d) => d.metadata.name === 'main'); + 
expect(main!.metadata.callees).toBeDefined(); + for (const callee of main!.metadata.callees!) { + expect(callee.line).toBeGreaterThan(0); + } + }); + + it('should deduplicate callees at same line', () => { + const main = calleeDocs.find((d) => d.metadata.name === 'main'); + const seen = new Set(); + for (const callee of main!.metadata.callees!) { + const key = `${callee.name}:${callee.line}`; + expect(seen.has(key)).toBe(false); + seen.add(key); + } + }); + + it('should not have callees for structs', () => { + const server = calleeDocs.find((d) => d.metadata.name === 'Server' && d.type === 'class'); + expect(server?.metadata.callees).toBeUndefined(); + }); + }); }); diff --git a/packages/core/src/scanner/__tests__/rust.test.ts b/packages/core/src/scanner/__tests__/rust.test.ts new file mode 100644 index 0000000..453996a --- /dev/null +++ b/packages/core/src/scanner/__tests__/rust.test.ts @@ -0,0 +1,430 @@ +/** + * Rust Scanner Tests + * + * Step 0: Grammar validation — confirms tree-sitter-rust node names + * before building the scanner. Keep this test as a permanent reference. 
+ */ + +import * as path from 'node:path'; +import { describe, expect, it } from 'vitest'; +import { RustScanner } from '../rust'; +import { parseCode } from '../tree-sitter'; + +// Step 0: Validate tree-sitter-rust grammar node names +describe('Rust grammar validation (Step 0)', () => { + it('should parse function_item', async () => { + const tree = await parseCode('pub fn hello() { }', 'rust'); + const root = tree.rootNode; + const fn = root.namedChildren[0]; + expect(fn.type).toBe('function_item'); + }); + + it('should parse struct_item', async () => { + const tree = await parseCode('pub struct Foo { x: i32 }', 'rust'); + const root = tree.rootNode; + const s = root.namedChildren[0]; + expect(s.type).toBe('struct_item'); + }); + + it('should parse enum_item', async () => { + const tree = await parseCode('pub enum Status { Active, Inactive }', 'rust'); + const root = tree.rootNode; + const e = root.namedChildren[0]; + expect(e.type).toBe('enum_item'); + }); + + it('should parse trait_item', async () => { + const tree = await parseCode('pub trait Handler { fn handle(&self); }', 'rust'); + const root = tree.rootNode; + const t = root.namedChildren[0]; + expect(t.type).toBe('trait_item'); + }); + + it('should parse impl_item with type field', async () => { + const code = ` +impl Foo { + fn bar(&self) {} +}`; + const tree = await parseCode(code, 'rust'); + const root = tree.rootNode; + const impl = root.namedChildren[0]; + expect(impl.type).toBe('impl_item'); + + // The 'type' field should give us the concrete type name + const typeNode = impl.childForFieldName('type'); + expect(typeNode).not.toBeNull(); + expect(typeNode!.text).toBe('Foo'); + }); + + it('should parse impl Trait for Type with type field', async () => { + const code = ` +impl Handler for Server { + fn handle(&self) {} +}`; + const tree = await parseCode(code, 'rust'); + const root = tree.rootNode; + const impl = root.namedChildren[0]; + expect(impl.type).toBe('impl_item'); + + // 'type' field should 
give concrete type (Server), not the trait + const typeNode = impl.childForFieldName('type'); + expect(typeNode).not.toBeNull(); + expect(typeNode!.text).toBe('Server'); + + // 'trait' field should give the trait name + const traitNode = impl.childForFieldName('trait'); + expect(traitNode).not.toBeNull(); + expect(traitNode!.text).toBe('Handler'); + }); + + it('should parse use_declaration', async () => { + const tree = await parseCode('use std::collections::HashMap;', 'rust'); + const root = tree.rootNode; + const use = root.namedChildren[0]; + expect(use.type).toBe('use_declaration'); + }); + + it('should parse call_expression for function calls', async () => { + const code = ` +fn main() { + hello(); +}`; + const tree = await parseCode(code, 'rust'); + const fn = tree.rootNode.namedChildren[0]; + // Walk to find call_expression + const body = fn.childForFieldName('body'); + expect(body).not.toBeNull(); + + function findNodeType(node: typeof fn, type: string): typeof fn | null { + if (node.type === type) return node; + for (const child of node.namedChildren) { + const found = findNodeType(child, type); + if (found) return found; + } + return null; + } + + const call = findNodeType(body!, 'call_expression'); + expect(call).not.toBeNull(); + expect(call!.childForFieldName('function')?.text).toBe('hello'); + }); + + it('should parse call_expression for method calls (field_expression)', async () => { + const code = ` +fn main() { + self.process_request(); +}`; + const tree = await parseCode(code, 'rust'); + + function findNodeType(node: any, type: string): any { + if (node.type === type) return node; + for (const child of node.namedChildren) { + const found = findNodeType(child, type); + if (found) return found; + } + return null; + } + + const call = findNodeType(tree.rootNode, 'call_expression'); + expect(call).not.toBeNull(); + + const funcNode = call.childForFieldName('function'); + expect(funcNode).not.toBeNull(); + 
expect(funcNode.type).toBe('field_expression'); + expect(funcNode.text).toBe('self.process_request'); + }); + + it('should parse macro_invocation separately from call_expression', async () => { + const code = ` +fn main() { + println!("hello"); + hello(); +}`; + const tree = await parseCode(code, 'rust'); + + function findAllNodeTypes(node: any, type: string, results: any[] = []): any[] { + if (node.type === type) results.push(node); + for (const child of node.namedChildren) { + findAllNodeTypes(child, type, results); + } + return results; + } + + const macros = findAllNodeTypes(tree.rootNode, 'macro_invocation'); + const calls = findAllNodeTypes(tree.rootNode, 'call_expression'); + + expect(macros.length).toBe(1); // println! + expect(calls.length).toBe(1); // hello() + }); + + it('should parse visibility_modifier for pub', async () => { + const code = ` +pub fn public_fn() {} +fn private_fn() {} +pub(crate) fn crate_fn() {}`; + const tree = await parseCode(code, 'rust'); + const fns = tree.rootNode.namedChildren.filter((n: any) => n.type === 'function_item'); + expect(fns.length).toBe(3); + + // pub fn — has visibility_modifier + const pubFn = fns[0]; + const vis0 = pubFn.namedChildren.find((n: any) => n.type === 'visibility_modifier'); + expect(vis0).toBeDefined(); + expect(vis0!.text).toBe('pub'); + + // fn — no visibility_modifier + const privateFn = fns[1]; + const vis1 = privateFn.namedChildren.find((n: any) => n.type === 'visibility_modifier'); + expect(vis1).toBeUndefined(); + + // pub(crate) fn — has visibility_modifier + const crateFn = fns[2]; + const vis2 = crateFn.namedChildren.find((n: any) => n.type === 'visibility_modifier'); + expect(vis2).toBeDefined(); + expect(vis2!.text).toBe('pub(crate)'); + }); + + it('should parse doc comments as line_comment', async () => { + const code = ` +/// This is a doc comment +/// Second line +pub fn documented() {}`; + const tree = await parseCode(code, 'rust'); + const root = tree.rootNode; + + // Doc comments 
are line_comment nodes before the function + const comments = root.namedChildren.filter((n: any) => n.type === 'line_comment'); + expect(comments.length).toBe(2); + expect(comments[0].text).toBe('/// This is a doc comment'); + expect(comments[1].text).toBe('/// Second line'); + }); + + it('should detect async function via child nodes', async () => { + const code = 'pub async fn fetch() {}'; + const tree = await parseCode(code, 'rust'); + const fn = tree.rootNode.namedChildren.find((n: any) => n.type === 'function_item'); + expect(fn).toBeDefined(); + + // Detect async from the function's source text rather than its children: + // tree-sitter-rust may expose 'async' only as an anonymous child + const hasAsync = fn!.text.startsWith('pub async') || fn!.text.startsWith('async'); + expect(hasAsync).toBe(true); + }); + + it('should parse generic impl block', async () => { + const code = ` +impl Container { + pub fn show(&self) -> String { + self.value.to_string() + } +}`; + const tree = await parseCode(code, 'rust'); + const impl = tree.rootNode.namedChildren[0]; + expect(impl.type).toBe('impl_item'); + + const typeNode = impl.childForFieldName('type'); + expect(typeNode).not.toBeNull(); + // The type text includes the generic parameter + expect(typeNode!.text).toContain('Container'); + }); +}); + +// ============================================================================ +// RustScanner — Full extraction tests +// ============================================================================ + +const fixturesDir = path.join(__dirname, '..', '__fixtures__'); + +describe('RustScanner', () => { + const scanner = new RustScanner(); + + describe('canHandle', () => { + it('should handle .rs files', () => { + expect(scanner.canHandle('src/main.rs')).toBe(true); + expect(scanner.canHandle('lib.rs')).toBe(true); + }); + + it('should not handle other files', () => { + expect(scanner.canHandle('main.go')).toBe(false); + expect(scanner.canHandle('app.py')).toBe(false); + 
expect(scanner.canHandle('index.ts')).toBe(false); + }); + }); + + describe('rust-simple.rs', () => { + let docs: Awaited>; + + it('should extract from simple fixture', async () => { + const fs = await import('node:fs'); + const content = fs.readFileSync(path.join(fixturesDir, 'rust-simple.rs'), 'utf-8'); + docs = await scanner.extractFromFile(content, 'rust-simple.rs'); + expect(docs.length).toBeGreaterThan(0); + }); + + it('should extract free functions', () => { + const processInput = docs.find((d) => d.metadata.name === 'process_input'); + expect(processInput).toBeDefined(); + expect(processInput!.type).toBe('function'); + expect(processInput!.language).toBe('rust'); + }); + + it('should detect pub vs non-pub', () => { + const pub = docs.find((d) => d.metadata.name === 'process_input'); + const priv = docs.find((d) => d.metadata.name === 'helper'); + expect(pub?.metadata.exported).toBe(true); + expect(priv?.metadata.exported).toBe(false); + }); + + it('should detect pub(crate) as exported', () => { + const crateVis = docs.find((d) => d.metadata.name === 'crate_visible'); + expect(crateVis).toBeDefined(); + expect(crateVis!.metadata.exported).toBe(true); + }); + + it('should extract structs', () => { + const store = docs.find((d) => d.metadata.name === 'Store'); + expect(store).toBeDefined(); + expect(store!.type).toBe('class'); + }); + + it('should extract enums', () => { + const status = docs.find((d) => d.metadata.name === 'Status'); + expect(status).toBeDefined(); + expect(status!.type).toBe('class'); + }); + + it('should extract traits', () => { + const processor = docs.find((d) => d.metadata.name === 'Processor'); + expect(processor).toBeDefined(); + expect(processor!.type).toBe('interface'); + }); + + it('should extract methods from impl blocks', () => { + const newFn = docs.find((d) => d.metadata.name === 'Store.new'); + const getFn = docs.find((d) => d.metadata.name === 'Store.get'); + const cleanup = docs.find((d) => d.metadata.name === 
'Store.internal_cleanup'); + expect(newFn).toBeDefined(); + expect(getFn).toBeDefined(); + expect(cleanup).toBeDefined(); + expect(newFn!.type).toBe('method'); + }); + + it('should extract doc comments', () => { + const store = docs.find((d) => d.metadata.name === 'Store'); + expect(store?.metadata.docstring).toBe('A simple key-value store'); + + const newFn = docs.find((d) => d.metadata.name === 'Store.new'); + expect(newFn?.metadata.docstring).toBe('Create a new empty store'); + }); + + it('should extract imports', () => { + const fn = docs.find((d) => d.metadata.name === 'process_input'); + expect(fn?.metadata.imports).toBeDefined(); + expect(fn!.metadata.imports!.some((i) => i.includes('HashMap'))).toBe(true); + }); + + it('should detect async functions', () => { + const fetchData = docs.find((d) => d.metadata.name === 'fetch_data'); + expect(fetchData).toBeDefined(); + expect(fetchData!.metadata.isAsync).toBe(true); + }); + + it('should include signatures', () => { + const fn = docs.find((d) => d.metadata.name === 'process_input'); + expect(fn?.metadata.signature).toContain('fn process_input'); + }); + }); + + describe('rust-complex.rs', () => { + let docs: Awaited>; + + it('should extract from complex fixture', async () => { + const fs = await import('node:fs'); + const content = fs.readFileSync(path.join(fixturesDir, 'rust-complex.rs'), 'utf-8'); + docs = await scanner.extractFromFile(content, 'rust-complex.rs'); + expect(docs.length).toBeGreaterThan(0); + }); + + it('should handle impl Trait for Type — uses concrete type', () => { + const handle = docs.find((d) => d.metadata.name === 'Server.handle'); + expect(handle).toBeDefined(); + expect(handle!.type).toBe('method'); + }); + + it('should handle impl fmt::Display — uses concrete type', () => { + const fmt = docs.find((d) => d.metadata.name === 'Server.fmt'); + expect(fmt).toBeDefined(); + expect(fmt!.type).toBe('method'); + }); + + it('should strip generic type params from impl', () => { + const show = 
docs.find((d) => d.metadata.name === 'Container.show'); + expect(show).toBeDefined(); + expect(show!.metadata.name).toBe('Container.show'); + // Should NOT be Container<T>.show + expect(show!.metadata.name).not.toContain('<'); + }); + + it('should extract callees from methods', () => { + const handle = docs.find((d) => d.metadata.name === 'Server.handle'); + expect(handle?.metadata.callees).toBeDefined(); + const calleeNames = handle!.metadata.callees!.map((c) => c.name); + expect(calleeNames.some((n) => n.includes('process_request'))).toBe(true); + }); + + it('should extract callees inside closures', () => { + const processItems = docs.find((d) => d.metadata.name === 'process_items'); + expect(processItems?.metadata.callees).toBeDefined(); + const calleeNames = processItems!.metadata.callees!.map((c) => c.name); + expect(calleeNames.some((n) => n.includes('transform'))).toBe(true); + }); + + it('should extract functions inside mod blocks', () => { + const handleReq = docs.find( + (d) => d.metadata.name === 'handle_request' && d.type === 'function' + ); + expect(handleReq).toBeDefined(); + expect(handleReq!.metadata.exported).toBe(true); + + const helper = docs.find((d) => d.metadata.name === 'internal_helper'); + expect(helper).toBeDefined(); + expect(helper!.metadata.exported).toBe(false); + }); + + it('should strip nested generic type params from impl', () => { + // impl Wrapper<Option<T>> → Wrapper, not Wrapper<Option<T>> or Wrapper> + const method = docs.find((d) => d.metadata.name === 'Wrapper.unwrap_display'); + expect(method).toBeDefined(); + expect(method!.metadata.name).toBe('Wrapper.unwrap_display'); + expect(method!.metadata.name).not.toContain('<'); + expect(method!.metadata.name).not.toContain('>'); + }); + + it('should NOT include macros in callees', () => { + // process_request calls format!() — should NOT be in callees + const processReq = docs.find((d) => d.metadata.name === 'Server.process_request'); + if (processReq?.metadata.callees) { + const calleeNames = 
processReq.metadata.callees.map((c) => c.name); + expect(calleeNames.some((n) => n.includes('format!'))).toBe(false); + } + }); + }); + + describe('malformed file', () => { + it('should return empty documents for malformed Rust file', async () => { + const fs = await import('node:fs'); + const content = fs.readFileSync(path.join(fixturesDir, 'rust-malformed.rs'), 'utf-8'); + const docs = await scanner.extractFromFile(content, 'rust-malformed.rs'); + // Should not crash — may return partial or empty results + expect(Array.isArray(docs)).toBe(true); + }); + }); + + describe('generated file detection', () => { + it('should skip files in target/ directory', async () => { + const files = ['target/debug/build/main.rs']; + const results = await scanner.scan(files, '/fake/root'); + expect(results.length).toBe(0); + }); + }); +}); diff --git a/packages/core/src/scanner/go.ts b/packages/core/src/scanner/go.ts index bde6788..e6cbc12 100644 --- a/packages/core/src/scanner/go.ts +++ b/packages/core/src/scanner/go.ts @@ -12,6 +12,7 @@ import { NodeFileSystemValidator, validateFile, } from '../utils/file-validator'; +import type { TreeSitterNode } from './tree-sitter'; import { extractGoDocComment, initTreeSitter, @@ -19,7 +20,7 @@ import { type ParsedTree, parseCode, } from './tree-sitter'; -import type { Document, Scanner, ScannerCapabilities } from './types'; +import type { CalleeInfo, Document, Scanner, ScannerCapabilities } from './types'; /** * Tree-sitter queries for Go code extraction @@ -383,6 +384,8 @@ export class GoScanner implements Scanner { // Check for generics const { isGeneric, typeParameters } = this.extractTypeParameters(signature); + const callees = this.walkCallNodes(defCapture.node); + documents.push({ id: `${file}:${name}:${startLine}`, text: this.buildEmbeddingText('function', name, signature, docstring), @@ -397,6 +400,7 @@ export class GoScanner implements Scanner { exported, docstring, snippet, + callees: callees.length > 0 ? 
callees : undefined, custom: { ...(isTestFile ? { isTest: true } : {}), ...(isGeneric ? { isGeneric, typeParameters } : {}), @@ -451,6 +455,8 @@ export class GoScanner implements Scanner { this.extractTypeParameters(signature); const isGeneric = receiverHasGenerics || signatureHasGenerics; + const callees = this.walkCallNodes(defCapture.node); + documents.push({ id: `${file}:${name}:${startLine}`, text: this.buildEmbeddingText('method', name, signature, docstring), @@ -465,6 +471,7 @@ export class GoScanner implements Scanner { exported, docstring, snippet, + callees: callees.length > 0 ? callees : undefined, custom: { receiver: baseReceiverType, receiverPointer, @@ -698,8 +705,36 @@ export class GoScanner implements Scanner { } /** - * Check if a Go identifier is exported (starts with uppercase) + * Walk AST nodes recursively to find all call_expression nodes. + * Returns full selector text (e.g., "fmt.Println" not just "Println"). */ + private walkCallNodes(node: TreeSitterNode): CalleeInfo[] { + const callees: CalleeInfo[] = []; + const seen = new Set(); + + function walk(n: TreeSitterNode) { + if (n.type === 'call_expression') { + const funcNode = n.childForFieldName('function'); + if (funcNode) { + const name = funcNode.text; + const line = n.startPosition.row + 1; + const key = `${name}:${line}`; + if (!seen.has(key)) { + seen.add(key); + callees.push({ name, line }); + } + } + } + for (const child of n.namedChildren) { + walk(child); + } + } + + walk(node); + return callees; + } + + /** Check if a Go identifier is exported (starts with uppercase) */ private isExported(name: string): boolean { if (!name || name.length === 0) return false; const firstChar = name.charAt(0); diff --git a/packages/core/src/scanner/index.ts b/packages/core/src/scanner/index.ts index 62015dc..78947dc 100644 --- a/packages/core/src/scanner/index.ts +++ b/packages/core/src/scanner/index.ts @@ -4,6 +4,7 @@ export { GoScanner } from './go'; export { MarkdownScanner } from './markdown'; 
/**
 * Tree-sitter query sources used to extract Rust items.
 *
 * Every query tags the whole matched item as `@definition`; all except
 * `imports` also tag the item's identifier as `@name`. `implMethods`
 * additionally tags the impl block's `type` field as `@receiver`.
 */
export const RUST_QUERIES = {
  // All function_item nodes at any depth (including inside mod blocks).
  // This also matches methods inside impl blocks; the scanner drops those
  // by skipping any match whose parent is a declaration_list that belongs
  // to an impl_item (they are reported through `implMethods` instead).
  functions: `
    (function_item
      name: (identifier) @name) @definition
  `,

  // Struct definitions
  structs: `
    (struct_item
      name: (type_identifier) @name) @definition
  `,

  // Enum definitions
  enums: `
    (enum_item
      name: (type_identifier) @name) @definition
  `,

  // Trait definitions
  traits: `
    (trait_item
      name: (type_identifier) @name) @definition
  `,

  // Methods inside impl blocks (captures receiver type + method name).
  // @receiver is the raw text of the impl's `type` field, so it still
  // carries any generic arguments written there.
  implMethods: `
    (impl_item
      type: (_) @receiver
      body: (declaration_list
        (function_item
          name: (identifier) @name) @definition))
  `,

  // Use declarations (imports); the whole declaration is the capture.
  imports: `
    (use_declaration) @definition
  `,

  // Type aliases
  // NOTE(review): no extractor in rust.ts appears to run this query yet — confirm
  // whether type aliases are meant to be indexed.
  typeAliases: `
    (type_item
      name: (type_identifier) @name) @definition
  `,
};
/**
 * Rust scanner using tree-sitter for parsing.
 *
 * Turns `.rs` files into index documents: free functions, structs, enums,
 * traits and impl methods. Every document carries the item's signature and
 * its `///` doc comment; functions and methods also carry a (possibly
 * truncated) snippet, the file's `use` declarations and the calls found in
 * their body.
 */
export class RustScanner implements Scanner {
  readonly language = 'rust';
  readonly capabilities: ScannerCapabilities = {
    syntax: true,
    types: true,
    documentation: true,
  };

  /** Snippets longer than this many lines are cut and suffixed with `// ...`. */
  private static readonly MAX_SNIPPET_LINES = 50;

  /**
   * Line prefixes that can open a Rust item declaration. Used to locate the
   * signature line inside an item's text.
   */
  private static readonly SIGNATURE_PREFIXES: readonly string[] = [
    'pub ',
    'pub(',
    'fn ',
    'async ',
    'unsafe ',
    'const ',
    'extern ',
    'default ',
    'struct ',
    'enum ',
    'union ',
    'trait ',
    'type ',
  ];

  /** Marker that Rust tooling conventionally places at the top of generated files. */
  private static readonly GENERATED_MARKER = '@generated';

  private readonly fileValidator: FileSystemValidator;

  /**
   * @param fileValidator - File access/validation backend; defaults to the Node implementation.
   */
  constructor(fileValidator: FileSystemValidator = new NodeFileSystemValidator()) {
    this.fileValidator = fileValidator;
  }

  /** Returns true for paths with a `.rs` extension (case-insensitive). */
  canHandle(filePath: string): boolean {
    return path.extname(filePath).toLowerCase() === '.rs';
  }

  /**
   * Initialise tree-sitter and load the Rust grammar.
   *
   * @throws Error with a Rust-specific message when the failure looks like a
   *   missing WASM file; any other error is rethrown unchanged.
   */
  private async validateRustSupport(): Promise<void> {
    try {
      await initTreeSitter();
      await loadLanguage('rust');
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      if (errorMessage.includes('tree-sitter WASM') || errorMessage.includes('Failed to locate')) {
        throw new Error(
          'Rust tree-sitter WASM files not found. ' +
            'tree-sitter-rust.wasm is required for Rust code parsing.'
        );
      }
      throw error;
    }
  }

  /**
   * Scan a list of Rust files and return the documents extracted from them.
   *
   * Files that fail validation, look generated, or raise while being read or
   * extracted are skipped (the latter with a debug log); they never abort the scan.
   *
   * @param files - Paths relative to `repoRoot`.
   * @param repoRoot - Directory the relative paths are resolved against.
   * @param logger - Optional logger for progress and skip messages.
   * @param onProgress - Optional callback, invoked every 50 files with (processed, total).
   * @returns All documents extracted from the files that could be processed.
   * @throws When tree-sitter or the Rust grammar cannot be initialised.
   */
  async scan(
    files: string[],
    repoRoot: string,
    logger?: Logger,
    onProgress?: (filesProcessed: number, totalFiles: number) => void
  ): Promise<Document[]> {
    const documents: Document[] = [];
    const total = files.length;

    try {
      await this.validateRustSupport();
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      logger?.error({ error: errorMessage }, 'Rust scanner initialization failed');
      throw error;
    }

    let lastLogTime = Date.now();

    for (const [i, file] of files.entries()) {
      if (onProgress && i > 0 && i % 50 === 0) {
        onProgress(i, total);
      }

      // Log every 50 files, or sooner when more than 10s passed since the last log line.
      const now = Date.now();
      if (logger && i > 0 && (i % 50 === 0 || now - lastLogTime > 10000)) {
        lastLogTime = now;
        const percent = Math.round((i / total) * 100);
        logger.info(
          { filesProcessed: i, total, percent, documents: documents.length },
          `rust ${i}/${total} (${percent}%) - ${documents.length} docs`
        );
      }

      try {
        const absolutePath = path.join(repoRoot, file);
        const validation = validateFile(file, absolutePath, this.fileValidator);
        if (!validation.isValid) continue;

        const sourceText = this.fileValidator.readText(absolutePath);

        if (this.isGeneratedFile(file, sourceText)) continue;

        const fileDocs = await this.extractFromFile(sourceText, file);
        documents.push(...fileDocs);
      } catch (error) {
        // Best-effort: one unreadable file must not fail the whole scan.
        const errorMessage = error instanceof Error ? error.message : String(error);
        logger?.debug({ file, error: errorMessage }, `Skipped Rust file: ${file}`);
      }
    }

    logger?.info(
      { successCount: documents.length, total },
      `Rust scan complete: ${documents.length} docs from ${total} files`
    );

    return documents;
  }

  /**
   * Parse one Rust source text and extract its documents.
   *
   * @param sourceText - Full contents of the file.
   * @param relativeFile - Path stored in each document's `metadata.file` and id.
   * @returns Functions, structs, enums, traits and impl methods, in that order;
   *   an empty array when parsing throws.
   */
  async extractFromFile(sourceText: string, relativeFile: string): Promise<Document[]> {
    let tree: ParsedTree;
    try {
      tree = await parseCode(sourceText, 'rust');
    } catch {
      // Parse failure (malformed file) — return empty, don't crash
      return [];
    }

    // File-level `use` declarations are attached to every function/method document.
    const imports = this.extractImports(tree);

    return [
      ...this.extractFunctions(tree, sourceText, relativeFile, imports),
      ...this.extractStructs(tree, sourceText, relativeFile),
      ...this.extractEnums(tree, sourceText, relativeFile),
      ...this.extractTraits(tree, sourceText, relativeFile),
      ...this.extractMethods(tree, sourceText, relativeFile, imports),
    ];
  }

  // ========================================================================
  // Extraction methods
  // ========================================================================

  /** Extract every function that is not a method of an impl block. */
  private extractFunctions(
    tree: ParsedTree,
    sourceText: string,
    file: string,
    imports: string[]
  ): Document[] {
    const documents: Document[] = [];

    for (const match of tree.query(RUST_QUERIES.functions)) {
      const nameCapture = match.captures.find((c) => c.name === 'name');
      const defCapture = match.captures.find((c) => c.name === 'definition');
      if (!nameCapture || !defCapture) continue;

      // Skip functions inside impl blocks — those are captured by extractMethods.
      // Functions inside mod blocks (mod_item > declaration_list) are kept.
      const node = defCapture.node;
      const parent = node.parent;
      if (parent?.type === 'declaration_list' && parent.parent?.type === 'impl_item') continue;

      documents.push(
        this.buildCallableDocument(
          'function',
          nameCapture.node.text,
          node,
          sourceText,
          file,
          imports
        )
      );
    }

    return documents;
  }

  /** Extract struct definitions; indexed as 'class' for consistency with TS/Python scanners. */
  private extractStructs(tree: ParsedTree, sourceText: string, file: string): Document[] {
    return this.extractTypeItems(tree, sourceText, file, RUST_QUERIES.structs, 'struct', 'class');
  }

  /** Extract enum definitions; indexed as 'class' for consistency. */
  private extractEnums(tree: ParsedTree, sourceText: string, file: string): Document[] {
    return this.extractTypeItems(tree, sourceText, file, RUST_QUERIES.enums, 'enum', 'class');
  }

  /** Extract trait definitions; traits map to 'interface'. */
  private extractTraits(tree: ParsedTree, sourceText: string, file: string): Document[] {
    return this.extractTypeItems(tree, sourceText, file, RUST_QUERIES.traits, 'trait', 'interface');
  }

  /** Extract methods defined in impl blocks, named `Receiver.method`. */
  private extractMethods(
    tree: ParsedTree,
    sourceText: string,
    file: string,
    imports: string[]
  ): Document[] {
    const documents: Document[] = [];

    for (const match of tree.query(RUST_QUERIES.implMethods)) {
      const receiverCapture = match.captures.find((c) => c.name === 'receiver');
      const nameCapture = match.captures.find((c) => c.name === 'name');
      const defCapture = match.captures.find((c) => c.name === 'definition');
      if (!receiverCapture || !nameCapture || !defCapture) continue;

      // Strip generic type params: Container<T> → Container, HashMap<K, V> → HashMap
      const receiverType = this.stripGenerics(receiverCapture.node.text);
      const qualifiedName = `${receiverType}.${nameCapture.node.text}`;

      documents.push(
        this.buildCallableDocument(
          'method',
          qualifiedName,
          defCapture.node,
          sourceText,
          file,
          imports
        )
      );
    }

    return documents;
  }

  /** Return the source text of every `use` declaration in the file. */
  private extractImports(tree: ParsedTree): string[] {
    const imports: string[] = [];
    for (const match of tree.query(RUST_QUERIES.imports)) {
      const defCapture = match.captures.find((c) => c.name === 'definition');
      if (defCapture) {
        imports.push(defCapture.node.text);
      }
    }
    return imports;
  }

  /**
   * Shared implementation of the struct/enum/trait extractors.
   *
   * @param query - Query whose matches expose `name` and `definition` captures.
   * @param kind - Rust keyword used as the first word of the embedding text.
   * @param type - Document type the item is indexed under.
   */
  private extractTypeItems(
    tree: ParsedTree,
    sourceText: string,
    file: string,
    query: string,
    kind: 'struct' | 'enum' | 'trait',
    type: Document['type']
  ): Document[] {
    const documents: Document[] = [];

    for (const match of tree.query(query)) {
      const nameCapture = match.captures.find((c) => c.name === 'name');
      const defCapture = match.captures.find((c) => c.name === 'definition');
      if (!nameCapture || !defCapture) continue;

      const name = nameCapture.node.text;
      const { startLine, endLine, exported, docstring, signature } = this.describeItem(
        defCapture.node,
        sourceText
      );

      documents.push({
        id: `${file}:${name}:${startLine}`,
        text: this.buildEmbeddingText(kind, name, signature, docstring),
        type,
        language: 'rust',
        metadata: {
          name,
          file,
          startLine,
          endLine,
          exported,
          signature,
          docstring,
        },
      });
    }

    return documents;
  }

  /** Build the document for a free function or an impl method. */
  private buildCallableDocument(
    type: 'function' | 'method',
    name: string,
    node: TreeSitterNode,
    sourceText: string,
    file: string,
    imports: string[]
  ): Document {
    const { startLine, endLine, exported, docstring, signature } = this.describeItem(
      node,
      sourceText
    );
    const snippet = this.truncateSnippet(node.text);
    const callees = this.walkCallNodes(node);
    const isAsync = this.isAsyncFunction(node);

    return {
      id: `${file}:${name}:${startLine}`,
      text: this.buildEmbeddingText(type, name, signature, docstring),
      type,
      language: 'rust',
      metadata: {
        name,
        file,
        startLine,
        endLine,
        exported,
        signature,
        docstring,
        snippet,
        imports,
        // Absent rather than empty/false, so the fields only appear when meaningful.
        callees: callees.length > 0 ? callees : undefined,
        isAsync: isAsync || undefined,
      },
    };
  }

  /** Compute the fields every item document shares (1-based line numbers). */
  private describeItem(
    node: TreeSitterNode,
    sourceText: string
  ): {
    startLine: number;
    endLine: number;
    exported: boolean;
    docstring: string | undefined;
    signature: string;
  } {
    const startLine = node.startPosition.row + 1;
    return {
      startLine,
      endLine: node.endPosition.row + 1,
      exported: this.isExported(node),
      docstring: this.extractDocComment(sourceText, startLine),
      signature: this.extractSignature(node),
    };
  }

  /** Drop everything from the first `<` onwards: `HashMap<K, V>` → `HashMap`. */
  private stripGenerics(typeText: string): string {
    const genericStart = typeText.indexOf('<');
    return (genericStart === -1 ? typeText : typeText.slice(0, genericStart)).trim();
  }

  // ========================================================================
  // Callee extraction
  // ========================================================================

  /**
   * Collect the calls made inside `node` by walking its named descendants.
   *
   * Each `call_expression` contributes the text of its `function` child and the
   * 1-based line it starts on; entries are de-duplicated on (name, line).
   * `macro_invocation` subtrees (println!, vec!, format!, ...) are not descended
   * into, so nothing inside a macro is reported.
   */
  private walkCallNodes(node: TreeSitterNode): CalleeInfo[] {
    const callees: CalleeInfo[] = [];
    const seen = new Set<string>();

    const visit = (current: TreeSitterNode): void => {
      // Macros are not function calls; ignore them and everything they contain.
      if (current.type === 'macro_invocation') return;

      if (current.type === 'call_expression') {
        const target = current.childForFieldName('function');
        if (target) {
          // Method chains are usually split across lines; drop the line breaks and
          // the indentation around them so the stored name is a single token.
          const name = target.text.replace(/\s*\n\s*/g, '');
          const line = current.startPosition.row + 1;
          const key = `${name}:${line}`;
          if (!seen.has(key)) {
            seen.add(key);
            callees.push({ name, line });
          }
        }
      }

      for (const child of current.namedChildren) {
        visit(child);
      }
    };

    visit(node);
    return callees;
  }

  // ========================================================================
  // Helpers
  // ========================================================================

  /**
   * Check if a node has a visibility_modifier child (pub, pub(crate), etc.)
   */
  private isExported(node: TreeSitterNode): boolean {
    return node.namedChildren.some((c) => c.type === 'visibility_modifier');
  }

  /**
   * Check if a function is async: the text preceding its `fn` keyword must
   * contain `async` as a whole word (so a path such as `crate::asyncio` inside
   * a visibility qualifier does not count).
   */
  private isAsyncFunction(node: TreeSitterNode): boolean {
    const fnKeyword = /\bfn\b/.exec(node.text);
    // No `fn`, or `fn` is the very first token: nothing can precede it.
    if (!fnKeyword || fnKeyword.index === 0) return false;
    return /\basync\b/.test(node.text.slice(0, fnKeyword.index));
  }

  /**
   * Extract the `///` doc comment preceding an item.
   *
   * Walks upwards from the line above the item, collecting `///` lines.
   * Attribute lines (`#[...]`) are skipped, blank lines are skipped until the
   * first doc line has been found, and anything else ends the walk. Lines
   * starting with four or more slashes are ordinary comments, not doc comments.
   *
   * @param sourceText - Full file contents.
   * @param nodeStartLine - 1-based line on which the item starts.
   * @returns The doc text with the `///` prefixes removed, or undefined when there is none.
   */
  private extractDocComment(sourceText: string, nodeStartLine: number): string | undefined {
    const lines = sourceText.split('\n');
    const docLines: string[] = [];

    // nodeStartLine is 1-based, so index nodeStartLine - 2 is the line directly above
    // the item. Clamp it so an out-of-range start line cannot index past the file.
    const firstCandidate = Math.min(nodeStartLine - 2, lines.length - 1);

    for (let i = firstCandidate; i >= 0; i--) {
      const line = (lines[i] ?? '').trim();

      if (line.startsWith('///') && !line.startsWith('////')) {
        docLines.unshift(line.slice(3).trim());
      } else if (line.startsWith('#[')) {
        // Attributes (#[derive], #[cfg], etc.) may sit between the docs and the item.
        continue;
      } else if (line === '') {
        // Empty line — stop if we have comments, otherwise keep looking.
        if (docLines.length > 0) break;
      } else {
        // Code or an ordinary comment — the doc block (if any) is complete.
        break;
      }
    }

    return docLines.length > 0 ? docLines.join('\n') : undefined;
  }

  /**
   * Extract the signature line from an item.
   *
   * Returns the first line that opens a declaration (see SIGNATURE_PREFIXES),
   * cut at its opening brace; falls back to the item's first line, cut the
   * same way, when no line matches.
   */
  private extractSignature(node: TreeSitterNode): string {
    const lines = node.text.split('\n').map((line) => line.trim());
    const declaration =
      lines.find((line) =>
        RustScanner.SIGNATURE_PREFIXES.some((prefix) => line.startsWith(prefix))
      ) ??
      lines[0] ??
      '';

    const braceIndex = declaration.indexOf('{');
    return braceIndex > 0 ? declaration.slice(0, braceIndex).trim() : declaration;
  }

  /**
   * Truncate a code snippet to MAX_SNIPPET_LINES, appending `// ...` when cut.
   */
  private truncateSnippet(text: string): string {
    const lines = text.split('\n');
    if (lines.length <= RustScanner.MAX_SNIPPET_LINES) return text;
    return lines.slice(0, RustScanner.MAX_SNIPPET_LINES).join('\n') + '\n// ...';
  }

  /** Join kind + name, signature and (when present) the doc comment, one per line. */
  private buildEmbeddingText(
    type: string,
    name: string,
    signature: string,
    docstring?: string
  ): string {
    const parts = [`${type} ${name}`, signature];
    if (docstring) parts.push(docstring);
    return parts.join('\n');
  }

  /**
   * Decide whether a file should be skipped as build output or generated code.
   *
   * True when the path lies in a `target/` directory (either separator style),
   * or when one of the first three lines carries `@generated` or one of the
   * GENERATED_COMMENTS markers.
   */
  private isGeneratedFile(filePath: string, sourceText: string): boolean {
    // Normalise Windows separators so the target/ check works on every platform.
    const normalizedPath = filePath.replace(/\\/g, '/');
    if (normalizedPath.includes('/target/') || normalizedPath.startsWith('target/')) return true;

    const firstLines = sourceText.split('\n').slice(0, 3).join('\n');
    return (
      firstLines.includes(RustScanner.GENERATED_MARKER) ||
      GENERATED_COMMENTS.some((c) => firstLines.includes(c))
    );
  }
}
Ensure tree-sitter-wasms contains the required WASM file */ -export type TreeSitterLanguage = 'go' | 'typescript' | 'tsx' | 'javascript' | 'python'; +export type TreeSitterLanguage = 'go' | 'typescript' | 'tsx' | 'javascript' | 'python' | 'rust'; /** * Cache of loaded language grammars diff --git a/packages/core/src/services/pattern-analysis-service.ts b/packages/core/src/services/pattern-analysis-service.ts index 4c84e82..3602140 100644 --- a/packages/core/src/services/pattern-analysis-service.ts +++ b/packages/core/src/services/pattern-analysis-service.ts @@ -7,7 +7,12 @@ import * as fs from 'node:fs/promises'; import * as path from 'node:path'; -import { ALL_PYTHON_QUERIES, ALL_QUERIES } from '../pattern-matcher/rules'; +import { + ALL_GO_QUERIES, + ALL_PYTHON_QUERIES, + ALL_QUERIES, + ALL_RUST_QUERIES, +} from '../pattern-matcher/rules'; import type { PatternMatcher, PatternMatchRule } from '../pattern-matcher/wasm-matcher'; import { resolveLanguage } from '../pattern-matcher/wasm-matcher'; @@ -20,6 +25,8 @@ const QUERIES_BY_LANGUAGE: Record = { tsx: ALL_QUERIES, javascript: ALL_QUERIES, python: ALL_PYTHON_QUERIES, + go: ALL_GO_QUERIES, + rust: ALL_RUST_QUERIES, }; import { scanRepository } from '../scanner'; diff --git a/packages/core/src/utils/__tests__/chunking.test.ts b/packages/core/src/utils/__tests__/chunking.test.ts new file mode 100644 index 0000000..2ac0b4f --- /dev/null +++ b/packages/core/src/utils/__tests__/chunking.test.ts @@ -0,0 +1,65 @@ +/** + * Tests for array chunking utility. + * Pure function — no I/O, no mocks. 
// Behavioural spec for `chunk(array, size)`: order is preserved, every chunk
// except possibly the last holds exactly `size` items, and invalid sizes throw.
describe('chunk', () => {
  it('should return single chunk for small arrays', () => {
    expect(chunk([1, 2, 3], 5)).toEqual([[1, 2, 3]]);
  });

  it('should split evenly', () => {
    expect(chunk([1, 2, 3, 4, 5, 6], 3)).toEqual([
      [1, 2, 3],
      [4, 5, 6],
    ]);
  });

  // The remainder ends up in a shorter final chunk.
  it('should handle uneven splits', () => {
    expect(chunk([1, 2, 3, 4, 5], 3)).toEqual([
      [1, 2, 3],
      [4, 5],
    ]);
  });

  it('should handle single element chunks', () => {
    expect(chunk([1, 2, 3], 1)).toEqual([[1], [2], [3]]);
  });

  // Empty input yields no chunks at all (not a single empty chunk).
  it('should return empty for empty array', () => {
    expect(chunk([], 3)).toEqual([]);
  });

  it('should handle chunk size equal to array length', () => {
    expect(chunk([1, 2, 3], 3)).toEqual([[1, 2, 3]]);
  });

  it('should handle chunk size larger than array', () => {
    expect(chunk([1, 2], 100)).toEqual([[1, 2]]);
  });

  // Zero and negative sizes are rejected with a fixed message.
  it('should throw on non-positive size', () => {
    expect(() => chunk([1], 0)).toThrow('Chunk size must be positive');
    expect(() => chunk([1], -1)).toThrow('Chunk size must be positive');
  });

  it('should work with large arrays (6000 items, chunks of 3000)', () => {
    const items = Array.from({ length: 6000 }, (_, i) => i);
    const result = chunk(items, 3000);
    expect(result.length).toBe(2);
    expect(result[0].length).toBe(3000);
    expect(result[1].length).toBe(3000);
  });

  it('should work with 7500 items in chunks of 3000', () => {
    const items = Array.from({ length: 7500 }, (_, i) => i);
    const result = chunk(items, 3000);
    expect(result.length).toBe(3);
    expect(result[0].length).toBe(3000);
    expect(result[1].length).toBe(3000);
    expect(result[2].length).toBe(1500);
  });
});
/**
 * Array chunking utility.
 *
 * Splits an array into consecutive chunks of at most `size` elements,
 * preserving order. Pure function — the input array is not modified.
 *
 * @param array - Items to split.
 * @param size - Maximum chunk length. A fractional value is rounded down;
 *   `Infinity` yields a single chunk holding every item.
 * @returns The chunks in order; an empty array when `array` is empty.
 * @throws Error 'Chunk size must be positive' when `size` is NaN or rounds
 *   down to less than 1.
 */
export function chunk<T>(array: readonly T[], size: number): T[][] {
  const step = Math.floor(size);
  // `!(step >= 1)` also rejects NaN and sizes in (0, 1), which a plain `size <= 0`
  // check lets through and which then produce empty or oversized chunks.
  if (!(step >= 1)) throw new Error('Chunk size must be positive');
  if (array.length === 0) return [];

  const chunks: T[][] = [];
  for (let start = 0; start < array.length; start += step) {
    chunks.push(array.slice(start, start + step));
  }
  return chunks;
}
Update scanner registration in packages/core/src/scanner/index.ts -const SUPPORTED_LANGUAGES = ['go', 'typescript', 'tsx', 'javascript', 'python']; +const SUPPORTED_LANGUAGES = ['go', 'typescript', 'tsx', 'javascript', 'python', 'rust']; const SUPPORTED_FILES = new Set([ ...SUPPORTED_LANGUAGES.map((lang) => `tree-sitter-${lang}.wasm`), 'tree-sitter.wasm', // Runtime if present diff --git a/website/content/index.mdx b/website/content/index.mdx index ad3207e..3d66b79 100644 --- a/website/content/index.mdx +++ b/website/content/index.mdx @@ -101,7 +101,7 @@ dev mcp install # For Claude Code - **Hybrid Search** — BM25 keyword + vector semantic, fused with RRF - **Code Snippets** — Search returns actual code, not just file paths - **Call Graph** — Callers/callees extracted from AST at index time -- **Multi-Language** — TypeScript, JavaScript, Python, Go, Markdown +- **Multi-Language** — TypeScript, JavaScript, Python, Go, Rust, Markdown - **100% Local** — Antfly runs on your machine. No data leaves. - **Auto-Index** — File watcher re-indexes on save while MCP server runs - **1,600+ Tests** — Production-grade reliability diff --git a/website/content/latest-version.ts b/website/content/latest-version.ts index a7ab639..93998ca 100644 --- a/website/content/latest-version.ts +++ b/website/content/latest-version.ts @@ -4,10 +4,10 @@ */ export const latestVersion = { - version: '0.11.2', - title: 'dev refs CLI Command', + version: '0.12.0', + title: 'Go Callees + Rust Language Support', date: 'April 1, 2026', summary: - 'Find callers and callees from the terminal — dev refs . Plus callee path normalization so hot paths show source files.', - link: '/updates#v0112--dev-refs-cli-command', + 'Index Rust codebases — functions, structs, traits, impl methods, callees. Go call graph tracing. 
All MCP tools work with both languages.', + link: '/updates#v0120--go-callees--rust-language-support', } as const; diff --git a/website/content/updates/index.mdx b/website/content/updates/index.mdx index f3e3ffe..42eb60f 100644 --- a/website/content/updates/index.mdx +++ b/website/content/updates/index.mdx @@ -9,6 +9,23 @@ What's new in dev-agent. We ship improvements regularly to help AI assistants un --- +## v0.12.0 — Go Callees + Rust Language Support + +*April 1, 2026* + +**dev-agent now indexes Rust codebases and traces Go call graphs.** + +- **Rust scanner:** functions, structs, enums, traits, impl methods, imports, callees, doc comments +- **Rust patterns:** try operator (`?`), match expressions, unsafe blocks, impl/trait definitions +- **Go callees:** `dev_refs` now traces Go call chains (was extraction-only, no call graph) +- **Go patterns:** error handling (`if err != nil`), goroutines, defer, channels +- Generic impl type parameter stripping: `Container<T>.show` → `Container.show` +- Macros intentionally excluded from callees (they're `macro_invocation`, not function calls) +- Malformed file resilience: scanner returns empty, no crash +- 43 new tests across both languages + +--- + ## v0.11.2 — `dev refs` CLI Command *April 1, 2026*