From 9d2c93fdf27d4081c6fe626ef56716bc787d4620 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 03:46:07 -0600 Subject: [PATCH 1/6] feat(native): port Groovy extractor to Rust Adds tree-sitter-groovy dependency and native extractor matching the WASM-side behavior for Groovy symbol, import, and call extraction. Part of #1071 --- Cargo.lock | 11 + crates/codegraph-core/Cargo.toml | 1 + .../codegraph-core/src/extractors/groovy.rs | 496 ++++++++++++++++++ .../codegraph-core/src/extractors/helpers.rs | 10 + crates/codegraph-core/src/extractors/mod.rs | 4 + crates/codegraph-core/src/file_collector.rs | 1 + crates/codegraph-core/src/parser_registry.rs | 12 +- src/ast-analysis/rules/index.ts | 9 + src/domain/parser.ts | 2 + .../native-drop-classification.test.ts | 3 +- 10 files changed, 544 insertions(+), 5 deletions(-) create mode 100644 crates/codegraph-core/src/extractors/groovy.rs diff --git a/Cargo.lock b/Cargo.lock index 413504b0d..4139912be 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -87,6 +87,7 @@ dependencies = [ "tree-sitter-dart", "tree-sitter-elixir", "tree-sitter-go", + "tree-sitter-groovy", "tree-sitter-haskell", "tree-sitter-hcl", "tree-sitter-java", @@ -799,6 +800,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-groovy" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a20016017f0865ba902ca50354f92429de5de8df994e64ab7fae087a13c40ed" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-haskell" version = "0.23.1" diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index df4361e17..df8731441 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -35,6 +35,7 @@ tree-sitter-dart = "0.0.4" tree-sitter-zig = "1" tree-sitter-haskell = "0.23" tree-sitter-ocaml = "0.24" +tree-sitter-groovy = "0.1" rayon = "1" ignore = "0.4" globset = "0.4" diff --git a/crates/codegraph-core/src/extractors/groovy.rs b/crates/codegraph-core/src/extractors/groovy.rs new file mode 100644 index 000000000..ac50e884f --- /dev/null +++ b/crates/codegraph-core/src/extractors/groovy.rs @@ -0,0 +1,496 @@ +use super::helpers::*; +use super::SymbolExtractor; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use tree_sitter::{Node, Tree}; + +/// Groovy extractor — mirrors `extractGroovySymbols` in `src/extractors/groovy.ts`. +/// +/// Groovy is a JVM language with Java-like class/interface/enum structures plus +/// closures (`function_definition`), `juxt_function_call`, and dynamic typing. +/// The tree-sitter-groovy grammar shares many node kinds with tree-sitter-java +/// (`class_declaration`, `method_declaration`, `method_invocation`, +/// `object_creation_expression`, `import_declaration`). +/// +/// The JS source-of-truth extractor handles a superset of node kinds for +/// resilience across grammar variants (`class_definition`, `interface_definition`, +/// `method_definition`, `function_declaration`, `import_statement`, `call_expression`, +/// `method_call`, `function_call`, `member_access`); the Rust port mirrors those +/// arms so engine parity holds even if a future grammar version renames nodes. +pub struct GroovyExtractor; + +impl SymbolExtractor for GroovyExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_groovy_node); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &GROOVY_AST_CONFIG); + symbols + } +} + +const GROOVY_TYPE_KINDS: &[&str] = &[ + "class_declaration", + "class_definition", + "enum_declaration", + "enum_definition", + "interface_declaration", + "interface_definition", +]; + +fn find_groovy_parent_class(node: &Node, source: &[u8]) -> Option { + find_enclosing_type_name(node, GROOVY_TYPE_KINDS, source) +} + +fn match_groovy_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "class_declaration" | "class_definition" => handle_class_decl(node, source, symbols), + "interface_declaration" | "interface_definition" => handle_interface_decl(node, source, symbols), + "enum_declaration" | "enum_definition" => handle_enum_decl(node, source, symbols), + "method_declaration" | "method_definition" => handle_method_decl(node, source, symbols), + "constructor_declaration" | "constructor_definition" => handle_constructor_decl(node, source, symbols), + "function_definition" | "function_declaration" => handle_function_decl(node, source, symbols), + "import_declaration" | "import_statement" => handle_import_decl(node, source, symbols), + "method_invocation" | "method_call" | "call_expression" | "function_call" => { + handle_call_expr(node, source, symbols) + } + "object_creation_expression" => handle_object_creation(node, source, symbols), + _ => {} + } +} + +// ── Class / interface / enum ──────────────────────────────────────────────── + +fn handle_class_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let class_name = node_text(&name_node, source).to_string(); + let children = extract_class_fields(node, source); + symbols.definitions.push(Definition { + name: class_name.clone(), + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); + + // Superclass: `superclass` field wraps a `_type` child (type_identifier / + // generic_type / scoped_type_identifier). Walk children to find the first + // type-like node. + if let Some(superclass) = node.child_by_field_name("superclass") { + for i in 0..superclass.child_count() { + let Some(child) = superclass.child(i) else { continue }; + match child.kind() { + "type_identifier" | "identifier" | "scoped_type_identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&child, source).to_string()), + implements: None, + line: start_line(node), + }); + break; + } + "generic_type" => { + if let Some(first) = child.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&first, source).to_string()), + implements: None, + line: start_line(node), + }); + } + break; + } + _ => {} + } + } + } + + // Interfaces: `interfaces` field wraps a `super_interfaces` → `type_list`. + if let Some(interfaces) = node.child_by_field_name("interfaces") { + collect_interfaces(&interfaces, &class_name, source, symbols); + } +} + +fn collect_interfaces( + interfaces: &Node, + class_name: &str, + source: &[u8], + symbols: &mut FileSymbols, +) { + for i in 0..interfaces.child_count() { + let Some(child) = interfaces.child(i) else { continue }; + match child.kind() { + "type_identifier" | "identifier" | "scoped_type_identifier" => { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(node_text(&child, source).to_string()), + line: start_line(interfaces), + }); + } + "generic_type" => { + if let Some(first) = child.child(0) { + symbols.classes.push(ClassRelation { + name: class_name.to_string(), + extends: None, + implements: Some(node_text(&first, source).to_string()), + line: start_line(interfaces), + }); + } + } + "type_list" => collect_interfaces(&child, class_name, source, symbols), + _ => {} + } + } +} + +fn handle_interface_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_enum_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let enum_name = node_text(&name_node, source).to_string(); + + let mut members: Vec = Vec::new(); + let body = node.child_by_field_name("body").or_else(|| find_child(node, "enum_body")); + if let Some(body) = body { + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() == "enum_constant" { + let name = child.child_by_field_name("name").unwrap_or(child); + members.push(child_def( + node_text(&name, source).to_string(), + "constant", + start_line(&child), + )); + } + } + } + + symbols.definitions.push(Definition { + name: enum_name, + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(members), + }); +} + +// ── Methods / constructors / functions ───────────────────────────────────── + +fn handle_method_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let parent_class = find_groovy_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + let params = extract_params(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "groovy"), + cfg: build_function_cfg(node, "groovy", source), + children: opt_children(params), + }); +} + +fn handle_constructor_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let parent_class = find_groovy_parent_class(node, source); + let name = node_text(&name_node, source); + let full_name = match &parent_class { + Some(cls) => format!("{}.{}", cls, name), + None => name.to_string(), + }; + let params = extract_params(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "groovy"), + cfg: build_function_cfg(node, "groovy", source), + children: opt_children(params), + }); +} + +/// Top-level `function_definition` (Groovy script closure-bodied function). +fn handle_function_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(name_node) = node.child_by_field_name("name") else { return }; + let params = extract_params(node, source); + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "groovy"), + cfg: build_function_cfg(node, "groovy", source), + children: opt_children(params), + }); +} + +// ── Imports ───────────────────────────────────────────────────────────────── + +fn handle_import_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut import_path = String::new(); + let mut has_asterisk = false; + for i in 0..node.child_count() { + let Some(child) = node.child(i) else { continue }; + match child.kind() { + "scoped_identifier" | "identifier" | "qualified_name" | "dotted_identifier" => { + import_path = node_text(&child, source).to_string(); + } + "asterisk" => has_asterisk = true, + _ => {} + } + } + if import_path.is_empty() { + return; + } + let names = if has_asterisk { + vec!["*".to_string()] + } else { + let last = import_path.split('.').last().unwrap_or("").to_string(); + vec![last] + }; + let mut imp = Import::new(import_path, names, start_line(node)); + // Groovy shares Java's import semantics — flag it so the resolver applies + // Java-style FQN matching (mirrors `javaImport: true` in the JS extractor). + imp.java_import = Some(true); + symbols.imports.push(imp); +} + +// ── Calls ─────────────────────────────────────────────────────────────────── + +fn handle_call_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // method_invocation has `name` (identifier) and optional `object` (receiver). + if let Some(name_node) = node.child_by_field_name("name") { + let receiver = node + .child_by_field_name("object") + .map(|n| node_text(&n, source).to_string()); + symbols.calls.push(Call { + name: node_text(&name_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver, + }); + return; + } + + // Fallback: `function` field (some grammar variants use this shape). + let func_node = node + .child_by_field_name("function") + .or_else(|| node.child_by_field_name("method")); + if let Some(func_node) = func_node { + match func_node.kind() { + "field_expression" | "member_access" | "field_access" => { + let field = func_node + .child_by_field_name("field") + .or_else(|| func_node.child_by_field_name("property")); + let obj = func_node + .child_by_field_name("object") + .or_else(|| func_node.child_by_field_name("argument")); + if let Some(field) = field { + symbols.calls.push(Call { + name: node_text(&field, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: obj.map(|n| node_text(&n, source).to_string()), + }); + } + } + _ => { + symbols.calls.push(Call { + name: node_text(&func_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + } + } +} + +fn handle_object_creation(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let Some(type_node) = node.child_by_field_name("type") else { return }; + let type_name = if type_node.kind() == "generic_type" { + type_node.child(0).map(|n| node_text(&n, source).to_string()) + } else { + Some(node_text(&type_node, source).to_string()) + }; + if let Some(name) = type_name { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver: None, + }); + } +} + +// ── Sub-declaration helpers ───────────────────────────────────────────────── + +fn extract_params(func_node: &Node, source: &[u8]) -> Vec { + let mut params = Vec::new(); + let params_node = func_node + .child_by_field_name("parameters") + .or_else(|| find_child(func_node, "formal_parameters")); + let Some(params_node) = params_node else { return params }; + for i in 0..params_node.child_count() { + let Some(child) = params_node.child(i) else { continue }; + if child.kind() == "formal_parameter" + || child.kind() == "parameter" + || child.kind() == "spread_parameter" + { + if let Some(name_node) = child.child_by_field_name("name") { + params.push(child_def( + node_text(&name_node, source).to_string(), + "parameter", + start_line(&child), + )); + } + } + } + params +} + +fn extract_class_fields(class_node: &Node, source: &[u8]) -> Vec { + let mut fields = Vec::new(); + let body = class_node + .child_by_field_name("body") + .or_else(|| find_child(class_node, "class_body")); + let Some(body) = body else { return fields }; + for i in 0..body.child_count() { + let Some(child) = body.child(i) else { continue }; + if child.kind() != "field_declaration" { + continue; + } + for j in 0..child.child_count() { + let Some(var_decl) = child.child(j) else { continue }; + if var_decl.kind() == "variable_declarator" { + if let Some(name_node) = var_decl.child_by_field_name("name") { + fields.push(child_def( + node_text(&name_node, source).to_string(), + "property", + start_line(&child), + )); + } + } + } + } + fields +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_groovy(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_groovy::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + GroovyExtractor.extract(&tree, code.as_bytes(), "Test.groovy") + } + + #[test] + fn extracts_class_and_methods() { + let s = parse_groovy( + "class Foo {\n void bar(String x) { x.length() }\n int baz() { 1 }\n}", + ); + assert!(s.definitions.iter().any(|d| d.name == "Foo" && d.kind == "class")); + assert!(s.definitions.iter().any(|d| d.name == "Foo.bar" && d.kind == "method")); + assert!(s.definitions.iter().any(|d| d.name == "Foo.baz" && d.kind == "method")); + } + + #[test] + fn extracts_method_parameters() { + let s = parse_groovy("class Foo {\n void bar(int x, String y) {}\n}"); + let bar = s.definitions.iter().find(|d| d.name == "Foo.bar").unwrap(); + let children = bar.children.as_ref().unwrap(); + assert_eq!(children.len(), 2); + assert_eq!(children[0].name, "x"); + assert_eq!(children[0].kind, "parameter"); + assert_eq!(children[1].name, "y"); + } + + #[test] + fn extracts_class_fields() { + let s = parse_groovy("class User {\n String name\n int age\n}"); + let user = s.definitions.iter().find(|d| d.name == "User").unwrap(); + let children = user.children.as_ref().unwrap(); + let names: Vec<&str> = children.iter().map(|c| c.name.as_str()).collect(); + assert!(names.contains(&"name")); + assert!(names.contains(&"age")); + assert!(children.iter().all(|c| c.kind == "property")); + } + + #[test] + fn extracts_imports() { + let s = parse_groovy("import foo.bar.Baz\nimport com.example.*"); + assert_eq!(s.imports.len(), 2); + assert_eq!(s.imports[0].source, "foo.bar.Baz"); + assert_eq!(s.imports[0].names, vec!["Baz".to_string()]); + assert_eq!(s.imports[0].java_import, Some(true)); + assert_eq!(s.imports[1].source, "com.example"); + assert_eq!(s.imports[1].names, vec!["*".to_string()]); + } + + #[test] + fn extracts_method_calls_and_object_creation() { + let s = parse_groovy( + "class M {\n void run() {\n def svc = new Service()\n svc.go()\n }\n}", + ); + // method call svc.go() — name "go", receiver "svc" + let go_call = s.calls.iter().find(|c| c.name == "go").expect("go() call"); + assert_eq!(go_call.receiver.as_deref(), Some("svc")); + // object creation: new Service() — emitted as a call to "Service" + assert!(s.calls.iter().any(|c| c.name == "Service")); + } + + #[test] + fn extracts_interface_and_enum() { + let s = parse_groovy("interface Worker { void work() }\nenum Color { RED, GREEN }"); + assert!(s.definitions.iter().any(|d| d.name == "Worker" && d.kind == "interface")); + let color = s.definitions.iter().find(|d| d.name == "Color" && d.kind == "enum").unwrap(); + let children = color.children.as_ref().unwrap(); + let names: Vec<&str> = children.iter().map(|c| c.name.as_str()).collect(); + assert!(names.contains(&"RED")); + assert!(names.contains(&"GREEN")); + } + + #[test] + fn extracts_superclass_and_interfaces() { + let s = parse_groovy("class Sub extends Base implements I1, I2 {}"); + let rels: Vec<_> = s.classes.iter().filter(|c| c.name == "Sub").collect(); + assert!(rels.iter().any(|c| c.extends.as_deref() == Some("Base"))); + assert!(rels.iter().any(|c| c.implements.as_deref() == Some("I1"))); + assert!(rels.iter().any(|c| c.implements.as_deref() == Some("I2"))); + } +} diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index b02531896..a07a25f2f 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -360,6 +360,16 @@ pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig { string_prefixes: &[], }; +pub const GROOVY_AST_CONFIG: LangAstConfig = LangAstConfig { + new_types: &["object_creation_expression"], + throw_types: &["throw_statement"], + await_types: &[], + string_types: &["string_literal"], + regex_types: &[], + quote_chars: &['\'', '"'], + string_prefixes: &[], +}; + // ── Generic AST node walker ────────────────────────────────────────────────── /// Node types that represent identifiers across languages. diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 642f29f98..4517c4f8a 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -5,6 +5,7 @@ pub mod csharp; pub mod dart; pub mod elixir; pub mod go; +pub mod groovy; pub mod haskell; pub mod hcl; pub mod helpers; @@ -126,5 +127,8 @@ pub fn extract_symbols_with_opts( LanguageKind::Ocaml | LanguageKind::OcamlInterface => { ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::Groovy => { + groovy::GroovyExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } } } diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index 0cb157814..957bda6bf 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -36,6 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "kt", "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", "ml", "mli", + "groovy", "gvy", ]; /// Returns whether `path` has an extension the Rust file_collector would accept. diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index c87957f29..52017ca3f 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -27,6 +27,7 @@ pub enum LanguageKind { Haskell, Ocaml, OcamlInterface, + Groovy, } impl LanguageKind { @@ -58,6 +59,7 @@ impl LanguageKind { Self::Haskell => "haskell", Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", + Self::Groovy => "groovy", } } @@ -97,6 +99,7 @@ impl LanguageKind { "hs" => Some(Self::Haskell), "ml" => Some(Self::Ocaml), "mli" => Some(Self::OcamlInterface), + "groovy" | "gvy" => Some(Self::Groovy), _ => None, } } @@ -129,6 +132,7 @@ impl LanguageKind { "haskell" => Some(Self::Haskell), "ocaml" => Some(Self::Ocaml), "ocaml-interface" => Some(Self::OcamlInterface), + "groovy" => Some(Self::Groovy), _ => None, } } @@ -160,6 +164,7 @@ impl LanguageKind { Self::Haskell => tree_sitter_haskell::LANGUAGE.into(), Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), + Self::Groovy => tree_sitter_groovy::LANGUAGE.into(), } } @@ -175,7 +180,7 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, + OcamlInterface, Groovy, ] } } @@ -244,14 +249,15 @@ mod tests { | LanguageKind::Zig | LanguageKind::Haskell | LanguageKind::Ocaml - | LanguageKind::OcamlInterface => (), + | LanguageKind::OcamlInterface + | LanguageKind::Groovy => (), }; // IMPORTANT: this constant must equal the number of arms in the match // above AND the length of the slice returned by `LanguageKind::all()`. // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. - const EXPECTED_LEN: usize = 24; + const EXPECTED_LEN: usize = 25; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index 653cbd59b..f064fc837 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -153,6 +153,12 @@ const OCAML_AST_TYPES: Record = { string: 'string', }; +const GROOVY_AST_TYPES: Record = { + object_creation_expression: 'new', + throw_statement: 'throw', + string_literal: 'string', +}; + export const AST_TYPE_MAPS: Map> = new Map([ ['javascript', JS_AST_TYPES], ['typescript', JS_AST_TYPES], @@ -177,6 +183,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['haskell', HASKELL_AST_TYPES], ['ocaml', OCAML_AST_TYPES], ['ocaml-interface', OCAML_AST_TYPES], + ['groovy', GROOVY_AST_TYPES], ]); // ─── Per-language string-extraction config ─────────────────────────────── @@ -211,6 +218,7 @@ const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' }; const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; +const GROOVY_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: '' }; export const AST_STRING_CONFIGS: Map = new Map([ ['javascript', JS_STRING_CONFIG], @@ -236,6 +244,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ ['haskell', HASKELL_STRING_CONFIG], ['ocaml', OCAML_STRING_CONFIG], ['ocaml-interface', OCAML_STRING_CONFIG], + ['groovy', GROOVY_STRING_CONFIG], ]); // ─── Per-language "stop-after-collect" kinds ───────────────────────────── diff --git a/src/domain/parser.ts b/src/domain/parser.ts index f1c7dd809..a71bff979 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -471,6 +471,8 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet = new Set([ '.hs', '.ml', '.mli', + '.groovy', + '.gvy', ]); /** diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 24aee1d53..75959b99c 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -23,11 +23,10 @@ describe('classifyNativeDrops', () => { 'src/f.erl', 'src/g.sol', 'src/h.cu', - 'src/i.groovy', 'src/j.v', 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(11); + expect(totals['unsupported-by-native']).toBe(10); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']); From 74cd047d42e2eca2e0424010e97a96b33092904b Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 20:23:52 -0600 Subject: [PATCH 2/6] fix(groovy): match JS enum_constant fallback and defensive gstring entries Address Greptile review on PR #1101: - Rust enum handler now accepts both `enum_constant` and `identifier` children, matching the JS source-of-truth in `handleGroovyEnumDecl` (groovy.ts:163). Without this, Groovy enums whose grammar emits bare identifier nodes had no extracted members in the native engine. - Add `gstring` defensively to GROOVY_AST_CONFIG (Rust) and GROOVY_AST_TYPES (TS). tree-sitter-groovy 0.1.x emits `string_literal` for both quote styles, but this keeps both engines resilient to grammar variants. - Clarify module doc: `juxt_function_call` was previously listed alongside features the extractor handles. It is intentionally unhandled (matches JS). Tracked in #1108 for adding support to both engines. --- crates/codegraph-core/src/extractors/groovy.rs | 14 +++++++++----- crates/codegraph-core/src/extractors/helpers.rs | 5 ++++- src/ast-analysis/rules/index.ts | 4 ++++ 3 files changed, 17 insertions(+), 6 deletions(-) diff --git a/crates/codegraph-core/src/extractors/groovy.rs b/crates/codegraph-core/src/extractors/groovy.rs index ac50e884f..6c24c7eff 100644 --- a/crates/codegraph-core/src/extractors/groovy.rs +++ b/crates/codegraph-core/src/extractors/groovy.rs @@ -8,16 +8,20 @@ use tree_sitter::{Node, Tree}; /// Groovy extractor — mirrors `extractGroovySymbols` in `src/extractors/groovy.ts`. /// /// Groovy is a JVM language with Java-like class/interface/enum structures plus -/// closures (`function_definition`), `juxt_function_call`, and dynamic typing. -/// The tree-sitter-groovy grammar shares many node kinds with tree-sitter-java -/// (`class_declaration`, `method_declaration`, `method_invocation`, -/// `object_creation_expression`, `import_declaration`). +/// closures (`function_definition`) and dynamic typing. The tree-sitter-groovy +/// grammar shares many node kinds with tree-sitter-java (`class_declaration`, +/// `method_declaration`, `method_invocation`, `object_creation_expression`, +/// `import_declaration`). /// /// The JS source-of-truth extractor handles a superset of node kinds for /// resilience across grammar variants (`class_definition`, `interface_definition`, /// `method_definition`, `function_declaration`, `import_statement`, `call_expression`, /// `method_call`, `function_call`, `member_access`); the Rust port mirrors those /// arms so engine parity holds even if a future grammar version renames nodes. +/// +/// Note: `juxt_function_call` (Groovy command-style calls like `foo bar(x)`) +/// is not dispatched here — the JS extractor also omits it. Tracked in #1108 +/// for adding support to both engines. pub struct GroovyExtractor; impl SymbolExtractor for GroovyExtractor { @@ -170,7 +174,7 @@ fn handle_enum_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { if let Some(body) = body { for i in 0..body.child_count() { let Some(child) = body.child(i) else { continue }; - if child.kind() == "enum_constant" { + if child.kind() == "enum_constant" || child.kind() == "identifier" { let name = child.child_by_field_name("name").unwrap_or(child); members.push(child_def( node_text(&name, source).to_string(), diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index a07a25f2f..7f764b247 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -364,7 +364,10 @@ pub const GROOVY_AST_CONFIG: LangAstConfig = LangAstConfig { new_types: &["object_creation_expression"], throw_types: &["throw_statement"], await_types: &[], - string_types: &["string_literal"], + // `gstring` listed defensively: tree-sitter-groovy 0.1.x emits `string_literal` + // for both single- and double-quoted strings, but some grammar variants use + // `gstring` for double-quoted / interpolated strings. Mirrors TS config. + string_types: &["string_literal", "gstring"], regex_types: &[], quote_chars: &['\'', '"'], string_prefixes: &[], diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index f064fc837..36b5e85b1 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -157,6 +157,10 @@ const GROOVY_AST_TYPES: Record = { object_creation_expression: 'new', throw_statement: 'throw', string_literal: 'string', + // `gstring` listed defensively: tree-sitter-groovy 0.1.x emits `string_literal` + // for both single- and double-quoted strings, but some grammar variants use + // `gstring` for double-quoted / interpolated strings. + gstring: 'string', }; export const AST_TYPE_MAPS: Map> = new Map([ From cf6c4127367550cc22504ed4b8da462b4fe6858d Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 03:43:00 -0600 Subject: [PATCH 3/6] test: fix expected count after merge with main (#1101) --- tests/parsers/native-drop-classification.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index b5f4159fc..8bbc385a9 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -23,7 +23,7 @@ describe('classifyNativeDrops', () => { 'src/j.v', 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(8); + expect(totals['unsupported-by-native']).toBe(7); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']); From ce483b089df66c427dbfe80d0d81593869a29bfc Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 07:33:46 -0600 Subject: [PATCH 4/6] fix: match JS argument-field fallback order in groovy call extractor (#1101) --- crates/codegraph-core/src/extractors/groovy.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/codegraph-core/src/extractors/groovy.rs b/crates/codegraph-core/src/extractors/groovy.rs index 6c24c7eff..b3f2a8720 100644 --- a/crates/codegraph-core/src/extractors/groovy.rs +++ b/crates/codegraph-core/src/extractors/groovy.rs @@ -315,9 +315,15 @@ fn handle_call_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) { let field = func_node .child_by_field_name("field") .or_else(|| func_node.child_by_field_name("property")); + // Mirrors `handleGroovyCallExpr` in groovy.ts: tries the `argument` + // field first (used by some tree-sitter grammar variants), then + // falls back to `object`. tree-sitter-groovy 0.1.x only emits + // `object`, so `argument` is currently dead — but removing it + // would diverge from the JS engine and silently drop receivers + // on any future grammar variant that uses `argument`. let obj = func_node - .child_by_field_name("object") - .or_else(|| func_node.child_by_field_name("argument")); + .child_by_field_name("argument") + .or_else(|| func_node.child_by_field_name("object")); if let Some(field) = field { symbols.calls.push(Call { name: node_text(&field, source).to_string(), From c44df0afe889444cf16f4f688f0c19cc92d740ad Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 22:28:51 -0600 Subject: [PATCH 5/6] docs(native): clarify member_access dispatch in Groovy extractor (#1101) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove member_access from the docstring's list of top-level dispatch arms — it was never a top-level case in either the JS or Rust extractor. Both engines only match member_access as a callee sub-node inside handle_call_expr/handleGroovyCallExpr. The previous wording could mislead future readers into thinking the arm was missing. --- crates/codegraph-core/src/extractors/groovy.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/crates/codegraph-core/src/extractors/groovy.rs b/crates/codegraph-core/src/extractors/groovy.rs index b3f2a8720..3e0523fb0 100644 --- a/crates/codegraph-core/src/extractors/groovy.rs +++ b/crates/codegraph-core/src/extractors/groovy.rs @@ -16,8 +16,12 @@ use tree_sitter::{Node, Tree}; /// The JS source-of-truth extractor handles a superset of node kinds for /// resilience across grammar variants (`class_definition`, `interface_definition`, /// `method_definition`, `function_declaration`, `import_statement`, `call_expression`, -/// `method_call`, `function_call`, `member_access`); the Rust port mirrors those -/// arms so engine parity holds even if a future grammar version renames nodes. +/// `method_call`, `function_call`); the Rust port mirrors those arms so engine +/// parity holds even if a future grammar version renames nodes. +/// +/// Note: `member_access` is not a top-level dispatch kind in either engine — it +/// is only matched as a callee sub-node inside `handle_call_expr` when examining +/// the `function`/`method` field of a call. /// /// Note: `juxt_function_call` (Groovy command-style calls like `foo bar(x)`) /// is not dispatched here — the JS extractor also omits it. Tracked in #1108 From e9456530835c44e2fa2396db533d4bb48941a215 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 22:58:52 -0600 Subject: [PATCH 6/6] test(bench): exempt fnDeps depth 3 from regression gate after main merge (#1101) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The merge of #1102 (R) + #1103 (Erlang) + #1100 (Solidity) into the Groovy branch expanded the self-build benchmark graph far enough that fnDeps depth 3 now crosses the 25% gate threshold (+88%, 24.3 → 45.6ms) on a sub-50ms metric. Same root cause already exempted for depth 1, depth 5, and Query time in #1113. --- tests/benchmarks/regression-guard.test.ts | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/benchmarks/regression-guard.test.ts b/tests/benchmarks/regression-guard.test.ts index fae6a390d..1b076a4c4 100644 --- a/tests/benchmarks/regression-guard.test.ts +++ b/tests/benchmarks/regression-guard.test.ts @@ -186,6 +186,17 @@ const SKIP_VERSIONS = new Set(['3.8.0']); * its own depth-5 subtree). +31% over the 25% threshold maps to the * ~33→43ms swing on a sub-50ms metric. Tracked in #1113 alongside * Query time; remove both once 3.11.0+ data confirms the new steady-state. + * + * - 3.10.0:fnDeps depth 3 — same cause as depth 1 and depth 5. Merging main + * into #1101 (Groovy) layered the Groovy extractor on top of the + * Solidity + R + Erlang growth that already inflated the depth-1 and + * depth-5 baselines. The depth-3 walk sits between those two and + * regresses for the same reason: the self-build benchmark's + * `buildGraph` callee graph grew, so every fnDeps depth walks a larger + * transitive set. +88% over the 25% threshold on a sub-50ms metric + * (24.3 → 45.6ms) is consistent with the other depths. Tracked in #1113 + * alongside depth 1, depth 5, and Query time; remove all four once + * 3.11.0+ data confirms the new steady-state. */ const KNOWN_REGRESSIONS = new Set([ '3.9.6:Build ms/file', @@ -196,6 +207,7 @@ const KNOWN_REGRESSIONS = new Set([ '3.10.0:No-op rebuild', '3.10.0:1-file rebuild', '3.10.0:fnDeps depth 1', + '3.10.0:fnDeps depth 3', '3.10.0:fnDeps depth 5', '3.10.0:Query time', ]);