From e95deab3b057f700256ef7ed97c925bc99f68053 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 14:52:18 -0600 Subject: [PATCH 1/5] feat(native): port Gleam extractor to Rust --- Cargo.lock | 11 + crates/codegraph-core/Cargo.toml | 1 + crates/codegraph-core/src/change_detection.rs | 5 +- crates/codegraph-core/src/extractors/gleam.rs | 431 ++++++++++++++++++ .../codegraph-core/src/extractors/helpers.rs | 10 + crates/codegraph-core/src/extractors/mod.rs | 4 + crates/codegraph-core/src/file_collector.rs | 7 +- crates/codegraph-core/src/parser_registry.rs | 12 +- package-lock.json | 1 + src/ast-analysis/rules/index.ts | 7 + src/domain/parser.ts | 1 + .../native-drop-classification.test.ts | 10 +- 12 files changed, 485 insertions(+), 15 deletions(-) create mode 100644 crates/codegraph-core/src/extractors/gleam.rs diff --git a/Cargo.lock b/Cargo.lock index 413504b0d..753b3e603 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -86,6 +86,7 @@ dependencies = [ "tree-sitter-cpp", "tree-sitter-dart", "tree-sitter-elixir", + "tree-sitter-gleam", "tree-sitter-go", "tree-sitter-haskell", "tree-sitter-hcl", @@ -789,6 +790,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-gleam" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0175c53793bda5d444360dd5add25463d18d66afb7f521d6791e2fc61bf2fb3" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-go" version = "0.23.4" diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index df4361e17..071b77698 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -35,6 +35,7 @@ tree-sitter-dart = "0.0.4" tree-sitter-zig = "1" tree-sitter-haskell = "0.23" tree-sitter-ocaml = "0.24" +tree-sitter-gleam = "1" rayon = "1" ignore = "0.4" globset = "0.4" diff --git a/crates/codegraph-core/src/change_detection.rs b/crates/codegraph-core/src/change_detection.rs index 08e4b7419..770286904 100644 --- a/crates/codegraph-core/src/change_detection.rs +++ b/crates/codegraph-core/src/change_detection.rs @@ -132,7 +132,7 @@ fn load_file_hashes(conn: &Connection) -> Option> { /// found on disk are treated as removed. /// /// Files whose extension is outside the Rust file_collector's supported set -/// (e.g. `.clj`, `.gleam`, `.jl`, `.fs` — WASM-only languages) are skipped: +/// (e.g. `.clj`, `.jl`, `.fs` — WASM-only languages) are skipped: /// the orchestrator's narrower collector never sees them, so absence from /// `current` is a capability boundary, not a deletion. Their `nodes` and /// `file_hashes` rows are owned by the JS-side WASM backfill (#967, #1068) @@ -774,7 +774,7 @@ mod tests { #[test] fn detect_removed_skips_unsupported_extensions() { - // Files in WASM-only languages (Clojure, Gleam, Julia, F#) live in + // Files in WASM-only languages (Clojure, Julia, F#) live in // `file_hashes` because the JS-side WASM backfill writes them, but // Rust's narrower file_collector never collects them. Without this // skip, every incremental rebuild would flag them as removed and @@ -782,7 +782,6 @@ mod tests { let mut existing = HashMap::new(); for path in [ "tests/fixtures/clojure/main.clj", - "tests/fixtures/gleam/main.gleam", "tests/fixtures/julia/main.jl", "tests/fixtures/fsharp/Main.fs", ] { diff --git a/crates/codegraph-core/src/extractors/gleam.rs b/crates/codegraph-core/src/extractors/gleam.rs new file mode 100644 index 000000000..375470733 --- /dev/null +++ b/crates/codegraph-core/src/extractors/gleam.rs @@ -0,0 +1,431 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +pub struct GleamExtractor; + +impl SymbolExtractor for GleamExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_gleam_node); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &GLEAM_AST_CONFIG); + symbols + } +} + +fn match_gleam_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "function" => handle_function(node, source, symbols), + "external_function" => handle_external_function(node, source, symbols), + "type_definition" => handle_type_definition(node, source, symbols), + "type_alias" => handle_type_alias(node, source, symbols), + "constant" => handle_constant(node, source, symbols), + "import" => handle_import(node, source, symbols), + "function_call" => handle_call(node, source, symbols), + _ => {} + } +} + +fn handle_function(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + { + Some(n) => n, + None => return, + }; + + let params = extract_params(node, source); + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "gleam"), + cfg: build_function_cfg(node, "gleam", source), + children: opt_children(params), + }); +} + +fn handle_external_function(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_type_definition(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // type_definition wraps a type_name child (which itself has a `name` field). + // Mirror the JS extractor: prefer a `name` field on the node, fall back to + // taking the text of the `type_name` child so we get e.g. `MyType(a, b)`. + let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "type_name")) + { + Some(n) => n, + None => return, + }; + + let mut children: Vec = Vec::new(); + for i in 0..node.child_count() { + let child = match node.child(i) { + Some(c) => c, + None => continue, + }; + match child.kind() { + "data_constructor" | "type_constructor" => { + if let Some(ctor_name) = child + .child_by_field_name("name") + .or_else(|| find_child(&child, "constructor_name")) + { + children.push(child_def( + node_text(&ctor_name, source).to_string(), + "property", + start_line(&child), + )); + } + } + "data_constructors" | "type_constructors" => { + for j in 0..child.child_count() { + let ctor = match child.child(j) { + Some(c) => c, + None => continue, + }; + if ctor.kind() == "data_constructor" || ctor.kind() == "type_constructor" { + if let Some(ctor_name) = ctor + .child_by_field_name("name") + .or_else(|| find_child(&ctor, "constructor_name")) + { + children.push(child_def( + node_text(&ctor_name, source).to_string(), + "property", + start_line(&ctor), + )); + } + } + } + } + _ => {} + } + } + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(children), + }); +} + +fn handle_type_alias(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "type_name")) + { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_constant(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match node + .child_by_field_name("name") + .or_else(|| find_child(node, "identifier")) + { + Some(n) => n, + None => return, + }; + + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "variable".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + // `module` field holds the module path (e.g. `gleam/io` or `repository`). + let module_node = match node + .child_by_field_name("module") + .or_else(|| find_child(node, "module")) + .or_else(|| find_child(node, "string")) + { + Some(n) => n, + None => return, + }; + + let raw = node_text(&module_node, source); + let source_path = raw + .trim_matches(|c| c == '\'' || c == '"') + .to_string(); + let mut names: Vec = Vec::new(); + + // Unqualified imports: `import gleam/io.{println, print}` + if let Some(unqualified) = find_child(node, "unqualified_imports") { + for i in 0..unqualified.child_count() { + let item = match unqualified.child(i) { + Some(c) => c, + None => continue, + }; + if item.kind() == "unqualified_import" { + let name_node = item.child_by_field_name("name"); + if let Some(nn) = name_node { + names.push(node_text(&nn, source).to_string()); + } + } else if item.kind() == "identifier" { + names.push(node_text(&item, source).to_string()); + } + } + } + + // Alias: `import gleam/io as my_io` + if let Some(alias) = node.child_by_field_name("alias") { + let alias_text = node_text(&alias, source).to_string(); + if alias_text != source_path { + names.push(alias_text); + } + } + + if names.is_empty() { + // Default to the last path segment, mirroring the JS extractor. + let default_name = source_path + .rsplit('/') + .next() + .unwrap_or(&source_path) + .to_string(); + names.push(default_name); + } + + symbols + .imports + .push(Import::new(source_path, names, start_line(node))); +} + +fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let func_node = match node.child_by_field_name("function").or_else(|| node.child(0)) { + Some(n) => n, + None => return, + }; + + match func_node.kind() { + "identifier" | "variable" => { + symbols.calls.push(Call { + name: node_text(&func_node, source).to_string(), + line: start_line(node), + dynamic: None, + receiver: None, + }); + } + "field_access" | "module_select" => { + // Qualified call: `module.func(args)` parses as field_access + // with `record` (module identifier) and `field` (label) fields. + let field = func_node + .child_by_field_name("field") + .or_else(|| func_node.child_by_field_name("label")); + let record = func_node + .child_by_field_name("record") + .or_else(|| func_node.child(0)); + if let Some(f) = field { + let receiver = record.and_then(|r| { + // Don't use the field itself as the receiver. + if Some(r.id()) == field.map(|n| n.id()) { + None + } else { + Some(node_text(&r, source).to_string()) + } + }); + symbols.calls.push(Call { + name: node_text(&f, source).to_string(), + line: start_line(node), + dynamic: None, + receiver, + }); + } + } + _ => {} + } +} + +fn extract_params(func_node: &Node, source: &[u8]) -> Vec { + let mut params = Vec::new(); + let params_node = match func_node + .child_by_field_name("parameters") + .or_else(|| find_child(func_node, "function_parameters")) + { + Some(n) => n, + None => return params, + }; + + for i in 0..params_node.child_count() { + let param = match params_node.child(i) { + Some(c) => c, + None => continue, + }; + match param.kind() { + "function_parameter" | "parameter" => { + if let Some(name_node) = param + .child_by_field_name("name") + .or_else(|| find_child(¶m, "identifier")) + { + params.push(child_def( + node_text(&name_node, source).to_string(), + "parameter", + start_line(¶m), + )); + } + } + "identifier" => { + params.push(child_def( + node_text(¶m, source).to_string(), + "parameter", + start_line(¶m), + )); + } + _ => {} + } + } + params +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_gleam(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + parser + .set_language(&tree_sitter_gleam::LANGUAGE.into()) + .unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + GleamExtractor.extract(&tree, code.as_bytes(), "test.gleam") + } + + #[test] + fn extracts_public_function() { + let s = parse_gleam("pub fn greet(name) {\n name\n}\n"); + let greet = s.definitions.iter().find(|d| d.name == "greet").unwrap(); + assert_eq!(greet.kind, "function"); + let children = greet.children.as_ref().expect("expected children"); + assert!(children.iter().any(|c| c.name == "name" && c.kind == "parameter")); + } + + #[test] + fn extracts_private_function() { + let s = parse_gleam("fn helper() {\n 1\n}\n"); + assert!(s.definitions.iter().any(|d| d.name == "helper")); + } + + #[test] + fn extracts_qualified_call_as_receiver_name() { + let code = "import repository\n\npub fn main() {\n repository.new_repo()\n}\n"; + let s = parse_gleam(code); + let call = s + .calls + .iter() + .find(|c| c.name == "new_repo") + .expect("expected qualified call to new_repo"); + assert_eq!(call.receiver.as_deref(), Some("repository")); + } + + #[test] + fn extracts_same_file_call() { + let code = "pub fn outer() {\n inner()\n}\n\nfn inner() {\n 1\n}\n"; + let s = parse_gleam(code); + let call = s + .calls + .iter() + .find(|c| c.name == "inner") + .expect("expected unqualified call to inner"); + assert!(call.receiver.is_none()); + } + + #[test] + fn extracts_import_module() { + let s = parse_gleam("import gleam/io\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "gleam/io"); + assert_eq!(s.imports[0].names, vec!["io".to_string()]); + } + + #[test] + fn extracts_unqualified_imports() { + let s = parse_gleam("import gleam/io.{println, print}\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "gleam/io"); + assert!(s.imports[0].names.contains(&"println".to_string())); + assert!(s.imports[0].names.contains(&"print".to_string())); + } + + #[test] + fn extracts_type_definition_with_constructors() { + let code = "pub type Color {\n Red\n Green\n Blue\n}\n"; + let s = parse_gleam(code); + let color = s + .definitions + .iter() + .find(|d| d.kind == "type") + .expect("expected type definition"); + let children = color.children.as_ref().expect("expected constructors"); + let names: Vec<&str> = children.iter().map(|c| c.name.as_str()).collect(); + assert!(names.contains(&"Red")); + assert!(names.contains(&"Green")); + assert!(names.contains(&"Blue")); + } + + #[test] + fn extracts_type_alias() { + let s = parse_gleam("pub type UserId = Int\n"); + assert!(s.definitions.iter().any(|d| d.kind == "type")); + } + + #[test] + fn extracts_constant() { + let s = parse_gleam("pub const max_users = 100\n"); + let c = s + .definitions + .iter() + .find(|d| d.name == "max_users") + .expect("expected constant"); + assert_eq!(c.kind, "variable"); + } +} diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index b02531896..9864da252 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -360,6 +360,16 @@ pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig { string_prefixes: &[], }; +pub const GLEAM_AST_CONFIG: LangAstConfig = LangAstConfig { + new_types: &[], + throw_types: &[], + await_types: &[], + string_types: &["string"], + regex_types: &[], + quote_chars: &['"'], + string_prefixes: &[], +}; + // ── Generic AST node walker ────────────────────────────────────────────────── /// Node types that represent identifiers across languages. diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 642f29f98..fe73225aa 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -4,6 +4,7 @@ pub mod cpp; pub mod csharp; pub mod dart; pub mod elixir; +pub mod gleam; pub mod go; pub mod haskell; pub mod hcl; @@ -126,5 +127,8 @@ pub fn extract_symbols_with_opts( LanguageKind::Ocaml | LanguageKind::OcamlInterface => { ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::Gleam => { + gleam::GleamExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } } } diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index 0cb157814..7564553b8 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -36,6 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "kt", "kts", "swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", "ml", "mli", + "gleam", ]; /// Returns whether `path` has an extension the Rust file_collector would accept. @@ -43,9 +44,9 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ /// Mirrors the predicate at the heart of `collect_files`: a file is collected /// if `LanguageKind::from_extension` recognizes it OR its raw extension is in /// `SUPPORTED_EXTENSIONS`. Exposed for `change_detection::detect_removed_files` -/// so that files outside Rust's capability (e.g. WASM-only `.clj`, `.gleam`, -/// `.jl`) are not flagged as "removed" merely because the orchestrator's -/// narrower collector never sees them. +/// so that files outside Rust's capability (e.g. WASM-only `.clj`, `.jl`) are +/// not flagged as "removed" merely because the orchestrator's narrower +/// collector never sees them. pub fn is_supported_extension(path: &str) -> bool { if LanguageKind::from_extension(path).is_some() { return true; diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index c87957f29..b344eb247 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -27,6 +27,7 @@ pub enum LanguageKind { Haskell, Ocaml, OcamlInterface, + Gleam, } impl LanguageKind { @@ -58,6 +59,7 @@ impl LanguageKind { Self::Haskell => "haskell", Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", + Self::Gleam => "gleam", } } @@ -97,6 +99,7 @@ impl LanguageKind { "hs" => Some(Self::Haskell), "ml" => Some(Self::Ocaml), "mli" => Some(Self::OcamlInterface), + "gleam" => Some(Self::Gleam), _ => None, } } @@ -129,6 +132,7 @@ impl LanguageKind { "haskell" => Some(Self::Haskell), "ocaml" => Some(Self::Ocaml), "ocaml-interface" => Some(Self::OcamlInterface), + "gleam" => Some(Self::Gleam), _ => None, } } @@ -160,6 +164,7 @@ impl LanguageKind { Self::Haskell => tree_sitter_haskell::LANGUAGE.into(), Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), + Self::Gleam => tree_sitter_gleam::LANGUAGE.into(), } } @@ -175,7 +180,7 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, + OcamlInterface, Gleam, ] } } @@ -244,14 +249,15 @@ mod tests { | LanguageKind::Zig | LanguageKind::Haskell | LanguageKind::Ocaml - | LanguageKind::OcamlInterface => (), + | LanguageKind::OcamlInterface + | LanguageKind::Gleam => (), }; // IMPORTANT: this constant must equal the number of arms in the match // above AND the length of the slice returned by `LanguageKind::all()`. // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. - const EXPECTED_LEN: usize = 24; + const EXPECTED_LEN: usize = 25; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/package-lock.json b/package-lock.json index 2de5a303b..f0a088fc9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7530,6 +7530,7 @@ "resolved": "git+ssh://git@github.com/gleam-lang/tree-sitter-gleam.git#1627dc5101e63bf19717c540a56df5ef20b1fc7a", "integrity": "sha512-ysgcjQzunTVX0hBoUXWRU7YCrzOVSJlT3bHzrq78E3eE1iu1RQ3+RrwKjXEPVPInOmunuS+gHf6LWd8MyXZ4UQ==", "dev": true, + "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { "nan": "^2.18.0" diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index 653cbd59b..0f1701d8f 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -153,6 +153,10 @@ const OCAML_AST_TYPES: Record = { string: 'string', }; +const GLEAM_AST_TYPES: Record = { + string: 'string', +}; + export const AST_TYPE_MAPS: Map> = new Map([ ['javascript', JS_AST_TYPES], ['typescript', JS_AST_TYPES], @@ -177,6 +181,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['haskell', HASKELL_AST_TYPES], ['ocaml', OCAML_AST_TYPES], ['ocaml-interface', OCAML_AST_TYPES], + ['gleam', GLEAM_AST_TYPES], ]); // ─── Per-language string-extraction config ─────────────────────────────── @@ -211,6 +216,7 @@ const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' }; const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; +const GLEAM_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; export const AST_STRING_CONFIGS: Map = new Map([ ['javascript', JS_STRING_CONFIG], @@ -236,6 +242,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ ['haskell', HASKELL_STRING_CONFIG], ['ocaml', OCAML_STRING_CONFIG], ['ocaml-interface', OCAML_STRING_CONFIG], + ['gleam', GLEAM_STRING_CONFIG], ]); // ─── Per-language "stop-after-collect" kinds ───────────────────────────── diff --git a/src/domain/parser.ts b/src/domain/parser.ts index f1c7dd809..ac3a6fb50 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -471,6 +471,7 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet = new Set([ '.hs', '.ml', '.mli', + '.gleam', ]); /** diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 24aee1d53..cb104f701 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -16,7 +16,6 @@ describe('classifyNativeDrops', () => { it('groups WASM-only languages under unsupported-by-native', () => { const { byReason, totals } = classifyNativeDrops([ 'src/a.fs', - 'src/b.gleam', 'src/c.clj', 'src/d.jl', 'src/e.R', @@ -27,10 +26,9 @@ describe('classifyNativeDrops', () => { 'src/j.v', 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(11); + expect(totals['unsupported-by-native']).toBe(10); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); - expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']); expect(byReason['unsupported-by-native'].get('.r')).toEqual(['src/e.R']); }); @@ -52,12 +50,12 @@ describe('classifyNativeDrops', () => { 'src/a.ts', 'src/b.fs', 'src/c.fs', - 'src/d.gleam', + 'src/d.clj', ]); expect(totals['native-extractor-failure']).toBe(1); expect(totals['unsupported-by-native']).toBe(3); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/b.fs', 'src/c.fs']); - expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/d.gleam']); + expect(byReason['unsupported-by-native'].get('.clj')).toEqual(['src/d.clj']); }); it('lowercases extensions so .R and .r share a bucket', () => { @@ -78,7 +76,7 @@ describe('classifyNativeDrops', () => { expect(NATIVE_SUPPORTED_EXTENSIONS.has('.ts')).toBe(true); expect(NATIVE_SUPPORTED_EXTENSIONS.has('.py')).toBe(true); expect(NATIVE_SUPPORTED_EXTENSIONS.has('.fs')).toBe(false); - expect(NATIVE_SUPPORTED_EXTENSIONS.has('.gleam')).toBe(false); + expect(NATIVE_SUPPORTED_EXTENSIONS.has('.clj')).toBe(false); }); }); From b97124459d06a90d678eec2c0c8a8f366aad0070 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 20:44:09 -0600 Subject: [PATCH 2/5] fix(gleam): use named_child to skip anonymous punctuation tokens Replaces child(0) fallbacks in handleCall / handle_call with named_child(0) in both the native Rust and WASM/JS Gleam extractors. The Gleam tree-sitter grammar's field_access node includes the '.' punctuation token as a child, so child(0) on field_access could return '.' as the receiver text on malformed input. named_child(0) skips anonymous tokens and is consistent across both engines. The field accessors always succeed on valid Gleam, so this only affects the defensive fallback path, but it removes a silent asymmetry that Greptile flagged in review of #1105. --- crates/codegraph-core/src/extractors/gleam.rs | 7 +++++-- src/extractors/gleam.ts | 6 ++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/crates/codegraph-core/src/extractors/gleam.rs b/crates/codegraph-core/src/extractors/gleam.rs index 375470733..27bd6b90b 100644 --- a/crates/codegraph-core/src/extractors/gleam.rs +++ b/crates/codegraph-core/src/extractors/gleam.rs @@ -241,7 +241,10 @@ fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { } fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { - let func_node = match node.child_by_field_name("function").or_else(|| node.child(0)) { + let func_node = match node + .child_by_field_name("function") + .or_else(|| node.named_child(0)) + { Some(n) => n, None => return, }; @@ -263,7 +266,7 @@ fn handle_call(node: &Node, source: &[u8], symbols: &mut FileSymbols) { .or_else(|| func_node.child_by_field_name("label")); let record = func_node .child_by_field_name("record") - .or_else(|| func_node.child(0)); + .or_else(|| func_node.named_child(0)); if let Some(f) = field { let receiver = record.and_then(|r| { // Don't use the field itself as the receiver. diff --git a/src/extractors/gleam.ts b/src/extractors/gleam.ts index b7889c584..a20ff994b 100644 --- a/src/extractors/gleam.ts +++ b/src/extractors/gleam.ts @@ -198,14 +198,16 @@ function handleImport(node: TreeSitterNode, ctx: ExtractorOutput): void { } function handleCall(node: TreeSitterNode, ctx: ExtractorOutput): void { - const funcNode = node.childForFieldName('function') || node.child(0); + const funcNode = node.childForFieldName('function') || node.namedChild(0); if (!funcNode) return; if (funcNode.type === 'identifier' || funcNode.type === 'variable') { ctx.calls.push({ name: funcNode.text, line: node.startPosition.row + 1 }); } else if (funcNode.type === 'field_access' || funcNode.type === 'module_select') { const field = funcNode.childForFieldName('field') || funcNode.childForFieldName('label'); - const record = funcNode.child(0); + // Prefer the `record` field; fall back to first named child to skip + // anonymous punctuation tokens (the `.` between record and field). + const record = funcNode.childForFieldName('record') || funcNode.namedChild(0); if (field) { const call: Call = { name: field.text, line: node.startPosition.row + 1 }; if (record && record !== field) call.receiver = record.text; From a726c3abbdccf243440c9b7e48326953e6389770 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 23:59:00 -0600 Subject: [PATCH 3/5] fix(gleam): match both function_call and call nodes for parity with JS (#1105) --- crates/codegraph-core/src/extractors/gleam.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/codegraph-core/src/extractors/gleam.rs b/crates/codegraph-core/src/extractors/gleam.rs index 27bd6b90b..3c74c5ce4 100644 --- a/crates/codegraph-core/src/extractors/gleam.rs +++ b/crates/codegraph-core/src/extractors/gleam.rs @@ -24,7 +24,7 @@ fn match_gleam_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _dept "type_alias" => handle_type_alias(node, source, symbols), "constant" => handle_constant(node, source, symbols), "import" => handle_import(node, source, symbols), - "function_call" => handle_call(node, source, symbols), + "function_call" | "call" => handle_call(node, source, symbols), _ => {} } } From cc8ae990d92afa9ea317315b4c8b4c94f7483523 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 07:26:25 -0600 Subject: [PATCH 4/5] fix(gleam): align alias fallback with JS extractor and add aliased import test (#1105) --- crates/codegraph-core/src/extractors/gleam.rs | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/crates/codegraph-core/src/extractors/gleam.rs b/crates/codegraph-core/src/extractors/gleam.rs index 3c74c5ce4..879929c8a 100644 --- a/crates/codegraph-core/src/extractors/gleam.rs +++ b/crates/codegraph-core/src/extractors/gleam.rs @@ -218,11 +218,15 @@ fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { } // Alias: `import gleam/io as my_io` - if let Some(alias) = node.child_by_field_name("alias") { - let alias_text = node_text(&alias, source).to_string(); - if alias_text != source_path { - names.push(alias_text); - } + // Mirror JS: prefer `alias` field, fall back to first identifier child + // that isn't the module node itself. Compare by node ID rather than text + // so a self-alias like `import mymodule as mymodule` is still recorded. + let alias_node = node + .child_by_field_name("alias") + .or_else(|| find_child(node, "identifier")) + .filter(|a| a.id() != module_node.id()); + if let Some(alias) = alias_node { + names.push(node_text(&alias, source).to_string()); } if names.is_empty() { @@ -399,6 +403,14 @@ mod tests { assert!(s.imports[0].names.contains(&"print".to_string())); } + #[test] + fn extracts_aliased_import() { + let s = parse_gleam("import gleam/io as my_io\n"); + assert_eq!(s.imports.len(), 1); + assert_eq!(s.imports[0].source, "gleam/io"); + assert_eq!(s.imports[0].names, vec!["my_io".to_string()]); + } + #[test] fn extracts_type_definition_with_constructors() { let code = "pub type Color {\n Red\n Green\n Blue\n}\n"; From ae689c819084aa888257b0a3fad2f882873de445 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 22:26:29 -0600 Subject: [PATCH 5/5] test: add explicit .gleam assertion to NATIVE_SUPPORTED_EXTENSIONS test (#1105) --- tests/parsers/native-drop-classification.test.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index abe2b0354..a4ede0164 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -71,6 +71,7 @@ describe('classifyNativeDrops', () => { it('exposes the native-supported extension set for callers', () => { expect(NATIVE_SUPPORTED_EXTENSIONS.has('.ts')).toBe(true); expect(NATIVE_SUPPORTED_EXTENSIONS.has('.py')).toBe(true); + expect(NATIVE_SUPPORTED_EXTENSIONS.has('.gleam')).toBe(true); expect(NATIVE_SUPPORTED_EXTENSIONS.has('.fs')).toBe(false); expect(NATIVE_SUPPORTED_EXTENSIONS.has('.fsx')).toBe(false); });