From 61f8a9d63c552fa1dde354973a63b199dee229ce Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 14:55:46 -0600 Subject: [PATCH 1/7] feat(native): port Objective-C extractor to Rust Mirrors `src/extractors/objc.ts` in `crates/codegraph-core/src/extractors/objc.rs`. Adds the `tree-sitter-objc` dependency, wires `LanguageKind::ObjC` (`.m`) in the Rust `parser_registry` and `file_collector`, adds `.m` to `NATIVE_SUPPORTED_EXTENSIONS` on the JS side, and registers `OBJC_AST_TYPES` / `OBJC_AST_CONFIG` so the native and WASM engines extract identical `ast_nodes` for Objective-C source. Handles class interfaces / implementations (with `: Superclass`), protocols, instance and class method declarations/definitions with multi-part selectors assembled from leading identifiers and `method_parameter` children, C-level function declarations/definitions, `#import`/`@import` imports, and message expression call extraction. --- Cargo.lock | 11 + crates/codegraph-core/Cargo.toml | 1 + .../codegraph-core/src/extractors/helpers.rs | 13 + crates/codegraph-core/src/extractors/mod.rs | 4 + crates/codegraph-core/src/extractors/objc.rs | 775 ++++++++++++++++++ crates/codegraph-core/src/file_collector.rs | 1 + crates/codegraph-core/src/parser_registry.rs | 12 +- package-lock.json | 1 + src/ast-analysis/rules/index.ts | 8 + src/domain/parser.ts | 1 + .../native-drop-classification.test.ts | 3 +- 11 files changed, 825 insertions(+), 5 deletions(-) create mode 100644 crates/codegraph-core/src/extractors/objc.rs diff --git a/Cargo.lock b/Cargo.lock index 413504b0d..79ba3a55b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -93,6 +93,7 @@ dependencies = [ "tree-sitter-javascript", "tree-sitter-kotlin-sg", "tree-sitter-lua", + "tree-sitter-objc", "tree-sitter-ocaml", "tree-sitter-php", "tree-sitter-python", @@ -865,6 +866,16 @@ dependencies = [ "tree-sitter-language", ] +[[package]] +name = "tree-sitter-objc" +version = "3.0.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca8bb556423fc176f0535e79d525f783a6684d3c9da81bf9d905303c129e1d2" +dependencies = [ + "cc", + "tree-sitter-language", +] + [[package]] name = "tree-sitter-ocaml" version = "0.24.2" diff --git a/crates/codegraph-core/Cargo.toml b/crates/codegraph-core/Cargo.toml index df4361e17..d64140413 100644 --- a/crates/codegraph-core/Cargo.toml +++ b/crates/codegraph-core/Cargo.toml @@ -35,6 +35,7 @@ tree-sitter-dart = "0.0.4" tree-sitter-zig = "1" tree-sitter-haskell = "0.23" tree-sitter-ocaml = "0.24" +tree-sitter-objc = "3" rayon = "1" ignore = "0.4" globset = "0.4" diff --git a/crates/codegraph-core/src/extractors/helpers.rs b/crates/codegraph-core/src/extractors/helpers.rs index b02531896..de0d82d34 100644 --- a/crates/codegraph-core/src/extractors/helpers.rs +++ b/crates/codegraph-core/src/extractors/helpers.rs @@ -360,6 +360,19 @@ pub const OCAML_AST_CONFIG: LangAstConfig = LangAstConfig { string_prefixes: &[], }; +/// Objective-C string literals use the `@"..."` prefix. The shared +/// `build_string_node` strips a leading `@` before applying prefixes, so we +/// don't need to list it explicitly here. +pub const OBJC_AST_CONFIG: LangAstConfig = LangAstConfig { + new_types: &[], + throw_types: &["throw_statement"], + await_types: &[], + string_types: &["string_literal"], + regex_types: &[], + quote_chars: &['"'], + string_prefixes: &[], +}; + // ── Generic AST node walker ────────────────────────────────────────────────── /// Node types that represent identifiers across languages. 
diff --git a/crates/codegraph-core/src/extractors/mod.rs b/crates/codegraph-core/src/extractors/mod.rs index 642f29f98..9c72049a4 100644 --- a/crates/codegraph-core/src/extractors/mod.rs +++ b/crates/codegraph-core/src/extractors/mod.rs @@ -12,6 +12,7 @@ pub mod java; pub mod javascript; pub mod kotlin; pub mod lua; +pub mod objc; pub mod ocaml; pub mod php; pub mod python; @@ -126,5 +127,8 @@ pub fn extract_symbols_with_opts( LanguageKind::Ocaml | LanguageKind::OcamlInterface => { ocaml::OcamlExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) } + LanguageKind::ObjC => { + objc::ObjCExtractor.extract_with_opts(tree, source, file_path, include_ast_nodes) + } } } diff --git a/crates/codegraph-core/src/extractors/objc.rs b/crates/codegraph-core/src/extractors/objc.rs new file mode 100644 index 000000000..929f2d75a --- /dev/null +++ b/crates/codegraph-core/src/extractors/objc.rs @@ -0,0 +1,775 @@ +use tree_sitter::{Node, Tree}; +use crate::cfg::build_function_cfg; +use crate::complexity::compute_all_metrics; +use crate::types::*; +use super::helpers::*; +use super::SymbolExtractor; + +/// Objective-C extractor — mirrors `src/extractors/objc.ts`. +/// +/// The tree-sitter-objc grammar extends C with `@interface`, `@implementation`, +/// `@protocol`, method declarations/definitions, `#import`, `@import`, and +/// message expressions. Methods inside `class_implementation` are wrapped in +/// `implementation_definition`, and selectors are not exposed as a named +/// `selector` field — they are assembled from leading `identifier` keywords +/// followed by `method_parameter` children. 
+pub struct ObjCExtractor; + +impl SymbolExtractor for ObjCExtractor { + fn extract(&self, tree: &Tree, source: &[u8], file_path: &str) -> FileSymbols { + let mut symbols = FileSymbols::new(file_path.to_string()); + walk_tree(&tree.root_node(), source, &mut symbols, match_objc_node); + walk_ast_nodes_with_config(&tree.root_node(), source, &mut symbols.ast_nodes, &OBJC_AST_CONFIG); + symbols + } +} + +fn match_objc_node(node: &Node, source: &[u8], symbols: &mut FileSymbols, _depth: usize) { + match node.kind() { + "class_interface" => handle_class_interface(node, source, symbols), + "class_implementation" => handle_class_implementation(node, source, symbols), + "protocol_declaration" => handle_protocol_decl(node, source, symbols), + "method_declaration" | "method_definition" => handle_method(node, source, symbols), + "function_definition" => handle_function_def(node, source, symbols), + "preproc_include" | "preproc_import" => handle_import(node, source, symbols), + "module_import" => handle_at_import(node, source, symbols), + "struct_specifier" => handle_struct_specifier(node, source, symbols), + "enum_specifier" => handle_enum_specifier(node, source, symbols), + "type_definition" => handle_typedef(node, source, symbols), + "call_expression" => handle_c_call_expr(node, source, symbols), + "message_expression" => handle_message_expr(node, source, symbols), + _ => {} + } +} + +// ── ObjC class/protocol handlers ────────────────────────────────────────── + +/// `@interface Foo : NSObject ` or `@interface Foo (Cat)`. +/// +/// The grammar does not expose `name` as a named field — the class name is +/// the first `identifier` child. `superclass` and `category` *are* named +/// fields. Adopted protocols appear under `parameterized_arguments`. 
+fn handle_class_interface(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match find_objc_decl_name(node) { + Some(n) => n, + None => return, + }; + let class_name = node_text(&name_node, source).to_string(); + + // Categories: `@interface Foo (Cat)` — name becomes `Foo(Cat)` + let display_name = if let Some(cat) = node.child_by_field_name("category") { + format!("{}({})", class_name, node_text(&cat, source)) + } else { + class_name.clone() + }; + + let members = collect_class_members(node, source); + symbols.definitions.push(Definition { + name: display_name, + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: opt_children(members), + }); + + // Superclass — use the bare class name (categories already recorded above) + if let Some(superclass) = node.child_by_field_name("superclass") { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: Some(node_text(&superclass, source).to_string()), + implements: None, + line: start_line(node), + }); + } + + // Adopted protocols + if let Some(protos) = find_child(node, "parameterized_arguments") { + for i in 0..protos.child_count() { + if let Some(proto) = protos.child(i) { + // tree-sitter-objc wraps each protocol in `type_name > type_identifier` + let proto_name = if proto.kind() == "type_name" { + find_child(&proto, "type_identifier") + .or_else(|| find_child(&proto, "identifier")) + .map(|n| node_text(&n, source).to_string()) + } else if proto.kind() == "identifier" || proto.kind() == "type_identifier" { + Some(node_text(&proto, source).to_string()) + } else { + None + }; + if let Some(p) = proto_name { + symbols.classes.push(ClassRelation { + name: class_name.clone(), + extends: None, + implements: Some(p), + line: start_line(node), + }); + } + } + } + } +} + +/// `@implementation Foo` or `@implementation Foo (Cat)`. 
+fn handle_class_implementation(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match find_objc_decl_name(node) { + Some(n) => n, + None => return, + }; + let class_name = node_text(&name_node, source).to_string(); + let display_name = if let Some(cat) = node.child_by_field_name("category") { + format!("{}({})", class_name, node_text(&cat, source)) + } else { + class_name + }; + + symbols.definitions.push(Definition { + name: display_name, + kind: "class".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +/// `@protocol MyProto`. +fn handle_protocol_decl(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name_node = match find_objc_decl_name(node) { + Some(n) => n, + None => return, + }; + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "interface".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); +} + +// ── Method / function handlers ──────────────────────────────────────────── + +fn handle_method(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let selector = match build_selector(node, source) { + Some(s) => s, + None => return, + }; + let parent_class = find_objc_parent_class(node, source); + let full_name = match parent_class { + Some(c) => format!("{}.{}", c, selector), + None => selector, + }; + + let params = extract_method_params(node, source); + symbols.definitions.push(Definition { + name: full_name, + kind: "method".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "objc"), + cfg: build_function_cfg(node, "objc", source), + children: opt_children(params), + }); +} + +fn handle_function_def(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let name = match 
extract_c_function_name(node, source) { + Some(n) => n, + None => return, + }; + let params = extract_c_parameters(node, source); + symbols.definitions.push(Definition { + name, + kind: "function".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: compute_all_metrics(node, source, "objc"), + cfg: build_function_cfg(node, "objc", source), + children: opt_children(params), + }); +} + +// ── Import handlers ─────────────────────────────────────────────────────── + +fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let path_node = match node.child_by_field_name("path") { + Some(n) => n, + None => return, + }; + let raw = node_text(&path_node, source); + // Strip `"..."` or `<...>` wrappers — mirrors the JS extractor regex. + let source_path = raw.trim_matches(|c| c == '"' || c == '<' || c == '>').to_string(); + if source_path.is_empty() { + return; + } + let last_name = source_path.rsplit('/').next().unwrap_or(&source_path).to_string(); + let mut imp = Import::new(source_path, vec![last_name], start_line(node)); + imp.c_include = Some(true); + symbols.imports.push(imp); +} + +/// `@import Foundation;` — grammar emits `module_import` with `path` field +/// pointing at the module identifier. 
+fn handle_at_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let module_node = node.child_by_field_name("path") + .or_else(|| find_child(node, "identifier")); + if let Some(m) = module_node { + let name = node_text(&m, source).to_string(); + symbols.imports.push(Import::new( + name.clone(), + vec![name], + start_line(node), + )); + } +} + +// ── C-compatible type handlers ──────────────────────────────────────────── + +fn handle_struct_specifier(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "struct".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_enum_specifier(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + if let Some(name_node) = node.child_by_field_name("name") { + symbols.definitions.push(Definition { + name: node_text(&name_node, source).to_string(), + kind: "enum".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +fn handle_typedef(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let mut alias_name = None; + for i in (0..node.child_count()).rev() { + if let Some(child) = node.child(i) { + match child.kind() { + "type_identifier" | "identifier" | "primitive_type" => { + alias_name = Some(node_text(&child, source).to_string()); + break; + } + _ => {} + } + } + } + if let Some(name) = alias_name { + symbols.definitions.push(Definition { + name, + kind: "type".to_string(), + line: start_line(node), + end_line: Some(end_line(node)), + decorators: None, + complexity: None, + cfg: None, + children: None, + }); + } +} + +// ── Call handlers ───────────────────────────────────────────────────────── + +/// Plain C-style `func(arg)` 
calls. tree-sitter-objc lacks a `function` +/// field — the called expression is the first non-anonymous child. +fn handle_c_call_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let fn_node = match node.child_by_field_name("function") { + Some(n) => n, + None => { + // Fallback: first identifier child + let mut found = None; + for i in 0..node.child_count() { + if let Some(c) = node.child(i) { + if c.kind() == "identifier" || c.kind() == "field_expression" { + found = Some(c); + break; + } + } + } + match found { + Some(n) => n, + None => return, + } + } + }; + + let (name, receiver) = if fn_node.kind() == "field_expression" { + let field = fn_node.child_by_field_name("field") + .map(|n| node_text(&n, source).to_string()) + .unwrap_or_else(|| node_text(&fn_node, source).to_string()); + let recv = fn_node.child_by_field_name("argument") + .map(|n| node_text(&n, source).to_string()); + (field, recv) + } else { + (node_text(&fn_node, source).to_string(), None) + }; + + if !name.is_empty() { + symbols.calls.push(Call { + name, + line: start_line(node), + dynamic: None, + receiver, + }); + } +} + +/// `[receiver selector:arg ...]` message send. The grammar gives every +/// keyword identifier the `method` field name; for multi-keyword selectors +/// we collect them all and join with `:`. +fn handle_message_expr(node: &Node, source: &[u8], symbols: &mut FileSymbols) { + let receiver = node.child_by_field_name("receiver") + .map(|n| node_text(&n, source).to_string()); + + let selector = build_message_selector(node, source); + if selector.is_empty() { + return; + } + + symbols.calls.push(Call { + name: selector, + line: start_line(node), + dynamic: None, + receiver, + }); +} + +// ── Helpers ─────────────────────────────────────────────────────────────── + +/// Build a method-definition selector by collecting the leading keyword +/// `identifier` child plus any subsequent identifier+method_parameter pairs. 
+/// +/// Examples: +/// - Unary: `- (void)doSomething` → `doSomething` +/// - Keyword: `- (void)initWith:(...)x age:(...)y` → `initWith:age:` +fn build_selector(method_node: &Node, source: &[u8]) -> Option<String> { + // tree-sitter-objc v3 does not expose a `selector` field; we always + // assemble the selector from the keyword identifiers. + let mut parts: Vec<String> = Vec::new(); + let mut has_params = false; + + for i in 0..method_node.child_count() { + let child = match method_node.child(i) { + Some(c) => c, + None => continue, + }; + match child.kind() { + "identifier" => { + // Keyword name — appears before each `:` + parts.push(node_text(&child, source).to_string()); + } + "method_parameter" => { + has_params = true; + } + _ => {} + } + } + + if parts.is_empty() { + return None; + } + if has_params { + Some(format!("{}:", parts.join(":"))) + } else { + Some(parts.join(":")) + } +} + +/// Build a message-expression selector by collecting all `identifier` +/// children annotated with the `method` field. 
+fn build_message_selector(message_node: &Node, source: &[u8]) -> String { + let mut parts: Vec<String> = Vec::new(); + let mut has_colon = false; + for i in 0..message_node.child_count() { + if let Some(child) = message_node.child(i) { + if let Some(field) = message_node.field_name_for_child(i as u32) { + if field == "method" { + parts.push(node_text(&child, source).to_string()); + } + } + if child.kind() == ":" { + has_colon = true; + } + } + } + if parts.is_empty() { + return String::new(); + } + if has_colon { + format!("{}:", parts.join(":")) + } else { + parts.join(":") + } +} + +fn find_objc_parent_class(node: &Node, source: &[u8]) -> Option<String> { + let mut current = node.parent(); + while let Some(parent) = current { + match parent.kind() { + "class_interface" + | "class_implementation" + | "protocol_declaration" => { + let name_node = find_objc_decl_name(&parent)?; + let base = node_text(&name_node, source).to_string(); + // Categories: include `(Cat)` so methods are grouped per category. + if let Some(cat) = parent.child_by_field_name("category") { + return Some(format!("{}({})", base, node_text(&cat, source))); + } + return Some(base); + } + _ => {} + } + current = parent.parent(); + } + None +} + +/// Find the declaration name — the first `identifier` child. The grammar +/// places the class/protocol name as a positional child rather than under a +/// named field. +fn find_objc_decl_name<'a>(node: &Node<'a>) -> Option<Node<'a>> { + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if child.kind() == "identifier" { + return Some(child); + } + } + } + None +} + +/// Collect immediate method/property members of a `class_interface` body. 
+fn collect_class_members(class_node: &Node, source: &[u8]) -> Vec<Definition> { + let mut members = Vec::new(); + for i in 0..class_node.child_count() { + let child = match class_node.child(i) { + Some(c) => c, + None => continue, + }; + match child.kind() { + "method_declaration" | "method_definition" => { + if let Some(sel) = build_selector(&child, source) { + members.push(child_def(sel, "method", start_line(&child))); + } + } + "implementation_definition" => { + // Wraps a `method_definition` inside `class_implementation`. + if let Some(method) = find_child(&child, "method_definition") { + if let Some(sel) = build_selector(&method, source) { + members.push(child_def(sel, "method", start_line(&method))); + } + } + } + "property_declaration" => { + if let Some(prop_name) = extract_property_name(&child, source) { + members.push(child_def(prop_name, "property", start_line(&child))); + } + } + _ => {} + } + } + members +} + +/// Extract the name from `@property (...) Type *foo;`. The grammar nests +/// the identifier under `struct_declaration > struct_declarator > pointer_declarator > identifier`. +fn extract_property_name(prop_node: &Node, source: &[u8]) -> Option<String> { + let struct_decl = find_child(prop_node, "struct_declaration")?; + for i in 0..struct_decl.child_count() { + let child = struct_decl.child(i)?; + if child.kind() == "struct_declarator" { + // struct_declarator > pointer_declarator > identifier + // or struct_declarator > identifier (no pointer) + let name = unwrap_property_declarator(&child, source); + if !name.is_empty() { + return Some(name); + } + } + } + None +} + +/// Walk through `struct_declarator`/`pointer_declarator` chains and return +/// the inner identifier text. The grammar nests: +/// `struct_declarator > pointer_declarator > identifier(field=declarator)`, +/// but neither the outer struct_declarator nor pointer_declarator exposes a +/// named `declarator` field on its direct child — only the inner identifier +/// is field-tagged. 
We walk children defensively. +fn unwrap_property_declarator(node: &Node, source: &[u8]) -> String { + fn find_identifier_deep<'a>(node: &Node<'a>) -> Option<Node<'a>> { + if node.kind() == "identifier" { + return Some(*node); + } + for i in 0..node.child_count() { + if let Some(child) = node.child(i) { + if let Some(found) = find_identifier_deep(&child) { + return Some(found); + } + } + } + None + } + if let Some(id) = find_identifier_deep(node) { + return node_text(&id, source).to_string(); + } + node_text(node, source).to_string() +} + +/// Extract `method_parameter` parameter names from a method node. +fn extract_method_params(method_node: &Node, source: &[u8]) -> Vec<Definition> { + let mut params = Vec::new(); + for i in 0..method_node.child_count() { + let child = match method_node.child(i) { + Some(c) => c, + None => continue, + }; + if child.kind() != "method_parameter" { + continue; + } + // Last identifier in `method_parameter` is the parameter name. + let mut name_node: Option<Node> = None; + for j in 0..child.child_count() { + if let Some(c) = child.child(j) { + if c.kind() == "identifier" { + name_node = Some(c); + } + } + } + if let Some(n) = name_node { + params.push(child_def( + node_text(&n, source).to_string(), + "parameter", + start_line(&n), + )); + } + } + params +} + +// ── C-style helpers (extracted from c.rs equivalents) ───────────────────── + +fn extract_c_function_name(node: &Node, source: &[u8]) -> Option<String> { + let declarator = node.child_by_field_name("declarator")?; + let inner = if declarator.kind() == "function_declarator" { + declarator.child_by_field_name("declarator") + } else if declarator.kind() == "pointer_declarator" { + let fd = find_child(&declarator, "function_declarator")?; + fd.child_by_field_name("declarator") + } else { + Some(declarator) + }; + inner.map(|n| unwrap_c_declarator(&n, source)) +} + +fn extract_c_parameters(node: &Node, source: &[u8]) -> Vec<Definition> { + let mut params = Vec::new(); + let declarator = match 
node.child_by_field_name("declarator") { + Some(d) => d, + None => return params, + }; + let func_decl = if declarator.kind() == "function_declarator" { + Some(declarator) + } else { + find_child(&declarator, "function_declarator") + }; + if let Some(func_decl) = func_decl { + if let Some(param_list) = func_decl.child_by_field_name("parameters") { + for i in 0..param_list.child_count() { + if let Some(child) = param_list.child(i) { + if child.kind() == "parameter_declaration" { + if let Some(decl) = child.child_by_field_name("declarator") { + let name = unwrap_c_declarator(&decl, source); + if !name.is_empty() { + params.push(child_def(name, "parameter", start_line(&child))); + } + } + } + } + } + } + } + params +} + +fn unwrap_c_declarator(node: &Node, source: &[u8]) -> String { + let mut current = *node; + loop { + match current.kind() { + "pointer_declarator" | "array_declarator" | "parenthesized_declarator" => { + if let Some(inner) = current.child_by_field_name("declarator") { + current = inner; + } else { + break; + } + } + "identifier" => return node_text(&current, source).to_string(), + _ => break, + } + } + node_text(&current, source).to_string() +} + +#[cfg(test)] +mod tests { + use super::*; + use tree_sitter::Parser; + + fn parse_objc(code: &str) -> FileSymbols { + let mut parser = Parser::new(); + let lang: tree_sitter::Language = tree_sitter_objc::LANGUAGE.into(); + parser.set_language(&lang).unwrap(); + let tree = parser.parse(code.as_bytes(), None).unwrap(); + ObjCExtractor.extract(&tree, code.as_bytes(), "test.m") + } + + #[test] + fn extracts_class_interface_with_superclass_and_protocols() { + let code = "@interface Foo : NSObject <Bar, Baz>\n- (void)doIt;\n@end"; + let s = parse_objc(code); + let foo = s.definitions.iter().find(|d| d.name == "Foo").unwrap(); + assert_eq!(foo.kind, "class"); + let supers: Vec<_> = s.classes.iter().filter(|c| c.extends.is_some()).collect(); + assert_eq!(supers.len(), 1); + assert_eq!(supers[0].extends.as_deref(), Some("NSObject")); + 
let impls: Vec<_> = s.classes.iter().filter(|c| c.implements.is_some()).collect(); + assert_eq!(impls.len(), 2); + let names: Vec<_> = impls.iter().filter_map(|c| c.implements.as_deref()).collect(); + assert!(names.contains(&"Bar")); + assert!(names.contains(&"Baz")); + } + + #[test] + fn extracts_class_implementation_and_methods() { + let code = "\ +@implementation Foo +- (void)doIt { + [self other]; +} ++ (instancetype)shared { + return [[Foo alloc] init]; +} +@end"; + let s = parse_objc(code); + let foo = s.definitions.iter().find(|d| d.name == "Foo").unwrap(); + assert_eq!(foo.kind, "class"); + let do_it = s.definitions.iter().find(|d| d.name == "Foo.doIt").unwrap(); + assert_eq!(do_it.kind, "method"); + let shared = s.definitions.iter().find(|d| d.name == "Foo.shared").unwrap(); + assert_eq!(shared.kind, "method"); + } + + #[test] + fn extracts_keyword_selector_with_params() { + let code = "\ +@implementation Foo +- (void)setName:(NSString *)name age:(int)age { +} +@end"; + let s = parse_objc(code); + let m = s.definitions.iter().find(|d| d.name == "Foo.setName:age:").unwrap(); + let kids = m.children.as_ref().unwrap(); + assert_eq!(kids.len(), 2); + assert_eq!(kids[0].name, "name"); + assert_eq!(kids[1].name, "age"); + } + + #[test] + fn extracts_category_definitions() { + let code = "\ +@interface Foo (Cat) +- (void)catMethod; +@end +@implementation Foo (Cat) +- (void)catMethod {} +@end"; + let s = parse_objc(code); + let iface = s.definitions.iter().find(|d| d.name == "Foo(Cat)" && d.line == 1).unwrap(); + assert_eq!(iface.kind, "class"); + let m = s.definitions.iter().find(|d| d.name == "Foo(Cat).catMethod" && d.kind == "method").unwrap(); + let _ = m; + } + + #[test] + fn extracts_protocol_as_interface() { + let code = "@protocol MyProto\n- (void)reqMethod;\n@end"; + let s = parse_objc(code); + let p = s.definitions.iter().find(|d| d.name == "MyProto").unwrap(); + assert_eq!(p.kind, "interface"); + } + + #[test] + fn extracts_imports() { + let code = 
"#import \"Repo.h\"\n#import <Foundation/Foundation.h>\n@import UIKit;"; + let s = parse_objc(code); + assert_eq!(s.imports.len(), 3); + assert_eq!(s.imports[0].source, "Repo.h"); + assert_eq!(s.imports[0].c_include, Some(true)); + assert_eq!(s.imports[1].source, "Foundation/Foundation.h"); + assert_eq!(s.imports[2].source, "UIKit"); + } + + #[test] + fn extracts_message_send_calls() { + let code = "\ +@implementation Foo +- (void)go { + [Validators isValidEmail:@\"a@b\"]; + [_repo saveWithId:userId name:name]; + [super init]; +} +@end"; + let s = parse_objc(code); + let names: Vec<_> = s.calls.iter().map(|c| c.name.as_str()).collect(); + assert!(names.contains(&"isValidEmail:")); + assert!(names.contains(&"saveWithId:name:")); + assert!(names.contains(&"init")); + } + + #[test] + fn extracts_plain_c_function_and_call() { + let code = "void run(int x) {\n printf(\"hi\");\n}"; + let s = parse_objc(code); + let f = s.definitions.iter().find(|d| d.name == "run").unwrap(); + assert_eq!(f.kind, "function"); + let p = f.children.as_ref().unwrap(); + assert_eq!(p[0].name, "x"); + assert!(s.calls.iter().any(|c| c.name == "printf")); + } + + #[test] + fn extracts_property_name() { + let code = "\ +@interface Foo : NSObject +@property (nonatomic, strong) NSString *name; +@end"; + let s = parse_objc(code); + let foo = s.definitions.iter().find(|d| d.name == "Foo").unwrap(); + let kids = foo.children.as_ref().unwrap(); + let prop = kids.iter().find(|k| k.kind == "property").unwrap(); + assert_eq!(prop.name, "name"); + } +} diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index 0cb157814..539f87bc5 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -36,6 +36,7 @@ const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "kt", "kts", 
"swift", "scala", "sh", "bash", "ex", "exs", "lua", "dart", "zig", "hs", "ml", "mli", + "m", ]; /// Returns whether `path` has an extension the Rust file_collector would accept. diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index c87957f29..fe042a06d 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -27,6 +27,7 @@ pub enum LanguageKind { Haskell, Ocaml, OcamlInterface, + ObjC, } impl LanguageKind { @@ -58,6 +59,7 @@ impl LanguageKind { Self::Haskell => "haskell", Self::Ocaml => "ocaml", Self::OcamlInterface => "ocaml-interface", + Self::ObjC => "objc", } } @@ -97,6 +99,7 @@ impl LanguageKind { "hs" => Some(Self::Haskell), "ml" => Some(Self::Ocaml), "mli" => Some(Self::OcamlInterface), + "m" => Some(Self::ObjC), _ => None, } } @@ -129,6 +132,7 @@ impl LanguageKind { "haskell" => Some(Self::Haskell), "ocaml" => Some(Self::Ocaml), "ocaml-interface" => Some(Self::OcamlInterface), + "objc" => Some(Self::ObjC), _ => None, } } @@ -160,6 +164,7 @@ impl LanguageKind { Self::Haskell => tree_sitter_haskell::LANGUAGE.into(), Self::Ocaml => tree_sitter_ocaml::LANGUAGE_OCAML.into(), Self::OcamlInterface => tree_sitter_ocaml::LANGUAGE_OCAML_INTERFACE.into(), + Self::ObjC => tree_sitter_objc::LANGUAGE.into(), } } @@ -175,7 +180,7 @@ impl LanguageKind { &[ JavaScript, TypeScript, Tsx, Python, Go, Rust, Java, CSharp, Ruby, Php, Hcl, C, Cpp, Kotlin, Swift, Scala, Bash, Elixir, Lua, Dart, Zig, Haskell, Ocaml, - OcamlInterface, + OcamlInterface, ObjC, ] } } @@ -244,14 +249,15 @@ mod tests { | LanguageKind::Zig | LanguageKind::Haskell | LanguageKind::Ocaml - | LanguageKind::OcamlInterface => (), + | LanguageKind::OcamlInterface + | LanguageKind::ObjC => (), }; // IMPORTANT: this constant must equal the number of arms in the match // above AND the length of the slice returned by `LanguageKind::all()`. 
// Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. - const EXPECTED_LEN: usize = 24; + const EXPECTED_LEN: usize = 25; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, diff --git a/package-lock.json b/package-lock.json index 2de5a303b..f0a088fc9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -7530,6 +7530,7 @@ "resolved": "git+ssh://git@github.com/gleam-lang/tree-sitter-gleam.git#1627dc5101e63bf19717c540a56df5ef20b1fc7a", "integrity": "sha512-ysgcjQzunTVX0hBoUXWRU7YCrzOVSJlT3bHzrq78E3eE1iu1RQ3+RrwKjXEPVPInOmunuS+gHf6LWd8MyXZ4UQ==", "dev": true, + "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { "nan": "^2.18.0" diff --git a/src/ast-analysis/rules/index.ts b/src/ast-analysis/rules/index.ts index 653cbd59b..c8ce42518 100644 --- a/src/ast-analysis/rules/index.ts +++ b/src/ast-analysis/rules/index.ts @@ -153,6 +153,11 @@ const OCAML_AST_TYPES: Record = { string: 'string', }; +const OBJC_AST_TYPES: Record = { + throw_statement: 'throw', + string_literal: 'string', +}; + export const AST_TYPE_MAPS: Map> = new Map([ ['javascript', JS_AST_TYPES], ['typescript', JS_AST_TYPES], @@ -177,6 +182,7 @@ export const AST_TYPE_MAPS: Map> = new Map([ ['haskell', HASKELL_AST_TYPES], ['ocaml', OCAML_AST_TYPES], ['ocaml-interface', OCAML_AST_TYPES], + ['objc', OBJC_AST_TYPES], ]); // ─── Per-language string-extraction config ─────────────────────────────── @@ -211,6 +217,7 @@ const DART_STRING_CONFIG: AstStringConfig = { quoteChars: '\'"', stringPrefixes: const ZIG_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; const HASKELL_STRING_CONFIG: AstStringConfig = { quoteChars: '"\'', stringPrefixes: '' }; const OCAML_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; +const OBJC_STRING_CONFIG: AstStringConfig = { quoteChars: '"', stringPrefixes: '' }; export const AST_STRING_CONFIGS: Map = 
new Map([ ['javascript', JS_STRING_CONFIG], @@ -236,6 +243,7 @@ export const AST_STRING_CONFIGS: Map = new Map([ ['haskell', HASKELL_STRING_CONFIG], ['ocaml', OCAML_STRING_CONFIG], ['ocaml-interface', OCAML_STRING_CONFIG], + ['objc', OBJC_STRING_CONFIG], ]); // ─── Per-language "stop-after-collect" kinds ───────────────────────────── diff --git a/src/domain/parser.ts b/src/domain/parser.ts index f1c7dd809..a86f60532 100644 --- a/src/domain/parser.ts +++ b/src/domain/parser.ts @@ -471,6 +471,7 @@ export const NATIVE_SUPPORTED_EXTENSIONS: ReadonlySet = new Set([ '.hs', '.ml', '.mli', + '.m', ]); /** diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 24aee1d53..458c4b36e 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -25,9 +25,8 @@ describe('classifyNativeDrops', () => { 'src/h.cu', 'src/i.groovy', 'src/j.v', - 'src/k.m', ]); - expect(totals['unsupported-by-native']).toBe(11); + expect(totals['unsupported-by-native']).toBe(10); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']); From de05222ec4802f89ec87a82d99c7d9b697261ac7 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Mon, 11 May 2026 20:16:59 -0600 Subject: [PATCH 2/7] fix(extractors): address ObjC review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Use `module` field (not `path`) for `@import` in Rust to mirror the JS extractor and match the tree-sitter-objc `module_import` grammar field. - Drop the unreachable `implementation_definition` branch from `collect_class_members` — it is only invoked from `handle_class_interface` and `class_interface` nodes do not contain `implementation_definition` children. 
- Qualify category methods with `(Category)` in the JS extractor so its output matches Rust for `@interface Foo (Cat)` / `@implementation Foo (Cat)` when the grammar emits `class_interface`/`class_implementation` rather than dedicated `category_interface` nodes. Two categories can declare same-named methods, so the qualified parent disambiguates the symbols. - Document the `.m` extension collision with MATLAB/Octave in the file collector since `.m` files are unconditionally routed to the ObjC parser. --- crates/codegraph-core/src/extractors/objc.rs | 23 ++++++----------- crates/codegraph-core/src/file_collector.rs | 8 ++++++ src/extractors/objc.ts | 26 +++++++++++++++++--- 3 files changed, 38 insertions(+), 19 deletions(-) diff --git a/crates/codegraph-core/src/extractors/objc.rs b/crates/codegraph-core/src/extractors/objc.rs index 929f2d75a..70d385e86 100644 --- a/crates/codegraph-core/src/extractors/objc.rs +++ b/crates/codegraph-core/src/extractors/objc.rs @@ -9,10 +9,9 @@ use super::SymbolExtractor; /// /// The tree-sitter-objc grammar extends C with `@interface`, `@implementation`, /// `@protocol`, method declarations/definitions, `#import`, `@import`, and -/// message expressions. Methods inside `class_implementation` are wrapped in -/// `implementation_definition`, and selectors are not exposed as a named -/// `selector` field — they are assembled from leading `identifier` keywords -/// followed by `method_parameter` children. +/// message expressions. Selectors are not exposed as a named `selector` field +/// — they are assembled from leading `identifier` keywords followed by +/// `method_parameter` children. pub struct ObjCExtractor; impl SymbolExtractor for ObjCExtractor { @@ -218,10 +217,12 @@ fn handle_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { symbols.imports.push(imp); } -/// `@import Foundation;` — grammar emits `module_import` with `path` field -/// pointing at the module identifier. 
+/// `@import Foundation;` — grammar emits `module_import` with a `module` +/// field pointing at the module identifier. (`path` is the `#import` +/// preprocessor field; `module_import` uses `module`.) Mirrors +/// `src/extractors/objc.ts`. fn handle_at_import(node: &Node, source: &[u8], symbols: &mut FileSymbols) { - let module_node = node.child_by_field_name("path") + let module_node = node.child_by_field_name("module") .or_else(|| find_child(node, "identifier")); if let Some(m) = module_node { let name = node_text(&m, source).to_string(); @@ -476,14 +477,6 @@ fn collect_class_members(class_node: &Node, source: &[u8]) -> Vec { members.push(child_def(sel, "method", start_line(&child))); } } - "implementation_definition" => { - // Wraps a `method_definition` inside `class_implementation`. - if let Some(method) = find_child(&child, "method_definition") { - if let Some(sel) = build_selector(&method, source) { - members.push(child_def(sel, "method", start_line(&method))); - } - } - } "property_declaration" => { if let Some(prop_name) = extract_property_name(&child, source) { members.push(child_def(prop_name, "property", start_line(&child))); diff --git a/crates/codegraph-core/src/file_collector.rs b/crates/codegraph-core/src/file_collector.rs index 539f87bc5..ec8ba3c3d 100644 --- a/crates/codegraph-core/src/file_collector.rs +++ b/crates/codegraph-core/src/file_collector.rs @@ -32,6 +32,14 @@ const DEFAULT_IGNORE_DIRS: &[&str] = &[ /// All supported file extensions (mirrors the JS `EXTENSIONS` set). /// Must stay in sync with `LanguageKind::from_extension`. +/// +/// Known extension collisions: +/// - `.m` is the canonical extension for both Objective-C *and* MATLAB/GNU +/// Octave source files. We route every `.m` file through the Objective-C +/// extractor. MATLAB files will parse but produce garbled or empty symbol +/// output (no error is raised). If MATLAB support is added later this will +/// need disambiguation heuristics (e.g. 
presence of `@interface`/`@import` +/// vs MATLAB keywords like `function`/`classdef`). const SUPPORTED_EXTENSIONS: &[&str] = &[ "js", "jsx", "mjs", "cjs", "ts", "tsx", "d.ts", "py", "pyi", "go", "rs", "java", "cs", "rb", "rake", "gemspec", "php", "phtml", "tf", "hcl", "c", "h", "cpp", "cc", "cxx", "hpp", "kt", diff --git a/src/extractors/objc.ts b/src/extractors/objc.ts index d9f567546..74e9397c2 100644 --- a/src/extractors/objc.ts +++ b/src/extractors/objc.ts @@ -87,17 +87,23 @@ function handleClassInterface(node: TreeSitterNode, ctx: ExtractorOutput): void const nameNode = node.childForFieldName('name') || findObjCDeclName(node); if (!nameNode) return; const name = nameNode.text; + // Categories declared as `@interface Foo (Cat)` arrive as `class_interface` + // with a `category` field (rather than the `category_interface` node type). + // Qualify the display name with `(Cat)` so symbols stay grouped per category + // and match the Rust extractor. + const category = node.childForFieldName('category'); + const displayName = category ? `${name}(${category.text})` : name; const members = collectClassMembers(node); ctx.definitions.push({ - name, + name: displayName, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), children: members.length > 0 ? members : undefined, }); - // Superclass + // Superclass — keyed on the bare class name (categories don't have a superclass). const superclass = node.childForFieldName('superclass'); if (superclass) { ctx.classes.push({ name, extends: superclass.text, line: node.startPosition.row + 1 }); @@ -118,9 +124,14 @@ function handleClassInterface(node: TreeSitterNode, ctx: ExtractorOutput): void function handleClassImplementation(node: TreeSitterNode, ctx: ExtractorOutput): void { const nameNode = node.childForFieldName('name') || findObjCDeclName(node); if (!nameNode) return; + // Categories declared as `@implementation Foo (Cat)` arrive as + // `class_implementation` with a `category` field. 
Mirror the Rust extractor + // and qualify the display name with `(Cat)`. + const category = node.childForFieldName('category'); + const displayName = category ? `${nameNode.text}(${category.text})` : nameNode.text; ctx.definitions.push({ - name: nameNode.text, + name: displayName, kind: 'class', line: node.startPosition.row + 1, endLine: nodeEndLine(node), @@ -349,7 +360,14 @@ function findObjCParentClass(node: TreeSitterNode): string | null { current.type === 'category_implementation' ) { const nameNode = current.childForFieldName('name') || findObjCDeclName(current); - return nameNode ? nameNode.text : null; + if (!nameNode) return null; + // Categories: include `(Cat)` so methods are grouped per category. + // Two categories on the same class can declare same-named methods, so + // qualifying the parent name keeps the symbols disambiguated. Mirrors + // `find_objc_parent_class` in `crates/codegraph-core/src/extractors/objc.rs`. + const category = current.childForFieldName('category'); + if (category) return `${nameNode.text}(${category.text})`; + return nameNode.text; } current = current.parent; } From cfcd5a26cc0c595e9e70821d8d681289e5cf133c Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Tue, 12 May 2026 00:16:06 -0600 Subject: [PATCH 3/7] fix: correct expected count after merge dropped two entries (#1106) The native-drop-classification test asserts the count of WASM-only languages. The Clojure PR (#1097) removed src/c.clj and decremented the count from 11 to 10. This PR removes src/k.m (now natively supported via ObjC). After the merge both entries are removed, so the assertion needs to be 9, not 10. 
--- tests/parsers/native-drop-classification.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/parsers/native-drop-classification.test.ts b/tests/parsers/native-drop-classification.test.ts index 24cfeab75..5df5f0e84 100644 --- a/tests/parsers/native-drop-classification.test.ts +++ b/tests/parsers/native-drop-classification.test.ts @@ -25,7 +25,7 @@ describe('classifyNativeDrops', () => { 'src/i.groovy', 'src/j.v', ]); - expect(totals['unsupported-by-native']).toBe(10); + expect(totals['unsupported-by-native']).toBe(9); expect(totals['native-extractor-failure']).toBe(0); expect(byReason['unsupported-by-native'].get('.fs')).toEqual(['src/a.fs']); expect(byReason['unsupported-by-native'].get('.gleam')).toEqual(['src/b.gleam']); From 7294941e8abe78f89a112dfd489676b9cbbc4b63 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 03:37:49 -0600 Subject: [PATCH 4/7] fix(extractors): align JS ObjC engine with native for @import and calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three engine-parity gaps Greptile flagged on the WASM side that broke the "identical output" goal: - @import statements: tree-sitter-objc v3 emits `module_import` not `import_declaration`, so the JS dispatch arm never matched and every `@import Foundation;` was silently dropped. Accept both node types. - C-style calls (printf, CGContextFillRect, …): the grammar lacks a `function` field on `call_expression`, so the named-field lookup always misses. Rust falls back to the first identifier child; JS did not, so every C call was dropped. Add the same fallback. - Message expressions: the grammar tags each keyword identifier with the `method` field rather than exposing a `selector` field, so the JS selector lookup misfired for multi-keyword selectors. Assemble the selector from `method` children with `:` joining, matching `build_message_selector` in the Rust extractor. 
Also expose `fieldNameForChild` on the `TreeSitterNode` type and add three JS extractor tests covering the new parity behaviour. --- src/extractors/objc.ts | 49 ++++++++++++++++++++++++++++++++++---- src/types.ts | 1 + tests/parsers/objc.test.ts | 34 ++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 4 deletions(-) diff --git a/src/extractors/objc.ts b/src/extractors/objc.ts index 74e9397c2..4906f03e4 100644 --- a/src/extractors/objc.ts +++ b/src/extractors/objc.ts @@ -55,6 +55,11 @@ function walkObjCNode(node: TreeSitterNode, ctx: ExtractorOutput): void { case 'preproc_import': handleImport(node, ctx); break; + // tree-sitter-objc v3 emits `module_import` for `@import Foundation;` + // statements. Older grammar revisions used `import_declaration`, so we + // accept both for forward/backward compatibility and keep behaviour + // aligned with `handle_at_import` on the Rust side. + case 'module_import': case 'import_declaration': handleAtImport(node, ctx); break; @@ -296,7 +301,20 @@ function handleTypedef(node: TreeSitterNode, ctx: ExtractorOutput): void { // ── Call handlers ───────────────────────────────────────────────────────── function handleCCallExpr(node: TreeSitterNode, ctx: ExtractorOutput): void { - const funcNode = node.childForFieldName('function'); + // tree-sitter-objc does not expose a `function` field on `call_expression`, + // so the named-field lookup almost always misses. Fall back to the first + // `identifier` / `field_expression` child to mirror `handle_c_call_expr` in + // `crates/codegraph-core/src/extractors/objc.rs` and keep engine parity. 
+ let funcNode = node.childForFieldName('function'); + if (!funcNode) { + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (child && (child.type === 'identifier' || child.type === 'field_expression')) { + funcNode = child; + break; + } + } + } if (!funcNode) return; const call: Call = { name: '', line: node.startPosition.row + 1 }; if (funcNode.type === 'field_expression') { @@ -313,10 +331,33 @@ function handleCCallExpr(node: TreeSitterNode, ctx: ExtractorOutput): void { function handleMessageExpr(node: TreeSitterNode, ctx: ExtractorOutput): void { // [receiver selector:arg ...] const receiver = node.childForFieldName('receiver'); - const selector = node.childForFieldName('selector'); - if (!selector) return; - const call: Call = { name: selector.text, line: node.startPosition.row + 1 }; + // tree-sitter-objc v3 does not expose a `selector` field on + // `message_expression`; instead every keyword identifier has the `method` + // field. Assemble the selector by joining `method` children with `:`, + // appending a trailing `:` when the message has at least one colon + // (keyword form). Mirrors `build_message_selector` in + // `crates/codegraph-core/src/extractors/objc.rs`. + const parts: string[] = []; + let hasColon = false; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + const fieldName = node.fieldNameForChild(i); + if (fieldName === 'method') parts.push(child.text); + if (child.type === ':') hasColon = true; + } + let name: string; + if (parts.length > 0) { + name = hasColon ? `${parts.join(':')}:` : parts.join(':'); + } else { + // Fallback: some grammar revisions expose a `selector` field. 
+ const selector = node.childForFieldName('selector'); + if (!selector) return; + name = selector.text; + } + + const call: Call = { name, line: node.startPosition.row + 1 }; if (receiver) call.receiver = receiver.text; ctx.calls.push(call); } diff --git a/src/types.ts b/src/types.ts index 4eff92878..08eb94316 100644 --- a/src/types.ts +++ b/src/types.ts @@ -576,6 +576,7 @@ export interface TreeSitterNode { child(index: number): TreeSitterNode | null; namedChild(index: number): TreeSitterNode | null; childForFieldName(name: string): TreeSitterNode | null; + fieldNameForChild(index: number): string | null; parent: TreeSitterNode | null; previousSibling: TreeSitterNode | null; nextSibling: TreeSitterNode | null; diff --git a/tests/parsers/objc.test.ts b/tests/parsers/objc.test.ts index 33c5668ea..59e1c79bd 100644 --- a/tests/parsers/objc.test.ts +++ b/tests/parsers/objc.test.ts @@ -56,4 +56,38 @@ describe('Objective-C parser', () => { expect.objectContaining({ name: 'MyView', extends: 'UIView' }), ); }); + + it('extracts @import module statements', () => { + // tree-sitter-objc v3 emits `module_import` for `@import` statements. + // The Rust extractor dispatches on this node type and the JS extractor + // must match it to keep engine parity (otherwise every `@import` is + // silently dropped on the JS side). + const symbols = parseObjC(`@import Foundation;`); + expect(symbols.imports).toContainEqual( + expect.objectContaining({ source: 'Foundation', names: ['Foundation'] }), + ); + }); + + it('extracts C-style function calls without a `function` field', () => { + // tree-sitter-objc does not expose a `function` field on `call_expression`, + // so the JS extractor must fall back to the first identifier child — + // matching the Rust side. Otherwise C calls like `printf(...)` are + // silently dropped. 
+ const symbols = parseObjC(`void main() { + printf("hello"); +}`); + expect(symbols.calls).toContainEqual(expect.objectContaining({ name: 'printf' })); + }); + + it('builds keyword-selector calls from message expressions', () => { + // The grammar tags each keyword identifier with the `method` field rather + // than exposing a single `selector` field. Mirror the Rust assembly so + // selectors like `initWithName:age:` are recorded identically. + const symbols = parseObjC(`void main() { + [obj initWithName:@"x" age:10]; +}`); + expect(symbols.calls).toContainEqual( + expect.objectContaining({ name: 'initWithName:age:', receiver: 'obj' }), + ); + }); }); From a0c9a33e19b26f6d9ca7c1e84ba6e0b080cbde48 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 05:05:46 -0600 Subject: [PATCH 5/7] fix: bump EXPECTED_LEN after merging Solidity LanguageKind (#1106) --- crates/codegraph-core/src/parser_registry.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/codegraph-core/src/parser_registry.rs b/crates/codegraph-core/src/parser_registry.rs index e685f7389..aa603b4be 100644 --- a/crates/codegraph-core/src/parser_registry.rs +++ b/crates/codegraph-core/src/parser_registry.rs @@ -281,7 +281,7 @@ mod tests { // Because both checks require the same manual update, they reinforce // each other: a developer who updates the match is reminded to also // update `all()` and this count. 
- const EXPECTED_LEN: usize = 28; + const EXPECTED_LEN: usize = 29; assert_eq!( LanguageKind::all().len(), EXPECTED_LEN, From 29ad42ae65402413dabcb63da9c1af2e4a957e23 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Wed, 13 May 2026 22:40:51 -0600 Subject: [PATCH 6/7] fix(extractors): align JS ObjC engine with native for v3 grammar (#1106) The v3 tree-sitter-objc grammar emits flat 'identifier' + 'method_parameter' children directly under method nodes (no 'keyword_selector' wrapper) and nests property names under 'struct_declaration > struct_declarator > [pointer_declarator >] identifier' rather than exposing a 'name' field. The JS extractor was still following the old grammar shape, which silently dropped multi-keyword method definitions, never populated parameter children, and omitted all '@property' members from class children. - buildSelector: assemble selector from flat 'identifier'+'method_parameter' children directly under the method node, matching build_selector in crates/codegraph-core/src/extractors/objc.rs. - extractMethodParams: iterate 'method_parameter' children directly and use the last 'identifier' child as the parameter name (mirrors extract_method_params in the Rust extractor). - collectClassMembers: extract '@property' names via a deep identifier walk under 'struct_declaration > struct_declarator' (mirrors extract_property_name in the Rust extractor). Added three regression tests covering keyword method definitions with parameters, and pointer + non-pointer '@property' member names. 
--- src/extractors/objc.ts | 91 +++++++++++++++++++++++++------------- tests/parsers/objc.test.ts | 40 +++++++++++++++++ 2 files changed, 100 insertions(+), 31 deletions(-) diff --git a/src/extractors/objc.ts b/src/extractors/objc.ts index 4906f03e4..5e70d1e1c 100644 --- a/src/extractors/objc.ts +++ b/src/extractors/objc.ts @@ -365,29 +365,25 @@ function handleMessageExpr(node: TreeSitterNode, ctx: ExtractorOutput): void { // ── Helpers ─────────────────────────────────────────────────────────────── function buildSelector(methodNode: TreeSitterNode): string | null { - const selector = methodNode.childForFieldName('selector'); - if (selector) return selector.text; - - // Build selector from keyword children: initWith:name: + // tree-sitter-objc v3 does not expose a `selector` field; the selector is + // assembled from the leading `identifier` keywords. Multi-keyword forms + // look like `setName:(...)x age:(...)y` and appear as flat + // `identifier` + `method_parameter` children directly under the method + // node (not wrapped in `keyword_selector`). Mirrors `build_selector` in + // `crates/codegraph-core/src/extractors/objc.rs`. const parts: string[] = []; + let hasParams = false; for (let i = 0; i < methodNode.childCount; i++) { const child = methodNode.child(i); if (!child) continue; - if (child.type === 'keyword_selector') { - for (let j = 0; j < child.childCount; j++) { - const kw = child.child(j); - if (kw && kw.type === 'keyword_declarator') { - const kwName = kw.childForFieldName('keyword'); - if (kwName) parts.push(kwName.text); - } - } - } - if (child.type === 'identifier' && i === 1) { - // Simple unary selector - return child.text; + if (child.type === 'identifier') { + parts.push(child.text); + } else if (child.type === 'method_parameter') { + hasParams = true; } } - return parts.length > 0 ? `${parts.join(':')}:` : null; + if (parts.length === 0) return null; + return hasParams ? 
`${parts.join(':')}:` : parts.join(':'); } function findObjCParentClass(node: TreeSitterNode): string | null { @@ -440,32 +436,65 @@ function collectClassMembers(classNode: TreeSitterNode): SubDeclaration[] { } } if (child.type === 'property_declaration') { - const propName = child.childForFieldName('name'); + const propName = extractPropertyName(child); if (propName) { - members.push({ name: propName.text, kind: 'property', line: child.startPosition.row + 1 }); + members.push({ name: propName, kind: 'property', line: child.startPosition.row + 1 }); } } } return members; } +/** + * Extract the property name from `@property (...) Type *foo;`. The v3 grammar + * does not expose `name` as a named field on `property_declaration`; instead + * the identifier nests under `struct_declaration > struct_declarator > + * [pointer_declarator >] identifier`. Mirrors `extract_property_name` in + * `crates/codegraph-core/src/extractors/objc.rs`. + */ +function extractPropertyName(propNode: TreeSitterNode): string | null { + const structDecl = findChild(propNode, 'struct_declaration'); + if (!structDecl) return null; + for (let i = 0; i < structDecl.childCount; i++) { + const child = structDecl.child(i); + if (!child || child.type !== 'struct_declarator') continue; + const id = findIdentifierDeep(child); + if (id) return id.text; + } + return null; +} + +function findIdentifierDeep(node: TreeSitterNode): TreeSitterNode | null { + if (node.type === 'identifier') return node; + for (let i = 0; i < node.childCount; i++) { + const child = node.child(i); + if (!child) continue; + const found = findIdentifierDeep(child); + if (found) return found; + } + return null; +} + function extractMethodParams(methodNode: TreeSitterNode): SubDeclaration[] { + // The v3 grammar emits flat `method_parameter` children under the method + // node; the parameter name is the last `identifier` inside each + // `method_parameter`. 
Mirrors `extract_method_params` in + // `crates/codegraph-core/src/extractors/objc.rs`. const params: SubDeclaration[] = []; for (let i = 0; i < methodNode.childCount; i++) { const child = methodNode.child(i); - if (!child || child.type !== 'keyword_selector') continue; + if (!child || child.type !== 'method_parameter') continue; + let nameNode: TreeSitterNode | null = null; for (let j = 0; j < child.childCount; j++) { - const kw = child.child(j); - if (kw && kw.type === 'keyword_declarator') { - const nameNode = kw.childForFieldName('name'); - if (nameNode) { - params.push({ - name: nameNode.text, - kind: 'parameter', - line: nameNode.startPosition.row + 1, - }); - } - } + const inner = child.child(j); + if (inner && inner.type === 'identifier') nameNode = inner; + } + if (nameNode) { + params.push({ + name: nameNode.text, + kind: 'parameter', + line: nameNode.startPosition.row + 1, + }); } } return params; diff --git a/tests/parsers/objc.test.ts b/tests/parsers/objc.test.ts index 59e1c79bd..0016f9f78 100644 --- a/tests/parsers/objc.test.ts +++ b/tests/parsers/objc.test.ts @@ -90,4 +90,44 @@ describe('Objective-C parser', () => { expect.objectContaining({ name: 'initWithName:age:', receiver: 'obj' }), ); }); + + it('extracts keyword-selector method definitions with parameter names', () => { + // The v3 grammar emits flat `identifier`+`method_parameter` children under + // `method_definition` rather than wrapping them in `keyword_selector`. The + // JS extractor must mirror `build_selector` / `extract_method_params` in + // `crates/codegraph-core/src/extractors/objc.rs` so multi-keyword selectors + // like `setName:age:` appear in `definitions` with their parameter names + // populated. Otherwise these methods are silently dropped on the JS side. 
+ const symbols = parseObjC(`@implementation Foo +- (void)setName:(NSString *)name age:(int)age { +} +@end`); + const method = symbols.definitions.find((d) => d.name === 'Foo.setName:age:'); + expect(method).toBeDefined(); + expect(method?.kind).toBe('method'); + expect(method?.children).toEqual([ + expect.objectContaining({ name: 'name', kind: 'parameter' }), + expect.objectContaining({ name: 'age', kind: 'parameter' }), + ]); + }); + + it('extracts @property names nested under struct_declarator', () => { + // The v3 grammar does not expose `name` as a named field on + // `property_declaration`; the identifier nests under + // `struct_declaration > struct_declarator > [pointer_declarator >] + // identifier`. Mirror `extract_property_name` in the Rust extractor so + // pointer and non-pointer properties both surface as class children. + const symbols = parseObjC(`@interface Foo : NSObject +@property (nonatomic, strong) NSString *name; +@property (nonatomic) int age; +@end`); + const foo = symbols.definitions.find((d) => d.name === 'Foo'); + expect(foo).toBeDefined(); + expect(foo?.children).toEqual( + expect.arrayContaining([ + expect.objectContaining({ name: 'name', kind: 'property' }), + expect.objectContaining({ name: 'age', kind: 'property' }), + ]), + ); + }); }); From dc1c8def03d0deccc88966541713199ca13b03b2 Mon Sep 17 00:00:00 2001 From: carlos-alm Date: Thu, 14 May 2026 00:34:50 -0600 Subject: [PATCH 7/7] fix(extractors): handle parameterized_arguments for ObjC protocol adoption (#1106) The JS extractor was looking up 'protocol_qualifiers' which doesn't exist in tree-sitter-objc v3 (the grammar wraps adopted protocols in 'parameterized_arguments' instead). As a result every '@interface Foo : NSObject <Bar, Baz>' silently dropped its 'implements' relations on the JS side, while the Rust extractor correctly extracted them. 
Mirror handle_class_interface in crates/codegraph-core/src/extractors/objc.rs (parameterized_arguments + the type_name > type_identifier nesting) and add a regression test. --- src/extractors/objc.ts | 19 +++++++++++++++---- tests/parsers/objc.test.ts | 19 +++++++++++++++++++ 2 files changed, 34 insertions(+), 4 deletions(-) diff --git a/src/extractors/objc.ts b/src/extractors/objc.ts index 5e70d1e1c..d23594312 100644 --- a/src/extractors/objc.ts +++ b/src/extractors/objc.ts @@ -114,13 +114,24 @@ function handleClassInterface(node: TreeSitterNode, ctx: ExtractorOutput): void ctx.classes.push({ name, extends: superclass.text, line: node.startPosition.row + 1 }); } - // Protocols - const protocols = findChild(node, 'protocol_qualifiers'); + // Adopted protocols. tree-sitter-objc v3 wraps the adopted-protocol list in + // `parameterized_arguments` (not `protocol_qualifiers`, which was the v2 + // grammar shape). Each child is wrapped in `type_name > type_identifier`; + // fall back to a bare `identifier`/`type_identifier` for older grammars. 
+ const protocols = findChild(node, 'parameterized_arguments'); if (protocols) { for (let i = 0; i < protocols.childCount; i++) { const proto = protocols.child(i); - if (proto && proto.type === 'identifier') { - ctx.classes.push({ name, implements: proto.text, line: node.startPosition.row + 1 }); + if (!proto) continue; + let protoName: string | null = null; + if (proto.type === 'type_name') { + const inner = findChild(proto, 'type_identifier') || findChild(proto, 'identifier'); + if (inner) protoName = inner.text; + } else if (proto.type === 'identifier' || proto.type === 'type_identifier') { + protoName = proto.text; + } + if (protoName) { + ctx.classes.push({ name, implements: protoName, line: node.startPosition.row + 1 }); } } } diff --git a/tests/parsers/objc.test.ts b/tests/parsers/objc.test.ts index 0016f9f78..cb9f33847 100644 --- a/tests/parsers/objc.test.ts +++ b/tests/parsers/objc.test.ts @@ -111,6 +111,25 @@ describe('Objective-C parser', () => { ]); }); + it('extracts adopted protocols as implements relations', () => { + // tree-sitter-objc v3 wraps the adopted-protocol list in + // `parameterized_arguments` (not the legacy `protocol_qualifiers`). The + // JS extractor must mirror `handle_class_interface` in + // `crates/codegraph-core/src/extractors/objc.rs`, otherwise every + // `implements` relation for an ObjC class interface is silently dropped. + const symbols = parseObjC(`@interface Foo : NSObject <Bar, Baz> +@end`); + expect(symbols.classes).toContainEqual( + expect.objectContaining({ name: 'Foo', extends: 'NSObject' }), + ); + expect(symbols.classes).toContainEqual( + expect.objectContaining({ name: 'Foo', implements: 'Bar' }), + ); + expect(symbols.classes).toContainEqual( + expect.objectContaining({ name: 'Foo', implements: 'Baz' }), + ); + }); + it('extracts @property names nested under struct_declarator', () => { // The v3 grammar does not expose `name` as a named field on // `property_declaration`; the identifier nests under