From f0faa46d89ad8acf2a7b120b8399c14ec5f4b29e Mon Sep 17 00:00:00 2001 From: Hesham Salman Date: Tue, 2 Jun 2026 10:05:12 -0400 Subject: [PATCH] fix: consolidate TypeScript and JavaScript parser resolution --- crates/sem-cli/src/commands/context.rs | 11 +- crates/sem-cli/src/commands/impact.rs | 11 +- crates/sem-cli/src/commands/mod.rs | 55 +- crates/sem-cli/src/main.rs | 4 +- crates/sem-cli/tests/accessor_cli.rs | 99 +++ crates/sem-core/src/parser/differ.rs | 198 ++++- crates/sem-core/src/parser/graph.rs | 590 +++++++++++++-- .../sem-core/src/parser/import_resolution.rs | 117 ++- .../parser/plugins/code/entity_extractor.rs | 575 ++++++++++++--- .../src/parser/plugins/code/languages.rs | 12 + crates/sem-core/src/parser/scope_resolve.rs | 685 ++++++++++++++++-- 11 files changed, 2115 insertions(+), 242 deletions(-) create mode 100644 crates/sem-cli/tests/accessor_cli.rs diff --git a/crates/sem-cli/src/commands/context.rs b/crates/sem-cli/src/commands/context.rs index 225d96da..9f6a4cf8 100644 --- a/crates/sem-cli/src/commands/context.rs +++ b/crates/sem-cli/src/commands/context.rs @@ -129,7 +129,11 @@ fn find_entity<'a>( std::process::exit(1); }); - let mut matching: Vec<_> = graph.entities.values().filter(|e| e.name == name).collect(); + let mut matching: Vec<_> = graph + .entities + .values() + .filter(|e| super::entity_matches_query(e, name)) + .collect(); if matching.is_empty() { eprintln!("{} Entity '{}' not found", "error:".red().bold(), name); @@ -155,7 +159,10 @@ fn find_entity<'a>( matching.sort_by_key(|e| (&e.file_path, e.start_line)); eprintln!("{} Entity name '{}' is ambiguous ({} matches). Specify --file or --entity-id:", "error:".red().bold(), name, matching.len()); for m in &matching { - eprintln!(" {} ({}:L{})", m.id, m.file_path, m.start_line); + eprintln!( + " {} {} ({}:L{})", + m.entity_type, m.id, m.file_path, m.start_line + ); } std::process::exit(1); } diff --git a/crates/sem-cli/src/commands/impact.rs b/crates/sem-cli/src/commands/impact.rs index 650c3745..9f50da6a 100644 --- a/crates/sem-cli/src/commands/impact.rs +++ b/crates/sem-cli/src/commands/impact.rs @@ -80,7 +80,11 @@ fn find_entity<'a>( std::process::exit(1); }); - let mut matching: Vec<_> = graph.entities.values().filter(|e| e.name == name).collect(); + let mut matching: Vec<_> = graph + .entities + .values() + .filter(|e| super::entity_matches_query(e, name)) + .collect(); if matching.is_empty() { eprintln!("{} Entity '{}' not found", "error:".red().bold(), name); @@ -108,7 +112,10 @@ fn find_entity<'a>( matching.sort_by_key(|e| (&e.file_path, e.start_line)); eprintln!("{} Entity name '{}' is ambiguous ({} matches). Specify --file or --entity-id:", "error:".red().bold(), name, matching.len()); for m in &matching { - eprintln!(" {} ({}:L{})", m.id, m.file_path, m.start_line); + eprintln!( + " {} {} ({}:L{})", + m.entity_type, m.id, m.file_path, m.start_line + ); } std::process::exit(1); } diff --git a/crates/sem-cli/src/commands/mod.rs b/crates/sem-cli/src/commands/mod.rs index e167a123..3b16ddd7 100644 --- a/crates/sem-cli/src/commands/mod.rs +++ b/crates/sem-cli/src/commands/mod.rs @@ -104,6 +104,27 @@ fn normalize_existing_prefix(path: &Path) -> Option { None } +pub fn entity_matches_query(entity: &sem_core::parser::graph::EntityInfo, query: &str) -> bool { + if entity.name == query { + return true; + } + + let Some((entity_type, name)) = split_type_qualified_query(query) else { + return false; + }; + + entity.entity_type == entity_type && entity.name == name +} + +fn split_type_qualified_query(query: &str) -> Option<(&str, &str)> { + let (entity_type, name) = query.split_once(' ')?; + if entity_type.is_empty() || name.is_empty() { + return None; + } + + Some((entity_type, name)) +} + /// Truncate a string to `max_chars` Unicode scalar values (codepoints), appending "..." if /// truncated. Safe for multibyte encodings (CJK, simple emoji). Note: does not split on grapheme /// cluster boundaries — ZWJ emoji sequences may render incorrectly at the truncation point. @@ -141,9 +162,41 @@ pub fn truncate_str(s: &str, max_chars: usize) -> String { #[cfg(test)] mod tests { - use super::{normalize_existing_prefix, normalize_lexical, normalize_repo_relative_path, truncate_str}; + use super::{ + entity_matches_query, normalize_existing_prefix, normalize_lexical, + normalize_repo_relative_path, truncate_str, + }; + use sem_core::parser::graph::EntityInfo; use std::path::Path; + fn entity(entity_type: &str, name: &str) -> EntityInfo { + EntityInfo { + id: format!("a.ts::{entity_type}::{name}"), + name: name.to_string(), + entity_type: entity_type.to_string(), + file_path: "a.ts".to_string(), + parent_id: None, + start_line: 1, + end_line: 1, + } + } + + #[test] + fn entity_query_matches_exact_name() { + let entity = entity("function", "getter value"); + + assert!(entity_matches_query(&entity, "getter value")); + } + + #[test] + fn entity_query_matches_type_qualified_name() { + let entity = entity("getter", "value"); + + assert!(entity_matches_query(&entity, "getter value")); + assert!(!entity_matches_query(&entity, "setter value")); + assert!(!entity_matches_query(&entity, "method value")); + } + #[test] fn ascii_short_string_unchanged() { assert_eq!(truncate_str("hello", 10), "hello"); diff --git a/crates/sem-cli/src/main.rs b/crates/sem-cli/src/main.rs index 1ed3355a..0e40f0ec 100644 --- a/crates/sem-cli/src/main.rs +++ b/crates/sem-cli/src/main.rs @@ -108,7 +108,7 @@ enum Commands { }, /// Show impact of changing an entity (deps, dependents, transitive impact, tests) Impact { - /// Name of the entity to analyze + /// Name of the entity to analyze, optionally as "type name" #[arg(required_unless_present = "entity_id")] entity: Option, @@ -242,7 +242,7 @@ enum Commands { }, /// Show token-budgeted context for an entity Context { - /// Name of the entity + /// Name of the entity, optionally as "type name" #[arg(required_unless_present = "entity_id")] entity: Option, diff --git a/crates/sem-cli/tests/accessor_cli.rs b/crates/sem-cli/tests/accessor_cli.rs new file mode 100644 index 00000000..ee59743d --- /dev/null +++ b/crates/sem-cli/tests/accessor_cli.rs @@ -0,0 +1,99 @@ +use std::fs; +use std::process::Command; + +use tempfile::TempDir; + +fn git(repo: &TempDir, args: &[&str]) { + let status = Command::new("git") + .current_dir(repo.path()) + .args(args) + .status() + .unwrap(); + assert!(status.success(), "git {:?} failed", args); +} + +fn sem(repo: &TempDir, home: &TempDir, args: &[&str]) -> std::process::Output { + Command::new(env!("CARGO_BIN_EXE_sem")) + .current_dir(repo.path()) + .env("HOME", home.path()) + .args(args) + .output() + .expect("sem should run") +} + +#[test] +fn context_and_impact_accept_type_qualified_accessor_queries() { + let repo = TempDir::new().unwrap(); + let home = TempDir::new().unwrap(); + git(&repo, &["init", "-q"]); + + fs::write( + repo.path().join("box.ts"), + r#"export class Box { + private _v = 0; + get value(): number { return this._v; } + set value(n: number) { this._v = n; } +} +"#, + ) + .unwrap(); + + let context = sem( + &repo, + &home, + &[ + "context", + "getter value", + "--file", + "box.ts", + "--json", + "--no-cache", + ], + ); + assert!( + context.status.success(), + "sem context failed\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&context.stdout), + String::from_utf8_lossy(&context.stderr) + ); + let context_json: serde_json::Value = serde_json::from_slice(&context.stdout).unwrap(); + assert_eq!(context_json["entity"].as_str(), Some("value")); + assert_eq!( + context_json["entries"][0]["type"].as_str(), + Some("getter"), + "{context_json:?}" + ); + assert!( + context_json["entityId"] + .as_str() + .is_some_and(|id| id.contains("::value@L3")), + "{context_json:?}" + ); + + let impact = sem( + &repo, + &home, + &[ + "impact", + "setter value", + "--file", + "box.ts", + "--json", + "--no-cache", + ], + ); + assert!( + impact.status.success(), + "sem impact failed\nstdout: {}\nstderr: {}", + String::from_utf8_lossy(&impact.stdout), + String::from_utf8_lossy(&impact.stderr) + ); + let impact_json: serde_json::Value = serde_json::from_slice(&impact.stdout).unwrap(); + assert_eq!(impact_json["entity"]["type"].as_str(), Some("setter")); + assert!( + impact_json["entity"]["entityId"] + .as_str() + .is_some_and(|id| id.contains("::value@L4")), + "{impact_json:?}" + ); +} diff --git a/crates/sem-core/src/parser/differ.rs b/crates/sem-core/src/parser/differ.rs index 366f6735..e60517cb 100644 --- a/crates/sem-core/src/parser/differ.rs +++ b/crates/sem-core/src/parser/differ.rs @@ -250,6 +250,7 @@ fn suppress_redundant_parents( "export", "package", "field", + "variable", "svelte_instance_script", "svelte_module_script", "object", @@ -376,22 +377,74 @@ fn strip_children_content( parent_start_line: usize, children: &[&SemanticEntity], ) -> String { - let lines: Vec<&str> = content.lines().collect(); - let mut excluded: HashSet = HashSet::new(); + let mut line_starts = vec![0]; + for (idx, ch) in content.char_indices() { + if ch == '\n' { + line_starts.push(idx + ch.len_utf8()); + } + } + + let mut excluded_ranges: Vec<(usize, usize)> = Vec::new(); for child in children { let start_idx = child.start_line.saturating_sub(parent_start_line); let end_idx = child.end_line.saturating_sub(parent_start_line); - for i in start_idx..=end_idx.max(start_idx) { - if i < lines.len() { - excluded.insert(i); + let search_start = line_starts.get(start_idx).copied().unwrap_or(0); + let search_end = line_starts + .get(end_idx.saturating_add(1)) + .copied() + .unwrap_or(content.len()) + .min(content.len()); + + if !child.content.is_empty() && search_start <= search_end { + let search_window = &content[search_start..search_end]; + if search_window.starts_with(&child.content) { + excluded_ranges.push((search_start, search_start + child.content.len())); + continue; + } + + if let Some(relative_start) = search_window.find(&child.content) { + let start = search_start + relative_start; + excluded_ranges.push((start, start + child.content.len())); + continue; } } } - lines - .iter() - .enumerate() - .filter(|(i, _)| !excluded.contains(i)) - .map(|(_, l)| l.trim()) + + if excluded_ranges.is_empty() { + return normalize_content_for_parent_suppression(content); + } + + excluded_ranges.sort_unstable(); + let mut merged_ranges: Vec<(usize, usize)> = Vec::new(); + for (start, end) in excluded_ranges { + if let Some((_, merged_end)) = merged_ranges.last_mut() { + if start <= *merged_end { + *merged_end = (*merged_end).max(end); + continue; + } + } + merged_ranges.push((start, end)); + } + + let mut stripped = String::with_capacity(content.len()); + let mut cursor = 0; + for (start, end) in merged_ranges { + if cursor < start { + stripped.push_str(&content[cursor..start]); + } + cursor = end.max(cursor); + } + if cursor < content.len() { + stripped.push_str(&content[cursor..]); + } + + normalize_content_for_parent_suppression(&stripped) +} + +fn normalize_content_for_parent_suppression(content: &str) -> String { + content + .lines() + .map(|l| l.trim()) .filter(|l| !l.is_empty()) .collect::>() .join(" ") @@ -939,6 +992,131 @@ mod tests { ); } + #[test] + fn test_nested_typescript_object_literal_diff_reports_leaf_method() { + let before = r#"export const svc = { + open(): number { return 1; }, + close(): number { return 0; }, +}; +"#; + let after = r#"export const svc = { + open(): number { return 2; }, + close(): number { return 0; }, +}; +"#; + + let registry = create_default_registry(); + let result = compute_semantic_diff( + &[modified_file("service.ts", before, after)], + ®istry, + None, + None, + ); + + let changes: Vec<_> = result + .changes + .iter() + .map(|c| (c.entity_name.as_str(), c.entity_type.as_str())) + .collect(); + assert!( + result + .changes + .iter() + .any(|c| c.entity_id == "service.ts::variable::svc::open"), + "expected object-literal method leaf change, got: {changes:?}" + ); + assert!( + !result + .changes + .iter() + .any(|c| c.entity_name == "svc" && c.entity_type == "variable"), + "variable container should be suppressed when only a nested method changed, got: {changes:?}" + ); + } + + #[test] + fn test_nested_typescript_object_literal_pair_diff_reports_leaf_methods() { + let before = r#"export const svc = { + reset: () => 1, + flush: function() { return 0; }, +}; +"#; + let after = r#"export const svc = { + reset: () => 2, + flush: function() { return 3; }, +}; +"#; + + let registry = create_default_registry(); + let result = compute_semantic_diff( + &[modified_file("service.ts", before, after)], + ®istry, + None, + None, + ); + + let changes: Vec<_> = result + .changes + .iter() + .map(|c| (c.entity_name.as_str(), c.entity_type.as_str())) + .collect(); + assert!( + result + .changes + .iter() + .any(|c| c.entity_id == "service.ts::variable::svc::reset"), + "expected arrow-valued object method change, got: {changes:?}" + ); + assert!( + result + .changes + .iter() + .any(|c| c.entity_id == "service.ts::variable::svc::flush"), + "expected function-valued object method change, got: {changes:?}" + ); + assert!( + !result + .changes + .iter() + .any(|c| c.entity_name == "svc" && c.entity_type == "variable"), + "variable container should be suppressed when only nested function-valued properties changed, got: {changes:?}" + ); + } + + #[test] + fn test_inline_typescript_object_literal_keeps_parent_variable_changes() { + let before = "export const svc = { open() { return 1; }, enabled: true };\n"; + let after = "export let svc = { open() { return 2; }, enabled: false };\n"; + + let registry = create_default_registry(); + let result = compute_semantic_diff( + &[modified_file("service.ts", before, after)], + ®istry, + None, + None, + ); + + let changes: Vec<_> = result + .changes + .iter() + .map(|c| (c.entity_name.as_str(), c.entity_type.as_str())) + .collect(); + assert!( + result + .changes + .iter() + .any(|c| c.entity_id == "service.ts::variable::svc::open"), + "expected nested method change, got: {changes:?}" + ); + assert!( + result + .changes + .iter() + .any(|c| c.entity_name == "svc" && c.entity_type == "variable"), + "parent variable change should remain visible, got: {changes:?}" + ); + } + #[test] fn renamed_file_with_edited_entity_reports_move_not_add_delete() { let before = "def foo():\n return alpha + beta + gamma\n"; diff --git a/crates/sem-core/src/parser/graph.rs b/crates/sem-core/src/parser/graph.rs index bb2d17a7..636e7a1b 100644 --- a/crates/sem-core/src/parser/graph.rs +++ b/crates/sem-core/src/parser/graph.rs @@ -33,7 +33,10 @@ macro_rules! maybe_par_iter { use crate::git::types::{FileChange, FileStatus}; use crate::model::entity::SemanticEntity; -use crate::parser::import_resolution::{find_import_target, import_source_matches_file}; +use crate::parser::import_resolution::{ + find_import_file, find_import_target, import_source_matches_file, is_js_ts_file, + sort_import_candidate_files, JS_TS_EXTENSIONS, +}; use crate::parser::registry::{resolve_go_method_parent_ids, ParserRegistry}; use crate::parser::scope_resolve; @@ -420,7 +423,7 @@ impl FileReferenceIndex { RefType::Imports } else { RefType::TypeRef - }; + }; (word, ref_type) }) .collect() @@ -737,9 +740,14 @@ impl EntityGraph { let mut enclosing_class: HashMap<&str, &str> = HashMap::new(); let mut class_members: HashMap<&str, Vec<(&str, &str)>> = HashMap::new(); let mut scope_class_members: HashMap> = HashMap::new(); + let mut scope_owner_members: HashMap> = HashMap::new(); for entity in &all_entities { if let Some(ref pid) = entity.parent_id { + scope_owner_members + .entry(pid.clone()) + .or_default() + .push((entity.name.clone(), entity.id.clone())); if let Some(&parent_name) = id_to_name.get(pid.as_str()) { if class_entity_names.contains(parent_name) { enclosing_class.insert(entity.id.as_str(), parent_name); @@ -825,6 +833,7 @@ impl EntityGraph { let pre_built = scope_resolve::PreBuiltLookups { symbol_table: Arc::clone(&symbol_table), class_members: scope_class_members, + owner_members: scope_owner_members, entity_ranges: scope_entity_ranges, go_pkg_index: owned_go_pkg_index, }; @@ -846,6 +855,7 @@ impl EntityGraph { &entity_map, Some(parsed_files), Some(pre_built), + Some(&import_table), ); let consumed_words = build_scope_consumed_words(&result.resolution_log); (result.edges, consumed_words) @@ -870,7 +880,7 @@ impl EntityGraph { crate::parser::plugins::code::languages::get_language_config(ext) else { return vec![]; - }; + }; let fallback_end_line = fallback_reference_end_line(entity, language_config.scope_resolve.is_some()); let fallback_ranges = @@ -921,7 +931,7 @@ impl EntityGraph { }) .collect() } - }; + }; for (receiver, member, position) in &dot_chains { if consumed_words.contains(*member) { @@ -1063,8 +1073,11 @@ impl EntityGraph { }) .collect(); + let export_edges = build_export_alias_edges(&all_entities, &import_table); + // Merge scope edges with bag-of-words edges, deduplicating let mut combined: Vec<(String, String, RefType)> = scope_edges; + combined.extend(export_edges); combined.extend(resolved_refs); let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len()); let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len()); @@ -1609,6 +1622,7 @@ impl EntityGraph { &entity_map, pre, None, + Some(&import_table), ); let consumed_words = build_scope_consumed_words(&result.resolution_log); (result.edges, consumed_words) @@ -1680,7 +1694,7 @@ impl EntityGraph { }) .collect() } - }; + }; for (receiver, member, position) in &dot_chains { if consumed_words.contains(*member) { @@ -1813,8 +1827,11 @@ impl EntityGraph { }) .collect(); + let export_edges = build_export_alias_edges(&all_entities, &import_table); + // Merge scope edges + bag-of-words edges + kept cached edges let mut combined: Vec<(String, String, RefType)> = scope_edges; + combined.extend(export_edges); combined.extend(resolved_refs); let mut seen_edges: HashSet<(String, String)> = HashSet::with_capacity(combined.len()); let mut all_resolved: Vec<(String, String, RefType)> = Vec::with_capacity(combined.len()); @@ -2358,6 +2375,274 @@ fn is_test_entity(entity: &crate::model::entity::SemanticEntity) -> bool { in_test_file && has_test_marker } +fn build_export_alias_edges( + all_entities: &[SemanticEntity], + import_table: &HashMap<(String, String), String>, +) -> Vec<(String, String, RefType)> { + all_entities + .iter() + .filter(|entity| entity.entity_type == "export") + .filter_map(|entity| { + let key = (entity.file_path.clone(), entity.name.clone()); + let target_id = import_table.get(&key)?; + if target_id == &entity.id { + return None; + } + Some((entity.id.clone(), target_id.clone(), RefType::Imports)) + }) + .collect() +} + +struct TsDefaultExportTable { + exports_by_file: HashMap, + sorted_files: Vec, +} + +struct TsDefaultReExport { + file_path: String, + original_name: String, + module_path: String, +} + +fn build_ts_default_export_table( + file_paths: &[String], + symbol_table: &HashMap>, + entity_map: &HashMap, + content_map: &HashMap<&str, &str>, +) -> TsDefaultExportTable { + let mut default_exports = HashMap::new(); + let mut re_exports = Vec::new(); + + for file_path in file_paths { + if !is_js_ts_file(file_path) { + continue; + } + + let Some(content) = content_map.get(file_path.as_str()).copied() else { + continue; + }; + + for name in default_export_names_from_content(content) { + let Some(target_ids) = symbol_table.get(name.as_str()) else { + continue; + }; + let target = target_ids.iter().find(|id| { + entity_map.get(*id).map_or(false, |entity| { + entity.file_path == *file_path && entity.parent_id.is_none() + }) + }); + if let Some(target_id) = target { + default_exports.insert(file_path.clone(), target_id.clone()); + } + } + + for (original_name, module_path) in default_re_exports_from_content(content) { + re_exports.push(TsDefaultReExport { + file_path: file_path.clone(), + original_name, + module_path, + }); + } + } + + resolve_ts_default_re_exports(&mut default_exports, re_exports, symbol_table, entity_map); + + let sorted_files = sorted_default_export_files(&default_exports); + + TsDefaultExportTable { + exports_by_file: default_exports, + sorted_files, + } +} + +fn sorted_default_export_files(default_exports: &HashMap) -> Vec { + let mut sorted_files: Vec = default_exports.keys().cloned().collect(); + sort_import_candidate_files(&mut sorted_files, JS_TS_EXTENSIONS); + sorted_files +} + +fn resolve_ts_default_re_exports( + default_exports: &mut HashMap, + pending: Vec, + symbol_table: &HashMap>, + entity_map: &HashMap, +) { + let mut pending = pending; + while !pending.is_empty() { + let sorted_files = sorted_default_export_files(default_exports); + let mut unresolved = Vec::new(); + let mut progressed = false; + + for re_export in pending { + let target_id = if re_export.original_name == "default" { + find_import_file( + &sorted_files, + &re_export.module_path, + &re_export.file_path, + JS_TS_EXTENSIONS, + ) + .and_then(|target_file| default_exports.get(target_file)) + .cloned() + } else { + symbol_table + .get(&re_export.original_name) + .and_then(|target_ids| { + find_import_target( + target_ids, + &re_export.module_path, + &re_export.file_path, + JS_TS_EXTENSIONS, + entity_map, + ) + .cloned() + }) + }; + + if let Some(target_id) = target_id { + default_exports.insert(re_export.file_path, target_id); + progressed = true; + } else { + unresolved.push(re_export); + } + } + + if !progressed { + break; + } + pending = unresolved; + } +} + +fn default_export_names_from_content(content: &str) -> Vec { + static DEFAULT_FUNCTION_RE: LazyLock = LazyLock::new(|| { + Regex::new(r"\bexport\s+default\s+(?:async\s+)?function\s*\*?\s+([A-Za-z_$][\w$]*)") + .unwrap() + }); + static DEFAULT_CLASS_RE: LazyLock = LazyLock::new(|| { + Regex::new(r"\bexport\s+default\s+(?:abstract\s+)?class\s+([A-Za-z_$][\w$]*)").unwrap() + }); + static DEFAULT_IDENTIFIER_RE: LazyLock = + LazyLock::new(|| Regex::new(r"\bexport\s+default\s+([A-Za-z_$][\w$]*)").unwrap()); + static DEFAULT_SPECIFIER_RE: LazyLock = + LazyLock::new(|| Regex::new(r#"export\s+(?:type\s+)?\{([^}]+)\}\s*;?"#).unwrap()); + + let mut names = Vec::new(); + for cap in DEFAULT_FUNCTION_RE.captures_iter(content) { + names.push(cap.get(1).unwrap().as_str().to_string()); + } + for cap in DEFAULT_CLASS_RE.captures_iter(content) { + names.push(cap.get(1).unwrap().as_str().to_string()); + } + for cap in DEFAULT_IDENTIFIER_RE.captures_iter(content) { + let name = cap.get(1).unwrap(); + let line_tail = content[name.end()..] + .split_once('\n') + .map_or(&content[name.end()..], |(line, _)| line); + if only_js_ts_statement_trivia(line_tail) { + names.push(name.as_str().to_string()); + } + } + for cap in DEFAULT_SPECIFIER_RE.captures_iter(content) { + let rest = content[cap.get(0).unwrap().end()..].trim_start(); + if rest.starts_with("from ") { + continue; + } + let names_str = cap.get(1).unwrap().as_str(); + for name_part in names_str.split(',') { + let Some((original_name, local_name)) = parse_js_ts_import_specifier(name_part) else { + continue; + }; + if local_name == "default" { + names.push(original_name.to_string()); + } + } + } + + names +} + +fn default_re_exports_from_content(content: &str) -> Vec<(String, String)> { + static REEXPORT_SPECIFIER_RE: LazyLock = LazyLock::new(|| { + Regex::new(r#"export\s+(?:type\s+)?\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap() + }); + + let mut re_exports = Vec::new(); + for cap in REEXPORT_SPECIFIER_RE.captures_iter(content) { + let names_str = cap.get(1).unwrap().as_str(); + let module_path = cap.get(2).unwrap().as_str(); + for name_part in names_str.split(',') { + let Some((original_name, local_name)) = parse_js_ts_import_specifier(name_part) else { + continue; + }; + if local_name == "default" { + re_exports.push((original_name.to_string(), module_path.to_string())); + } + } + } + re_exports +} + +fn only_js_ts_statement_trivia(mut text: &str) -> bool { + loop { + text = text.trim_start(); + if let Some(rest) = text.strip_prefix(';') { + text = rest; + continue; + } + if text.is_empty() { + return true; + } + if text.starts_with("//") { + return true; + } + if let Some(rest) = text.strip_prefix("/*") { + let Some(end) = rest.find("*/") else { + return false; + }; + text = &rest[end + 2..]; + continue; + } + return false; + } +} + +fn resolve_default_export_target( + default_exports: &TsDefaultExportTable, + module_path: &str, + file_path: &str, +) -> Option { + let target_file = find_import_file( + &default_exports.sorted_files, + module_path, + file_path, + JS_TS_EXTENSIONS, + )?; + default_exports.exports_by_file.get(target_file).cloned() +} + +fn parse_js_ts_import_specifier(name_part: &str) -> Option<(&str, &str)> { + let name_part = name_part.trim(); + if name_part.is_empty() { + return None; + } + + let (original, local) = if let Some(pos) = name_part.find(" as ") { + let original = name_part[..pos].trim(); + let local = name_part[pos + 4..].trim(); + (original, local) + } else { + (name_part, name_part) + }; + + let original = original.strip_prefix("type ").unwrap_or(original).trim(); + let local = local.strip_prefix("type ").unwrap_or(local).trim(); + if original.is_empty() || local.is_empty() { + return None; + } + + Some((original, local)) +} + /// Build import table: maps (file_path, imported_name) → target entity ID. /// /// Parses `from X import Y` / `import X` / `use X` style statements from entity content @@ -2370,14 +2655,30 @@ fn build_import_table( pre_parsed_content: Option<&[(String, String, tree_sitter::Tree)]>, ) -> HashMap<(String, String), String> { // Build a content lookup from pre-parsed files to avoid re-reading from disk - let content_map: HashMap<&str, &str> = pre_parsed_content - .map(|files| { + let mut content_map: HashMap<&str, &str> = HashMap::new(); + if let Some(files) = pre_parsed_content { + content_map.extend( files .iter() - .map(|(fp, content, _)| (fp.as_str(), content.as_str())) - .collect() - }) - .unwrap_or_default(); + .map(|(fp, content, _)| (fp.as_str(), content.as_str())), + ); + } + let mut owned_content: HashMap = HashMap::new(); + for file_path in file_paths { + if file_path.ends_with(".go") || content_map.contains_key(file_path.as_str()) { + continue; + } + if let Ok(content) = std::fs::read_to_string(root.join(file_path)) { + owned_content.insert(file_path.clone(), content); + } + } + content_map.extend( + owned_content + .iter() + .map(|(file_path, content)| (file_path.as_str(), content.as_str())), + ); + let ts_default_exports = + build_ts_default_export_table(file_paths, symbol_table, entity_map, &content_map); // Go imports are handled entirely by the scope resolver (which uses an indexed approach). // We no longer need a go_pkg_index here since Go files are skipped below. @@ -2390,17 +2691,8 @@ fn build_import_table( return None; } - // Use pre-parsed content if available, otherwise read from disk - let owned_content: Option; - let content: &str = if let Some(c) = content_map.get(file_path.as_str()) { - c - } else { - let full_path = root.join(file_path); - owned_content = std::fs::read_to_string(&full_path).ok(); - match owned_content.as_deref() { - Some(c) => c, - None => return None, - } + let Some(content) = content_map.get(file_path.as_str()).copied() else { + return None; }; let mut local_imports: Vec<((String, String), String)> = Vec::new(); @@ -2487,15 +2779,24 @@ fn build_import_table( // JS/TS imports: import { foo, bar as baz } from './module' // import Foo from './module' - let is_js_ts = file_path.ends_with(".js") || file_path.ends_with(".ts") - || file_path.ends_with(".jsx") || file_path.ends_with(".tsx"); + let is_js_ts = is_js_ts_file(file_path); if is_js_ts { static JS_NAMED_RE: LazyLock = LazyLock::new(|| { - Regex::new(r#"import\s*\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#).unwrap() + Regex::new( + r#"import\s+(?:type\s+)?(?:[A-Za-z_$][\w$]*\s*,\s*)?\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#, + ) + .unwrap() }); static JS_DEFAULT_RE: LazyLock = LazyLock::new(|| { - Regex::new(r#"import\s+(?:type\s+)?([A-Za-z_]\w*)\s+from\s*['"]([^'"]+)['"]"#).unwrap() + Regex::new( + r#"import\s+(?:type\s+)?([A-Za-z_$][\w$]*)(?:\s*,\s*\{[^}]*\})?\s*from\s*['"]([^'"]+)['"]"#, + ) + .unwrap() + }); + static JS_REEXPORT_RE: LazyLock = LazyLock::new(|| { + Regex::new(r#"export\s+(?:type\s+)?\{([^}]+)\}\s*from\s*['"]([^'"]+)['"]"#) + .unwrap() }); for cap in JS_NAMED_RE.captures_iter(content) { @@ -2503,28 +2804,18 @@ fn build_import_table( let module_path = cap.get(2).unwrap().as_str(); for name_part in names_str.split(',') { - let name_part = name_part.trim(); - if name_part.is_empty() { continue; } - - // Handle "foo as bar" aliases and "type foo" prefixes - let (original_name, local_name) = if let Some(pos) = name_part.find(" as ") { - let orig = name_part[..pos].trim(); - let local = name_part[pos + 4..].trim(); - let orig = orig.strip_prefix("type ").unwrap_or(orig); - (orig, local) - } else { - let name = name_part.strip_prefix("type ").unwrap_or(name_part); - (name, name) + let Some((original_name, local_name)) = + parse_js_ts_import_specifier(name_part) + else { + continue; }; - if original_name.is_empty() || local_name.is_empty() { continue; } - if let Some(target_ids) = symbol_table.get(original_name) { let target = find_import_target( target_ids, module_path, file_path, - &[".ts", ".tsx", ".js", ".jsx"], + JS_TS_EXTENSIONS, entity_map, ); if let Some(target_id) = target { @@ -2541,18 +2832,50 @@ fn build_import_table( let local_name = cap.get(1).unwrap().as_str(); let module_path = cap.get(2).unwrap().as_str(); - if let Some(target_ids) = symbol_table.get(local_name) { - let target = find_import_target( - target_ids, - module_path, - file_path, - &[".ts", ".tsx", ".js", ".jsx"], - entity_map, - ); - if let Some(target_id) = target { + if let Some(target_id) = + resolve_default_export_target(&ts_default_exports, module_path, file_path) + { + local_imports.push(( + (file_path.clone(), local_name.to_string()), + target_id, + )); + } + } + + for cap in JS_REEXPORT_RE.captures_iter(content) { + let names_str = cap.get(1).unwrap().as_str(); + let module_path = cap.get(2).unwrap().as_str(); + + for name_part in names_str.split(',') { + let Some((original_name, local_name)) = + parse_js_ts_import_specifier(name_part) + else { + continue; + }; + + let target_id = if original_name == "default" { + resolve_default_export_target( + &ts_default_exports, + module_path, + file_path, + ) + } else { + symbol_table.get(original_name).and_then(|target_ids| { + find_import_target( + target_ids, + module_path, + file_path, + JS_TS_EXTENSIONS, + entity_map, + ) + .cloned() + }) + }; + + if let Some(target_id) = target_id { local_imports.push(( (file_path.clone(), local_name.to_string()), - target_id.clone(), + target_id, )); } } @@ -5384,6 +5707,171 @@ export function caller() { return helper(); } ); } + #[test] + fn test_js_ts_default_import_resolves_static_member() { + let (dir, registry) = create_test_repo(); + let root = dir.path(); + + write_file( + root, + "base.ts", + "\ +export default class Widget { + static make(): string { return 'ok'; } +} +", + ); + write_file( + root, + "consumer.ts", + "\ +import RenamedWidget from './base'; +export function useWidget(): string { return RenamedWidget.make(); } +", + ); + + let (graph, _) = + EntityGraph::build(root, &["base.ts".into(), "consumer.ts".into()], ®istry); + + let use_widget_id = graph + .entities + .keys() + .find(|id| id.contains("useWidget")) + .expect("useWidget entity should exist"); + let deps = graph.get_dependencies(use_widget_id); + assert!( + deps.iter() + .any(|d| d.name == "make" && d.file_path == "base.ts"), + "default import alias should resolve the static member. Deps: {:?}", + deps.iter() + .map(|d| (&d.name, &d.file_path)) + .collect::>() + ); + } + + #[test] + fn test_js_ts_re_export_alias_resolves_through_barrel() { + let (dir, registry) = create_test_repo(); + let root = dir.path(); + + write_file( + root, + "lib.ts", + "\ +export function core(): string { return 'core'; } +", + ); + write_file( + root, + "barrel.ts", + "\ +export { core as publicCore } from './lib'; +", + ); + write_file( + root, + "consumer.ts", + "\ +import { publicCore } from './barrel'; +export function usePublicCore(): string { return publicCore(); } +", + ); + + let (graph, _) = EntityGraph::build( + root, + &["lib.ts".into(), "barrel.ts".into(), "consumer.ts".into()], + ®istry, + ); + + let public_core = graph + .entities + .values() + .find(|entity| { + entity.name == "publicCore" + && entity.file_path == "barrel.ts" + && entity.entity_type == "export" + }) + .expect("barrel export alias entity should exist"); + let alias_deps = graph.get_dependencies(&public_core.id); + assert!( + alias_deps + .iter() + .any(|d| d.name == "core" && d.file_path == "lib.ts"), + "barrel export alias should depend on lib.ts core. Deps: {:?}", + alias_deps + .iter() + .map(|d| (&d.name, &d.file_path)) + .collect::>() + ); + + let use_public_core_id = graph + .entities + .keys() + .find(|id| id.contains("usePublicCore")) + .expect("usePublicCore entity should exist"); + let consumer_deps = graph.get_dependencies(use_public_core_id); + assert!( + consumer_deps + .iter() + .any(|d| d.name == "publicCore" && d.file_path == "barrel.ts"), + "consumer should resolve publicCore through the barrel export. Deps: {:?}", + consumer_deps + .iter() + .map(|d| (&d.name, &d.file_path)) + .collect::>() + ); + } + + #[test] + fn test_js_ts_object_literal_receiver_resolves_owned_member() { + let (dir, registry) = create_test_repo(); + let root = dir.path(); + + write_file( + root, + "service.ts", + "\ +export const other = { + open() { return 'other'; } +}; +export const svc = { + open() { return 'svc'; } +}; +export function run(): string { + return svc.open(); +} +", + ); + + let (graph, _) = EntityGraph::build(root, &["service.ts".into()], ®istry); + + let run_id = graph + .entities + .keys() + .find(|id| id.contains("run")) + .expect("run entity should exist"); + let deps = graph.get_dependencies(run_id); + assert!( + deps.iter() + .any(|d| d.name == "open" + && d.parent_id.as_deref().is_some_and(|id| id.contains("svc"))), + "svc.open() should resolve to the object literal member owned by svc. Deps: {:?}", + deps.iter() + .map(|d| (&d.name, &d.parent_id)) + .collect::>() + ); + assert!( + !deps.iter().any(|d| d.name == "open" + && d.parent_id + .as_deref() + .is_some_and(|id| id.contains("other"))), + "svc.open() should not resolve to another object literal member. Deps: {:?}", + deps.iter() + .map(|d| (&d.name, &d.parent_id)) + .collect::>() + ); + } + #[test] fn test_python_relative_import_resolution_uses_full_path() { let (dir, registry) = create_test_repo(); diff --git a/crates/sem-core/src/parser/import_resolution.rs b/crates/sem-core/src/parser/import_resolution.rs index 4f0ccf5e..2338c5b6 100644 --- a/crates/sem-core/src/parser/import_resolution.rs +++ b/crates/sem-core/src/parser/import_resolution.rs @@ -3,6 +3,32 @@ use std::path::{Component, Path, PathBuf}; use crate::parser::graph::EntityInfo; +pub(crate) const JS_TS_EXTENSIONS: &[&str] = + &[".ts", ".tsx", ".mts", ".cts", ".js", ".jsx", ".mjs", ".cjs"]; + +pub(crate) fn is_js_ts_file(file_path: &str) -> bool { + JS_TS_EXTENSIONS + .iter() + .any(|extension| file_path.ends_with(extension)) +} + +pub(crate) fn sort_import_candidate_files>(paths: &mut [P], extensions: &[&str]) { + paths.sort_by(|left, right| { + let left = left.as_ref(); + let right = right.as_ref(); + extension_priority(left, extensions) + .cmp(&extension_priority(right, extensions)) + .then_with(|| left.cmp(right)) + }); +} + +fn extension_priority(file_path: &str, extensions: &[&str]) -> usize { + extensions + .iter() + .position(|extension| file_path.ends_with(extension)) + .unwrap_or(extensions.len()) +} + pub(crate) fn find_import_target<'a>( target_ids: &'a [String], source_path: &str, @@ -10,22 +36,40 @@ pub(crate) fn find_import_target<'a>( extensions: &[&str], entity_map: &HashMap, ) -> Option<&'a String> { + let target_files: Vec<&str> = target_ids + .iter() + .filter_map(|id| entity_map.get(id).map(|entity| entity.file_path.as_str())) + .collect(); + let target_file = find_import_file(&target_files, source_path, file_path, extensions)?; + + target_ids.iter().find(|id| { + entity_map + .get(*id) + .map_or(false, |entity| entity.file_path == target_file) + }) +} + +pub(crate) fn find_import_file<'a, P: AsRef>( + candidate_file_paths: &'a [P], + source_path: &str, + file_path: &str, + extensions: &[&str], +) -> Option<&'a str> { if let Some(candidates) = import_file_candidates(file_path, source_path, extensions) { return candidates.iter().find_map(|candidate_path| { - target_ids.iter().find(|id| { - entity_map - .get(*id) - .map_or(false, |e| e.file_path == *candidate_path) - }) + candidate_file_paths + .iter() + .map(AsRef::as_ref) + .find(|path| *path == candidate_path.as_str()) }); } let source_module = import_stem(source_path); - target_ids.iter().find(|id| { - entity_map - .get(*id) - .map_or(false, |e| file_stem(&e.file_path) == source_module) - }) + let mut candidates: Vec<&'a str> = candidate_file_paths.iter().map(AsRef::as_ref).collect(); + sort_import_candidate_files(&mut candidates, extensions); + candidates + .into_iter() + .find(|path| file_stem(path) == source_module) } pub(crate) fn import_source_matches_file( @@ -143,8 +187,12 @@ fn file_stem(file_path: &str) -> &str { file_name .strip_suffix(".py") .or_else(|| file_name.strip_suffix(".rs")) + .or_else(|| file_name.strip_suffix(".mts")) + .or_else(|| file_name.strip_suffix(".cts")) .or_else(|| file_name.strip_suffix(".ts")) .or_else(|| file_name.strip_suffix(".tsx")) + .or_else(|| file_name.strip_suffix(".mjs")) + .or_else(|| file_name.strip_suffix(".cjs")) .or_else(|| file_name.strip_suffix(".js")) .or_else(|| file_name.strip_suffix(".jsx")) .or_else(|| file_name.strip_suffix(".go")) @@ -182,7 +230,7 @@ mod tests { &ids, "./util.ts", "src/main.ts", - &[".ts", ".tsx", ".js", ".jsx"], + JS_TS_EXTENSIONS, &entity_map, ); @@ -198,7 +246,7 @@ mod tests { &ids, "./util.ts", "src/main.ts", - &[".ts", ".tsx", ".js", ".jsx"], + JS_TS_EXTENSIONS, &entity_map, ); @@ -230,10 +278,53 @@ mod tests { &ids, "util.ts", "src/main.ts", - &[".ts", ".tsx", ".js", ".jsx"], + JS_TS_EXTENSIONS, &entity_map, ); assert_eq!(target, Some(&ids[0])); } + + #[test] + fn bare_import_prefers_ordered_module_variant() { + let ids = vec![ + "lib.js::function::helper".to_string(), + "lib.ts::function::helper".to_string(), + ]; + let entity_map = HashMap::from([ + (ids[0].clone(), entity("lib.js")), + (ids[1].clone(), entity("lib.ts")), + ]); + + let target = find_import_target(&ids, "lib", "consumer.ts", JS_TS_EXTENSIONS, &entity_map); + + assert_eq!(target, Some(&ids[1])); + } + + #[test] + fn explicit_relative_import_resolves_module_variants_before_same_named_ts() { + for extension in [".mts", ".cts", ".mjs", ".cjs"] { + let ids = vec![ + "src/config.ts::function::helper".to_string(), + format!("src/deep/config{extension}::function::helper"), + ]; + let entity_map = HashMap::from([ + (ids[0].clone(), entity("src/config.ts")), + ( + ids[1].clone(), + entity(&format!("src/deep/config{extension}")), + ), + ]); + + let target = find_import_target( + &ids, + &format!("./deep/config{extension}"), + "src/main.ts", + JS_TS_EXTENSIONS, + &entity_map, + ); + + assert_eq!(target, Some(&ids[1]), "extension: {extension}"); + } + } } diff --git a/crates/sem-core/src/parser/plugins/code/entity_extractor.rs b/crates/sem-core/src/parser/plugins/code/entity_extractor.rs index e61faeaf..8bb24c82 100644 --- a/crates/sem-core/src/parser/plugins/code/entity_extractor.rs +++ b/crates/sem-core/src/parser/plugins/code/entity_extractor.rs @@ -1,14 +1,12 @@ use tree_sitter::{Node, Tree}; -use std::collections::{HashMap, HashSet}; +use super::languages::LanguageConfig; use crate::model::entity::{ - build_entity_id, - build_entity_id_disambiguated, - build_entity_id_disambiguated_with_ordinal, + build_entity_id, build_entity_id_disambiguated, build_entity_id_disambiguated_with_ordinal, SemanticEntity, }; use crate::utils::hash::{content_hash, structural_hash, structural_hash_excluding_range}; -use super::languages::LanguageConfig; +use std::collections::{HashMap, HashSet}; pub fn extract_entities( tree: &Tree, @@ -176,7 +174,11 @@ fn select_rewritten_parent_id( continue; } let parent = &entities[*parent_idx]; - let same_file_rank = if parent.file_path == child.file_path { 0 } else { 1 }; + let same_file_rank = if parent.file_path == child.file_path { + 0 + } else { + 1 + }; let before_rank = if *parent_idx < child_idx { 0 } else { 1 }; let line_span_contains_child = parent.start_line <= child.start_line && child.end_line <= parent.end_line; @@ -265,6 +267,8 @@ fn visit_node( root_parent_id.map(str::to_owned), root_suppression.map(str::to_owned), )]; + let mut ts_implementation_names_by_scope: HashMap<(usize, usize), HashSet> = + HashMap::new(); while let Some((node, pid_owned, sup_owned)) = worklist.pop() { let parent_id = pid_owned.as_deref(); @@ -314,7 +318,8 @@ fn visit_node( // Extract each binding as a separate entity. if node_type == "value_definition" && config.entity_node_types.contains(&node_type) { let mut cursor = node.walk(); - let bindings: Vec<_> = node.named_children(&mut cursor) + let bindings: Vec<_> = node + .named_children(&mut cursor) .filter(|c| c.kind() == "let_binding") .collect(); if !bindings.is_empty() { @@ -347,26 +352,53 @@ fn visit_node( if node_type == "module_definition" && config.entity_node_types.contains(&node_type) { let extracted = extract_ocaml_named_bindings( - node, "module_binding", "module_name", - map_node_type(node_type), file_path, parent_id, source, config, entities, + node, + "module_binding", + "module_name", + map_node_type(node_type), + file_path, + parent_id, + source, + config, + entities, ); - if extracted { continue; } + if extracted { + continue; + } } if node_type == "class_definition" && config.entity_node_types.contains(&node_type) { let extracted = extract_ocaml_named_bindings( - node, "class_binding", "class_name", - map_node_type(node_type), file_path, parent_id, source, config, entities, + node, + "class_binding", + "class_name", + map_node_type(node_type), + file_path, + parent_id, + source, + config, + entities, ); - if extracted { continue; } + if extracted { + continue; + } } if node_type == "class_type_definition" && config.entity_node_types.contains(&node_type) { let extracted = extract_ocaml_named_bindings( - node, "class_type_binding", "class_type_name", - map_node_type(node_type), file_path, parent_id, source, config, entities, + node, + "class_type_binding", + "class_type_name", + map_node_type(node_type), + file_path, + parent_id, + source, + config, + entities, ); - if extracted { continue; } + if extracted { + continue; + } } // TypeScript/JS multi-declarator: `const a = 1, b = 2` should produce @@ -384,48 +416,47 @@ fn visit_node( let skip_declaration = should_skip_entity(config, suppression_context, node_type); let mut initializer_children = Vec::new(); for declarator in &declarators { - let emitted_entity_id = if let Some(name_node) = - declarator.child_by_field_name("name") - { - let entity_type = - map_js_ts_declarator_entity_type(node, *declarator, config); - if !skip_declaration || entity_type == "function" { - let name = node_text(name_node, source).to_string(); - let content = node_text(*declarator, source).to_string(); - let struct_hash = compute_structural_hash(*declarator, source); - let entity = SemanticEntity { - id: build_entity_id(file_path, entity_type, &name, parent_id), - file_path: file_path.to_string(), - entity_type: entity_type.to_string(), - name, - parent_id: parent_id.map(String::from), - content_hash: content_hash(&content), - structural_hash: Some(struct_hash), - content, - start_line: declarator.start_position().row + 1, - end_line: declarator.end_position().row + 1, - metadata: None, - }; - - let entity_id = entity.id.clone(); - entities.push(entity); - Some(entity_id) + let emitted_entity_id = + if let Some(name_node) = declarator.child_by_field_name("name") { + let entity_type = + map_js_ts_declarator_entity_type(node, *declarator, config); + if !skip_declaration || entity_type == "function" { + let name = node_text(name_node, source).to_string(); + let content = node_text(*declarator, source).to_string(); + let struct_hash = compute_structural_hash(*declarator, source); + let entity = SemanticEntity { + id: build_entity_id(file_path, entity_type, &name, parent_id), + file_path: file_path.to_string(), + entity_type: entity_type.to_string(), + name, + parent_id: parent_id.map(String::from), + content_hash: content_hash(&content), + structural_hash: Some(struct_hash), + content, + start_line: declarator.start_position().row + 1, + end_line: declarator.end_position().row + 1, + metadata: None, + }; + + let entity_id = entity.id.clone(); + entities.push(entity); + Some(entity_id) + } else { + None + } } else { None - } - } else { - None - }; + }; // Suppressed local declarators do not have an entity of // their own, so their initializer is traversed under the // surrounding parent, matching the single-declarator path. let initializer_parent = emitted_entity_id.as_deref().or(parent_id); - if let Some(initializer_child) = - js_ts_initializer_child(config, *declarator, initializer_parent) - { - initializer_children.push(initializer_child); - } + initializer_children.extend(js_ts_initializer_children( + config, + *declarator, + initializer_parent, + )); } // The worklist is LIFO; push in reverse so initializers are // visited in source order. @@ -496,7 +527,9 @@ fn visit_node( } // JS/TS test call expressions: describe("name", () => {}), test(...), it(...), etc. - if node_type == "call_expression" && matches!(config.id, "typescript" | "tsx" | "javascript") { + if node_type == "call_expression" + && matches!(config.id, "typescript" | "tsx" | "javascript") + { if let Some((test_name, test_entity_type, is_container)) = extract_js_test_call(node, source) { @@ -536,6 +569,40 @@ fn visit_node( } } + if should_skip_ts_overload_signature( + node, + config, + source, + &mut ts_implementation_names_by_scope, + ) { + continue; + } + + if let Some((name, value)) = + extract_js_ts_object_function_pair(node, config, source, suppression_context) + { + let content = node_text(node, source).to_string(); + let struct_hash = compute_structural_hash(node, source); + let entity = SemanticEntity { + id: build_entity_id(file_path, "method", &name, parent_id), + file_path: file_path.to_string(), + entity_type: "method".to_string(), + name, + parent_id: parent_id.map(String::from), + content_hash: content_hash(&content), + structural_hash: Some(struct_hash), + content, + start_line: node.start_position().row + 1, + end_line: node.end_position().row + 1, + metadata: None, + }; + + let entity_id = entity.id.clone(); + entities.push(entity); + worklist.push((value, Some(entity_id), Some(value.kind().to_string()))); + continue; + } + if config.entity_node_types.contains(&node_type) { if let Some(name) = extract_name(node, source) { let name = qualify_hcl_name(&name, node_type, parent_id, suppression_context); @@ -545,17 +612,22 @@ fn visit_node( if !should_skip { // Go method_declaration: extract receiver type for parent linkage. // e.g. `func (t *Transaction) Execute(...)` -> parent is Transaction struct - let effective_parent = if node_type == "method_declaration" && parent_id.is_none() { - extract_go_receiver_struct(node, source, file_path, entities) - } else { - None - }; + let effective_parent = + if node_type == "method_declaration" && parent_id.is_none() { + extract_go_receiver_struct(node, source, file_path, entities) + } else { + None + }; let parent_ref = effective_parent.as_deref().or(parent_id); // Dart top-level signatures are split from their body node. // When a sibling function_body exists, extend the entity to // cover the full definition so body changes are detected. - let body = if config.id == "dart" { sibling_function_body(node) } else { None }; + let body = if config.id == "dart" { + sibling_function_body(node) + } else { + None + }; let end_byte = body.map_or(node.end_byte(), |b| b.end_byte()); let end_line = body.map_or(node.end_position().row + 1, |b| b.end_position().row + 1); @@ -563,11 +635,10 @@ fn visit_node( // Extend start backward to include outer attributes (e.g. Rust // #[derive(...)], #[cfg(...)], #[test]) so attribute changes // are captured as part of the entity diff. - let (start_byte, start_line) = - preceding_attributes_start(node, config).map_or( - (node.start_byte(), node.start_position().row + 1), - |(sb, sr)| (sb, sr + 1), - ); + let (start_byte, start_line) = preceding_attributes_start(node, config).map_or( + (node.start_byte(), node.start_position().row + 1), + |(sb, sr)| (sb, sr + 1), + ); let content = std::str::from_utf8(&source[start_byte..end_byte]) .unwrap_or("") @@ -606,7 +677,11 @@ fn visit_node( let mut inner_cursor = child.walk(); let nested: Vec<_> = child.named_children(&mut inner_cursor).collect(); for n in nested.into_iter().rev() { - worklist.push((n, Some(entity_id.clone()), next_suppression.clone())); + worklist.push(( + n, + Some(entity_id.clone()), + next_suppression.clone(), + )); } } } @@ -636,6 +711,15 @@ fn visit_node( } } + if node_type == "export_statement" + && matches!(config.id, "typescript" | "tsx" | "javascript") + && node.child_by_field_name("source").is_some() + { + if emit_js_ts_re_export_entities(node, file_path, parent_id, source, entities) { + continue; + } + } + // For export statements, look inside for the actual declaration if node_type == "export_statement" { if let Some(declaration) = node.child_by_field_name("declaration") { @@ -664,6 +748,80 @@ fn visit_node( } } +fn emit_js_ts_re_export_entities( + node: Node, + file_path: &str, + parent_id: Option<&str>, + source: &[u8], + entities: &mut Vec, +) -> bool { + let source_path = node + .child_by_field_name("source") + .and_then(|n| n.utf8_text(source).ok()) + .unwrap_or("") + .trim_matches(|c: char| c == '\'' || c == '"'); + if source_path.is_empty() { + return false; + } + + let mut emitted = false; + let mut worklist = vec![node]; + while let Some(current) = worklist.pop() { + let mut cursor = current.walk(); + for child in current.named_children(&mut cursor) { + if child.kind() == "export_specifier" { + let original = child + .child_by_field_name("name") + .and_then(|n| n.utf8_text(source).ok()) + .map(clean_js_ts_export_name) + .unwrap_or_default(); + let local = child + .child_by_field_name("alias") + .and_then(|n| n.utf8_text(source).ok()) + .map(clean_js_ts_export_name) + .unwrap_or_else(|| original.clone()); + + if local.is_empty() { + continue; + } + + let content = node_text(node, source).to_string(); + let mut metadata = HashMap::new(); + metadata.insert("export.source".to_string(), source_path.to_string()); + if !original.is_empty() { + metadata.insert("export.original".to_string(), original); + } + + entities.push(SemanticEntity { + id: build_entity_id(file_path, "export", &local, parent_id), + file_path: file_path.to_string(), + entity_type: "export".to_string(), + name: local, + parent_id: parent_id.map(String::from), + content_hash: content_hash(&content), + structural_hash: Some(compute_structural_hash(child, source)), + content, + start_line: child.start_position().row + 1, + end_line: child.end_position().row + 1, + metadata: Some(metadata), + }); + emitted = true; + } else { + worklist.push(child); + } + } + } + + emitted +} + +fn clean_js_ts_export_name(name: &str) -> String { + name.trim() + .strip_prefix("type ") + .unwrap_or(name.trim()) + .to_string() +} + #[derive(Clone)] struct RecoveredSwiftContainer { name: String, @@ -695,10 +853,9 @@ fn recover_swift_conditional_compilation_containers( while let Some(node) = worklist.pop() { if node.kind() == "ERROR" { if let Some(container) = parse_swift_recovered_container(node, source) { - if !recovered - .iter() - .any(|existing: &RecoveredSwiftContainer| existing.start_byte == container.start_byte) - { + if !recovered.iter().any(|existing: &RecoveredSwiftContainer| { + existing.start_byte == container.start_byte + }) { recovered.push(container); } } @@ -1219,7 +1376,10 @@ fn find_name_byte_range(node: Node, _source: &[u8]) -> Option<(usize, usize)> { } } - if node_type == "module_binding" || node_type == "class_binding" || node_type == "class_type_binding" { + if node_type == "module_binding" + || node_type == "class_binding" + || node_type == "class_type_binding" + { let name_kind = match node_type { "module_binding" => "module_name", "class_binding" => "class_name", @@ -1244,6 +1404,14 @@ fn find_name_byte_range(node: Node, _source: &[u8]) -> Option<(usize, usize)> { } } + if node_type == "pair" { + if let Some(key) = node.child_by_field_name("key") { + if js_ts_object_key_name(key, _source).is_some() { + return Some((key.start_byte(), key.end_byte())); + } + } + } + // OCaml and C type_definition if node_type == "type_definition" { // OCaml: type_definition -> type_binding -> field "name" @@ -1311,7 +1479,9 @@ fn find_declarator_name_range(mut node: Node) -> Option<(usize, usize)> { "qualified_identifier" | "scoped_identifier" => { return Some((node.start_byte(), node.end_byte())); } - "pointer_declarator" | "function_declarator" | "array_declarator" + "pointer_declarator" + | "function_declarator" + | "array_declarator" | "parenthesized_declarator" => { if let Some(inner) = node.child_by_field_name("declarator") { node = inner; @@ -1474,7 +1644,10 @@ fn extract_name(node: Node, source: &[u8]) -> Option { } // For C# property_declaration, namespace_declaration, struct_declaration - if node_type == "property_declaration" || node_type == "namespace_declaration" || node_type == "struct_declaration" { + if node_type == "property_declaration" + || node_type == "namespace_declaration" + || node_type == "struct_declaration" + { if let Some(name_node) = node.child_by_field_name("name") { return Some(node_text(name_node, source).to_string()); } @@ -1684,11 +1857,64 @@ fn should_skip_entity( }) } +fn should_skip_ts_overload_signature( + node: Node, + config: &LanguageConfig, + source: &[u8], + implementation_names_by_scope: &mut HashMap<(usize, usize), HashSet>, +) -> bool { + if !matches!(config.id, "typescript" | "tsx") || node.kind() != "function_signature" { + return false; + } + + let Some(signature_name) = extract_name(node, source) else { + return false; + }; + + let anchor = match node.parent() { + Some(parent) if parent.kind() == "export_statement" => parent, + _ => node, + }; + + let Some(scope) = anchor.parent() else { + return false; + }; + + let scope_key = (scope.start_byte(), scope.end_byte()); + let implementation_names = implementation_names_by_scope + .entry(scope_key) + .or_insert_with(|| collect_ts_function_implementation_names(scope, source)); + + implementation_names.contains(&signature_name) +} + +fn collect_ts_function_implementation_names(scope: Node, source: &[u8]) -> HashSet { + let mut cursor = scope.walk(); + scope + .named_children(&mut cursor) + .filter_map(|sibling| ts_function_declaration_name(sibling, source)) + .collect() +} + +fn ts_function_declaration_name(node: Node, source: &[u8]) -> Option { + let declaration = if node.kind() == "export_statement" { + node.child_by_field_name("declaration")? + } else { + node + }; + + (declaration.kind() == "function_declaration") + .then(|| extract_name(declaration, source)) + .flatten() +} + /// Extract the name from a C declarator (handles pointer_declarator, function_declarator, etc.) fn extract_declarator_name(mut node: Node, source: &[u8]) -> Option { loop { match node.kind() { - "identifier" | "type_identifier" | "field_identifier" => return Some(node_text(node, source).to_string()), + "identifier" | "type_identifier" | "field_identifier" => { + return Some(node_text(node, source).to_string()) + } "qualified_identifier" | "scoped_identifier" => { // For C++ qualified names like ClassName::method, return the full qualified name return Some(node_text(node, source).to_string()); @@ -1733,15 +1959,28 @@ fn map_node_type(tree_sitter_type: &str) -> &str { | "function_item" | "function_signature" | "subroutine_declaration_statement" => "function", - "method_declaration" | "method_definition" | "method" | "singleton_method" - | "method_signature" | "operator_signature" => "method", - "class_declaration" | "class_definition" | "class_specifier" => "class", + "method_declaration" + | "method_definition" + | "method" + | "singleton_method" + | "method_signature" + | "abstract_method_signature" + | "operator_signature" => "method", + "class_declaration" + | "abstract_class_declaration" + | "class_definition" + | "class_specifier" => "class", "interface_declaration" => "interface", "protocol_declaration" => "protocol", "init_declaration" => "init", "deinit_declaration" => "deinit", "subscript_declaration" => "subscript", - "type_alias_declaration" | "typealias_declaration" | "type_declaration" | "type_item" | "type_definition" | "type_alias" => "type", + "type_alias_declaration" + | "typealias_declaration" + | "type_declaration" + | "type_item" + | "type_definition" + | "type_alias" => "type", "associatedtype_declaration" => "associatedtype", "operator_declaration" => "operator", "enum_declaration" | "enum_item" | "enum_specifier" | "enum_definition" => "enum", @@ -1754,7 +1993,11 @@ fn map_node_type(tree_sitter_type: &str) -> &str { "union_specifier" => "union", "impl_item" => "impl", "trait_item" => "trait", - "mod_item" | "module" | "module_definition" | "namespace_definition" | "namespace_declaration" + "mod_item" + | "module" + | "module_definition" + | "namespace_definition" + | "namespace_declaration" | "package_object" => "module", "object_definition" => "object", "trait_definition" => "trait", @@ -1763,7 +2006,9 @@ fn map_node_type(tree_sitter_type: &str) -> &str { "extension_definition" => "extension", "package_statement" => "package", "export_statement" => "export", - "lexical_declaration" | "variable_declaration" | "var_declaration" | "declaration" => "variable", + "lexical_declaration" | "variable_declaration" | "var_declaration" | "declaration" => { + "variable" + } "const_declaration" | "const_item" => "constant", "binding" => "binding", "inherit" | "inherit_from" => "inherit", @@ -1784,7 +2029,11 @@ fn map_node_type(tree_sitter_type: &str) -> &str { } /// Extract entity info from a call node (Elixir macros like def, defmodule, etc.) -fn extract_call_entity(node: Node, config: &LanguageConfig, source: &[u8]) -> Option<(String, &'static str)> { +fn extract_call_entity( + node: Node, + config: &LanguageConfig, + source: &[u8], +) -> Option<(String, &'static str)> { let target = node.child_by_field_name("target")?; if target.kind() != "identifier" { return None; @@ -1809,7 +2058,9 @@ fn extract_call_entity(node: Node, config: &LanguageConfig, source: &[u8]) -> Op // Get arguments node (child by kind, not field name) let mut cursor = node.walk(); - let args = node.named_children(&mut cursor).find(|c| c.kind() == "arguments")?; + let args = node + .named_children(&mut cursor) + .find(|c| c.kind() == "arguments")?; let name = match keyword { "defmodule" | "defprotocol" => extract_first_alias_or_identifier(args, source)?, @@ -1846,7 +2097,8 @@ fn extract_fn_name_from_arg(mut node: Node, source: &[u8]) -> Option { Some(node_text(fn_target, source).to_string()) } else { let mut c = node.walk(); - let id = node.named_children(&mut c) + let id = node + .named_children(&mut c) .find(|n| n.kind() == "identifier")?; Some(node_text(id, source).to_string()) }; @@ -1922,7 +2174,9 @@ fn map_class_member_type(node: Node) -> &'static str { "function_signature" => "method", "getter_signature" => "getter", "setter_signature" => "setter", - "constructor_signature" | "factory_constructor_signature" => "constructor", + "constructor_signature" | "factory_constructor_signature" => { + "constructor" + } "operator_signature" => "method", _ => continue, }; @@ -1971,8 +2225,11 @@ fn map_entity_type(node: Node, config: &LanguageConfig) -> &'static str { match node.kind() { "decorated_definition" => map_decorated_type(node), "class_member" => map_class_member_type(node), - "class_declaration" if config.id == "swift" => swift_class_declaration_type(node) + "method_definition" => map_js_ts_accessor_method_type(node, config) .unwrap_or_else(|| map_node_type(node.kind())), + "class_declaration" if config.id == "swift" => { + swift_class_declaration_type(node).unwrap_or_else(|| map_node_type(node.kind())) + } // C/C++ declarations with a function_declarator are function prototypes, // not variables (#152). "declaration" if matches!(config.id, "c" | "cpp") && has_function_declarator(node) => { @@ -1984,6 +2241,31 @@ fn map_entity_type(node: Node, config: &LanguageConfig) -> &'static str { } } +fn map_js_ts_accessor_method_type(node: Node, config: &LanguageConfig) -> Option<&'static str> { + if !matches!(config.id, "typescript" | "tsx" | "javascript") { + return None; + } + + let name = node.child_by_field_name("name")?; + let mut accessor_type = None; + let mut cursor = node.walk(); + for child in node.children(&mut cursor) { + if child.start_byte() >= name.start_byte() { + break; + } + if child.is_extra() { + continue; + } + match child.kind() { + "get" => accessor_type = Some("getter"), + "set" => accessor_type = Some("setter"), + _ => {} + } + } + + accessor_type +} + fn swift_class_declaration_type(node: Node) -> Option<&'static str> { let declaration_kind = node.child_by_field_name("declaration_kind")?; swift_declaration_keyword_type(declaration_kind.kind()) @@ -2051,7 +2333,9 @@ fn promote_js_ts_const_function(node: Node, config: &LanguageConfig) -> Option<& } let mut cursor = node.walk(); - let declarator = node.named_children(&mut cursor).find(|child| child.kind() == "variable_declarator")?; + let declarator = node + .named_children(&mut cursor) + .find(|child| child.kind() == "variable_declarator")?; promote_js_ts_const_declarator_function(node, declarator, config) } @@ -2087,26 +2371,61 @@ fn push_js_ts_initializer_children<'tree>( node: Node<'tree>, entity_id: &str, ) { - if let Some(initializer_child) = js_ts_initializer_child(config, node, Some(entity_id)) { + let initializer_children = js_ts_initializer_children(config, node, Some(entity_id)); + for initializer_child in initializer_children.into_iter().rev() { worklist.push(initializer_child); } } -fn js_ts_initializer_child<'tree>( +fn js_ts_initializer_children<'tree>( config: &LanguageConfig, node: Node<'tree>, parent_id: Option<&str>, -) -> Option<(Node<'tree>, Option, Option)> { +) -> Vec<(Node<'tree>, Option, Option)> { if !matches!(config.id, "typescript" | "tsx" | "javascript") { - return None; + return Vec::new(); + } + + let Some(value) = js_ts_initializer_value(config, node) else { + return Vec::new(); + }; + + if value.kind() == "object" { + return js_ts_object_initializer_children(value, parent_id); } - let value = js_ts_initializer_value(config, node)?; - Some(( + vec![( value, parent_id.map(String::from), Some(value.kind().to_string()), - )) + )] +} + +fn js_ts_object_initializer_children<'tree>( + object: Node<'tree>, + parent_id: Option<&str>, +) -> Vec<(Node<'tree>, Option, Option)> { + let mut cursor = object.walk(); + object + .named_children(&mut cursor) + .filter_map(|child| { + let suppression_context = if child.kind() == "method_definition" { + Some(child.kind().to_string()) + } else if js_ts_pair_function_value(child).is_some() { + Some("object".to_string()) + } else { + None + }; + + suppression_context.map(|suppression_context| { + ( + child, + parent_id.map(String::from), + Some(suppression_context), + ) + }) + }) + .collect() } fn js_ts_initializer_value<'tree>( @@ -2129,7 +2448,59 @@ fn js_ts_initializer_value<'tree>( } fn is_js_ts_initializer_node(config: &LanguageConfig, node: Node) -> bool { - config.scope_boundary_types.contains(&node.kind()) || node.kind() == "class" + config.scope_boundary_types.contains(&node.kind()) || matches!(node.kind(), "class" | "object") +} + +fn extract_js_ts_object_function_pair<'tree>( + node: Node<'tree>, + config: &LanguageConfig, + source: &[u8], + suppression_context: Option<&str>, +) -> Option<(String, Node<'tree>)> { + if !matches!(config.id, "typescript" | "tsx" | "javascript") + || suppression_context != Some("object") + { + return None; + } + + let value = js_ts_pair_function_value(node)?; + let key = node.child_by_field_name("key")?; + Some((js_ts_object_key_name(key, source)?, value)) +} + +fn js_ts_pair_function_value(node: Node) -> Option { + if node.kind() != "pair" { + return None; + } + + let value = node.child_by_field_name("value")?; + matches!( + value.kind(), + "arrow_function" | "function_expression" | "generator_function" + ) + .then_some(value) +} + +fn js_ts_object_key_name(key: Node, source: &[u8]) -> Option { + let text = node_text(key, source).trim(); + if text.is_empty() || text.starts_with('[') { + return None; + } + + let name = match key.kind() { + "string" | "template_string" => text + .trim_matches('"') + .trim_matches('\'') + .trim_matches('`') + .to_string(), + _ => text.to_string(), + }; + + if name.is_empty() || (key.kind() == "template_string" && name.contains("${")) { + return None; + } + + Some(name) } /// Dart constructor signatures use `field("name", seq(identifier, optional(".", identifier)))`, @@ -2144,7 +2515,9 @@ const DART_CONSTRUCTOR_SIG_KINDS: &[&str] = &[ fn extract_dart_constructor_full_name(sig: Node, source: &[u8]) -> Option { let (start, end) = dart_constructor_name_byte_range(sig)?; - std::str::from_utf8(&source[start..end]).ok().map(|s| s.to_string()) + std::str::from_utf8(&source[start..end]) + .ok() + .map(|s| s.to_string()) } /// Byte range spanning all "name" field children of a Dart constructor signature, @@ -2346,7 +2719,8 @@ fn extract_ocaml_named_bindings( entities: &mut Vec, ) -> bool { let mut cursor = node.walk(); - let bindings: Vec<_> = node.named_children(&mut cursor) + let bindings: Vec<_> = node + .named_children(&mut cursor) .filter(|c| c.kind() == binding_kind) .collect(); if bindings.is_empty() { @@ -2354,7 +2728,8 @@ fn extract_ocaml_named_bindings( } for binding in bindings { let mut inner = binding.walk(); - let name = binding.named_children(&mut inner) + let name = binding + .named_children(&mut inner) .find(|c| c.kind() == name_kind) .map(|c| node_text(c, source).to_string()); if let Some(name) = name { @@ -2426,7 +2801,10 @@ fn extract_go_receiver_struct( for e in entities.iter().rev() { if e.file_path == file_path && e.name == struct_name - && matches!(e.entity_type.as_str(), "type" | "struct" | "class" | "interface") + && matches!( + e.entity_type.as_str(), + "type" | "struct" | "class" | "interface" + ) { return Some(e.id.clone()); } @@ -2483,7 +2861,10 @@ fn extract_js_test_call(node: Node, source: &[u8]) -> Option<(String, &'static s } _ => { // Hooks like beforeEach don't need a string name - if matches!(callee_name, "beforeEach" | "afterEach" | "beforeAll" | "afterAll") { + if matches!( + callee_name, + "beforeEach" | "afterEach" | "beforeAll" | "afterAll" + ) { callee_name.to_string() } else { return None; diff --git a/crates/sem-core/src/parser/plugins/code/languages.rs b/crates/sem-core/src/parser/plugins/code/languages.rs index d2183f44..a1c73e25 100644 --- a/crates/sem-core/src/parser/plugins/code/languages.rs +++ b/crates/sem-core/src/parser/plugins/code/languages.rs @@ -418,15 +418,21 @@ static TYPESCRIPT_CONFIG: LanguageConfig = LanguageConfig { entity_node_types: &[ "function_declaration", "generator_function_declaration", + "function_signature", "class_declaration", + "abstract_class_declaration", "interface_declaration", "type_alias_declaration", "enum_declaration", + "internal_module", + "module", "export_statement", "lexical_declaration", "variable_declaration", "method_definition", + "abstract_method_signature", "public_field_definition", + "function_signature", "method_signature", "property_signature", ], @@ -450,15 +456,21 @@ static TSX_CONFIG: LanguageConfig = LanguageConfig { entity_node_types: &[ "function_declaration", "generator_function_declaration", + "function_signature", "class_declaration", + "abstract_class_declaration", "interface_declaration", "type_alias_declaration", "enum_declaration", + "internal_module", + "module", "export_statement", "lexical_declaration", "variable_declaration", "method_definition", + "abstract_method_signature", "public_field_definition", + "function_signature", "method_signature", "property_signature", ], diff --git a/crates/sem-core/src/parser/scope_resolve.rs b/crates/sem-core/src/parser/scope_resolve.rs index 3028932b..a7a7049d 100644 --- a/crates/sem-core/src/parser/scope_resolve.rs +++ b/crates/sem-core/src/parser/scope_resolve.rs @@ -13,7 +13,7 @@ use std::collections::{HashMap, HashSet}; use std::path::Path; -use std::sync::Arc; +use std::sync::{Arc, OnceLock}; #[cfg(feature = "parallel")] use rayon::prelude::*; @@ -33,7 +33,10 @@ macro_rules! maybe_par_iter { }}; } use crate::parser::graph::{EntityInfo, RefType}; -use crate::parser::import_resolution::{find_import_target, import_source_matches_file}; +use crate::parser::import_resolution::{ + find_import_file, find_import_target, import_source_matches_file, is_js_ts_file, + sort_import_candidate_files, JS_TS_EXTENSIONS, +}; use crate::parser::plugins::code::languages::{ get_language_config, AssignmentStrategy, CallNodeStyle, ClassNameField, InitStrategy, ParamNameField, ScopeResolveConfig, @@ -221,9 +224,7 @@ fn find_entity_source_span( } for (candidate_offset, _) in line.match_indices(first_content_line) { - if let Some(span) = - source_span_at(source, &entity.content, line_start + candidate_offset) - { + if let Some(span) = source_span_at(source, &entity.content, line_start + candidate_offset) { return Some(span); } } @@ -269,12 +270,29 @@ pub struct ResolutionEntry { pub(crate) struct PreBuiltLookups { pub(crate) symbol_table: Arc>>, pub(crate) class_members: HashMap>, + pub(crate) owner_members: HashMap>, pub(crate) entity_ranges: HashMap>, /// Go package index: pkg_name → [(entity_name, entity_id)] /// Avoids O(symbol_table) scan per Go import. pub(crate) go_pkg_index: HashMap>, } +struct TsDefaultExportTable { + exports_by_file: HashMap, + sorted_files: Vec, +} + +struct TsDefaultReExport { + file_path: String, + original_name: String, + module_path: String, +} + +struct TopLevelEntityIndex { + entities_by_file: HashMap>, + sorted_files: Vec, +} + struct FileEntityLookup<'a> { by_name: HashMap<&'a str, Vec<&'a SemanticEntity>>, } @@ -346,7 +364,15 @@ pub fn resolve_with_scopes( entity_map: &HashMap, pre_parsed: Option>, ) -> ScopeResult { - resolve_with_scopes_full(root, file_paths, all_entities, entity_map, pre_parsed, None) + resolve_with_scopes_full( + root, + file_paths, + all_entities, + entity_map, + pre_parsed, + None, + None, + ) } /// Internal version with pre-built lookups for performance. @@ -357,65 +383,74 @@ pub(crate) fn resolve_with_scopes_full( entity_map: &HashMap, pre_parsed: Option>, pre_built: Option, + pre_built_import_table: Option<&HashMap<(String, String), String>>, ) -> ScopeResult { let mut all_edges: Vec<(String, String, RefType)> = Vec::new(); let mut log: Vec = Vec::new(); // Use pre-built lookups if provided, otherwise build from scratch - let (symbol_table, class_members, entity_ranges, go_pkg_index) = if let Some(pb) = pre_built { - ( - pb.symbol_table, - pb.class_members, - pb.entity_ranges, - pb.go_pkg_index, - ) - } else { - let mut symbol_table: HashMap> = HashMap::new(); - let mut class_members: HashMap> = HashMap::new(); - let mut entity_ranges: HashMap> = HashMap::new(); + let (symbol_table, class_members, owner_members, entity_ranges, go_pkg_index) = + if let Some(pb) = pre_built { + ( + pb.symbol_table, + pb.class_members, + pb.owner_members, + pb.entity_ranges, + pb.go_pkg_index, + ) + } else { + let mut symbol_table: HashMap> = HashMap::new(); + let mut class_members: HashMap> = HashMap::new(); + let mut owner_members: HashMap> = HashMap::new(); + let mut entity_ranges: HashMap> = HashMap::new(); + + for entity in all_entities { + symbol_table + .entry(entity.name.clone()) + .or_default() + .push(entity.id.clone()); - for entity in all_entities { - symbol_table - .entry(entity.name.clone()) - .or_default() - .push(entity.id.clone()); + if let Some(ref pid) = entity.parent_id { + owner_members + .entry(pid.clone()) + .or_default() + .push((entity.name.clone(), entity.id.clone())); + if let Some(parent) = entity_map.get(pid) { + if let Some(owner_name) = class_member_owner_name(parent) { + class_members + .entry(owner_name.to_string()) + .or_default() + .push((entity.name.clone(), entity.id.clone())); + } + } + } - if let Some(ref pid) = entity.parent_id { - if let Some(parent) = entity_map.get(pid) { - if let Some(owner_name) = class_member_owner_name(parent) { + if entity.entity_type == "method" && entity.file_path.ends_with(".go") { + if let Some(struct_name) = extract_go_receiver_type(&entity.content) { class_members - .entry(owner_name.to_string()) + .entry(struct_name) .or_default() .push((entity.name.clone(), entity.id.clone())); } } - } - if entity.entity_type == "method" && entity.file_path.ends_with(".go") { - if let Some(struct_name) = extract_go_receiver_type(&entity.content) { - class_members - .entry(struct_name) - .or_default() - .push((entity.name.clone(), entity.id.clone())); - } + entity_ranges + .entry(entity.file_path.clone()) + .or_default() + .push((entity.start_line, entity.end_line, entity.id.clone())); } - entity_ranges - .entry(entity.file_path.clone()) - .or_default() - .push((entity.start_line, entity.end_line, entity.id.clone())); - } + // Build Go package index for O(1) import lookup + let go_pkg_index = build_go_pkg_index(&symbol_table, entity_map); - // Build Go package index for O(1) import lookup - let go_pkg_index = build_go_pkg_index(&symbol_table, entity_map); - - ( - Arc::new(symbol_table), - class_members, - entity_ranges, - go_pkg_index, - ) - }; + ( + Arc::new(symbol_table), + class_members, + owner_members, + entity_ranges, + go_pkg_index, + ) + }; // Build file-path indexed entity lookup: file_path -> Vec<&SemanticEntity> let mut entities_by_file: HashMap<&str, Vec<&SemanticEntity>> = HashMap::new(); @@ -483,6 +518,8 @@ pub(crate) fn resolve_with_scopes_full( } } let parsed_files: &[(String, String, tree_sitter::Tree)] = &owned_parsed_files; + let ts_default_exports = build_ts_default_export_table(parsed_files, &symbol_table, entity_map); + let top_level_entities = OnceLock::new(); // Pass 1: Scan ALL files for return types and instance attr types first // This ensures cross-file return type info is available during resolution @@ -617,6 +654,18 @@ pub(crate) fn resolve_with_scopes_full( ); let mut local_import_table: HashMap<(String, String), String> = HashMap::new(); + if let Some(import_table) = pre_built_import_table { + for ((import_file_path, local_name), target_id) in import_table { + if import_file_path != file_path { + continue; + } + local_import_table.insert( + (import_file_path.clone(), local_name.clone()), + target_id.clone(), + ); + scopes[0].defs.insert(local_name.clone(), target_id.clone()); + } + } extract_imports_from_ast( tree.root_node(), file_path, @@ -627,6 +676,8 @@ pub(crate) fn resolve_with_scopes_full( &mut scopes, config, &go_pkg_index, + &ts_default_exports, + &top_level_entities, ); // Resolve pending call types using the complete return type map @@ -707,6 +758,7 @@ pub(crate) fn resolve_with_scopes_full( &scopes, &symbol_table, &class_members, + &owner_members, &local_import_table, &instance_attr_types, entity_map, @@ -868,10 +920,9 @@ fn build_descendant_ranges_by_entity( let mut ancestor_stack: Vec<&SemanticEntity> = Vec::new(); for entity in sorted_entities { - while ancestor_stack - .last() - .map_or(false, |candidate| !is_strict_enclosing_range(candidate, entity)) - { + while ancestor_stack.last().map_or(false, |candidate| { + !is_strict_enclosing_range(candidate, entity) + }) { ancestor_stack.pop(); } @@ -3092,6 +3143,332 @@ fn inject_return_type_bindings( } } +fn build_ts_default_export_table( + parsed_files: &[(String, String, tree_sitter::Tree)], + symbol_table: &HashMap>, + entity_map: &HashMap, +) -> TsDefaultExportTable { + let mut default_exports = HashMap::new(); + let mut re_exports = Vec::new(); + + for (file_path, content, tree) in parsed_files { + if !is_js_ts_file(file_path) { + continue; + } + + let extracted = extract_ts_default_exports(tree.root_node(), content.as_bytes()); + for name in extracted.names { + let Some(target_ids) = symbol_table.get(&name) else { + continue; + }; + let target = target_ids.iter().find(|id| { + entity_map.get(*id).map_or(false, |entity| { + entity.file_path == *file_path && entity.parent_id.is_none() + }) + }); + if let Some(target_id) = target { + default_exports.insert(file_path.clone(), target_id.clone()); + } + } + re_exports.extend( + extracted + .re_exports + .into_iter() + .map(|(original_name, module_path)| TsDefaultReExport { + file_path: file_path.clone(), + original_name, + module_path, + }), + ); + } + + resolve_ts_default_re_exports(&mut default_exports, re_exports, symbol_table, entity_map); + let sorted_files = sorted_default_export_files(&default_exports); + + TsDefaultExportTable { + exports_by_file: default_exports, + sorted_files, + } +} + +fn sorted_default_export_files(default_exports: &HashMap) -> Vec { + let mut sorted_files: Vec = default_exports.keys().cloned().collect(); + sort_import_candidate_files(&mut sorted_files, JS_TS_EXTENSIONS); + sorted_files +} + +fn resolve_ts_default_re_exports( + default_exports: &mut HashMap, + pending: Vec, + symbol_table: &HashMap>, + entity_map: &HashMap, +) { + let mut pending = pending; + while !pending.is_empty() { + let sorted_files = sorted_default_export_files(default_exports); + let mut unresolved = Vec::new(); + let mut progressed = false; + + for re_export in pending { + let target_id = if re_export.original_name == "default" { + find_import_file( + &sorted_files, + &re_export.module_path, + &re_export.file_path, + JS_TS_EXTENSIONS, + ) + .and_then(|target_file| default_exports.get(target_file)) + .cloned() + } else { + symbol_table + .get(&re_export.original_name) + .and_then(|target_ids| { + find_import_target( + target_ids, + &re_export.module_path, + &re_export.file_path, + JS_TS_EXTENSIONS, + entity_map, + ) + .cloned() + }) + }; + + if let Some(target_id) = target_id { + default_exports.insert(re_export.file_path, target_id); + progressed = true; + } else { + unresolved.push(re_export); + } + } + + if !progressed { + break; + } + pending = unresolved; + } +} + +fn build_top_level_entity_index( + symbol_table: &HashMap>, + entity_map: &HashMap, +) -> TopLevelEntityIndex { + let mut entities_by_file: HashMap> = HashMap::new(); + + for (name, target_ids) in symbol_table { + for target_id in target_ids { + let Some(info) = entity_map.get(target_id) else { + continue; + }; + if info.parent_id.is_some() { + continue; + } + entities_by_file + .entry(info.file_path.clone()) + .or_default() + .push((name.clone(), target_id.clone())); + } + } + + let mut sorted_files: Vec = entities_by_file.keys().cloned().collect(); + sort_import_candidate_files(&mut sorted_files, JS_TS_EXTENSIONS); + + TopLevelEntityIndex { + entities_by_file, + sorted_files, + } +} + +struct TsDefaultExports { + names: Vec, + re_exports: Vec<(String, String)>, +} + +fn extract_ts_default_exports(root: tree_sitter::Node, source: &[u8]) -> TsDefaultExports { + let mut names = Vec::new(); + let mut re_exports = Vec::new(); + let mut worklist = vec![root]; + + while let Some(node) = worklist.pop() { + if node.kind() == "export_statement" { + let has_source = node.child_by_field_name("source").is_some(); + let source_path = node + .child_by_field_name("source") + .and_then(|n| n.utf8_text(source).ok()) + .map(|text| { + text.trim_matches(|c: char| c == '\'' || c == '"') + .to_string() + }); + let text = node.utf8_text(source).unwrap_or(""); + if !has_source { + if let Some(declaration) = node.child_by_field_name("declaration") { + if text.contains("default") { + if let Some(name) = ts_default_declaration_name(declaration, source) { + names.push(name); + } + } + } else if text.contains("default") && !has_ts_export_specifier(node) { + if let Some(name) = ts_bare_default_export_identifier(node, source) { + names.push(name); + } + } + } + collect_ts_default_export_specifiers( + node, + source, + source_path.as_deref(), + &mut names, + &mut re_exports, + ); + } + + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + worklist.push(child); + } + } + + TsDefaultExports { names, re_exports } +} + +fn ts_default_declaration_name(node: tree_sitter::Node, source: &[u8]) -> Option { + match node.kind() { + "function_declaration" + | "generator_function_declaration" + | "class_declaration" + | "abstract_class_declaration" + | "lexical_declaration" + | "variable_declaration" => ts_declaration_name(node, source), + "identifier" => node.utf8_text(source).ok().map(str::to_string), + _ => None, + } +} + +fn has_ts_export_specifier(node: tree_sitter::Node) -> bool { + let mut worklist = vec![node]; + while let Some(current) = worklist.pop() { + let mut cursor = current.walk(); + for child in current.named_children(&mut cursor) { + if child.kind() == "export_specifier" { + return true; + } + worklist.push(child); + } + } + false +} + +fn collect_ts_default_export_specifiers( + node: tree_sitter::Node, + source: &[u8], + source_path: Option<&str>, + names: &mut Vec, + re_exports: &mut Vec<(String, String)>, +) { + let mut worklist = vec![node]; + while let Some(current) = worklist.pop() { + let mut cursor = current.walk(); + for child in current.named_children(&mut cursor) { + if child.kind() == "export_specifier" { + let original = child + .child_by_field_name("name") + .and_then(|n| n.utf8_text(source).ok()) + .unwrap_or(""); + let local = child + .child_by_field_name("alias") + .and_then(|n| n.utf8_text(source).ok()) + .unwrap_or(original); + if local == "default" && !original.is_empty() { + if let Some(source_path) = source_path { + re_exports.push((original.to_string(), source_path.to_string())); + } else { + names.push(original.to_string()); + } + } + } else { + worklist.push(child); + } + } + } +} + +fn ts_declaration_name(node: tree_sitter::Node, source: &[u8]) -> Option { + if let Some(name) = node.child_by_field_name("name") { + return Some(name.utf8_text(source).ok()?.to_string()); + } + + if node.kind() == "lexical_declaration" || node.kind() == "variable_declaration" { + let mut cursor = node.walk(); + for child in node.named_children(&mut cursor) { + if child.kind() == "variable_declarator" { + if let Some(name) = child.child_by_field_name("name") { + return Some(name.utf8_text(source).ok()?.to_string()); + } + } + } + } + + let mut cursor = node.walk(); + let name = node + .named_children(&mut cursor) + .find(|child| matches!(child.kind(), "identifier" | "type_identifier")) + .and_then(|child| child.utf8_text(source).ok()) + .map(str::to_string); + name +} + +fn ts_bare_default_export_identifier(node: tree_sitter::Node, source: &[u8]) -> Option { + let text = node.utf8_text(source).ok()?.trim(); + let rest = text.strip_prefix("export")?.trim_start(); + let rest = rest.strip_prefix("default")?.trim_start(); + let name_end = js_ts_identifier_end(rest)?; + let name = &rest[..name_end]; + let trailing = rest[name_end..].trim_start(); + only_js_ts_statement_trivia(trailing).then(|| name.to_string()) +} + +fn js_ts_identifier_end(text: &str) -> Option { + let mut chars = text.char_indices(); + let (_, first) = chars.next()?; + if !(first == '_' || first == '$' || first.is_ascii_alphabetic()) { + return None; + } + + let mut end = first.len_utf8(); + for (idx, ch) in chars { + if ch == '_' || ch == '$' || ch.is_ascii_alphanumeric() { + end = idx + ch.len_utf8(); + } else { + break; + } + } + Some(end) +} + +fn only_js_ts_statement_trivia(mut text: &str) -> bool { + loop { + text = text.trim_start(); + if let Some(rest) = text.strip_prefix(';') { + text = rest; + continue; + } + if text.is_empty() { + return true; + } + if text.starts_with("//") { + return true; + } + if let Some(rest) = text.strip_prefix("/*") { + let Some(end) = rest.find("*/") else { + return false; + }; + text = &rest[end + 2..]; + continue; + } + return false; + } +} + /// Extract import statements from the AST. fn extract_imports_from_ast( root: tree_sitter::Node, @@ -3103,6 +3480,8 @@ fn extract_imports_from_ast( scopes: &mut Vec, config: &ScopeResolveConfig, go_pkg_index: &HashMap>, + ts_default_exports: &TsDefaultExportTable, + top_level_entities: &OnceLock, ) { let mut worklist = vec![root]; while let Some(node) = worklist.pop() { @@ -3148,6 +3527,21 @@ fn extract_imports_from_ast( entity_map, import_table, scopes, + ts_default_exports, + top_level_entities, + ); + true + } + "export_statement" if !config.self_keywords.contains(&"cls") => { + extract_ts_re_export( + child, + file_path, + source, + symbol_table, + entity_map, + import_table, + scopes, + ts_default_exports, ); true } @@ -3194,6 +3588,8 @@ fn extract_ts_import( entity_map: &HashMap, import_table: &mut HashMap<(String, String), String>, scopes: &mut Vec, + ts_default_exports: &TsDefaultExportTable, + top_level_entities: &OnceLock, ) { // Extract the source module from the `from '...'` clause let source_path = node @@ -3232,7 +3628,7 @@ fn extract_ts_import( local, source_path, file_path, - &[".ts", ".tsx", ".js", ".jsx"], + JS_TS_EXTENSIONS, symbol_table, entity_map, import_table, @@ -3255,11 +3651,12 @@ fn extract_ts_import( .and_then(|n| n.utf8_text(source).ok()) .unwrap_or(""); if !alias.is_empty() { - register_namespace_import( + register_ts_namespace_import( alias, source_path, file_path, - &[".ts", ".tsx", ".js", ".jsx"], + JS_TS_EXTENSIONS, + top_level_entities, symbol_table, entity_map, import_table, @@ -3270,12 +3667,78 @@ fn extract_ts_import( // Default import: import Foo from './module' let name = clause_child.utf8_text(source).unwrap_or(""); if !name.is_empty() { - resolve_import_name( - name, + resolve_default_import( name, source_path, file_path, - &[".ts", ".tsx", ".js", ".jsx"], + JS_TS_EXTENSIONS, + ts_default_exports, + import_table, + scopes, + ); + } + } + } + } + } +} + +fn extract_ts_re_export( + node: tree_sitter::Node, + file_path: &str, + source: &[u8], + symbol_table: &HashMap>, + entity_map: &HashMap, + import_table: &mut HashMap<(String, String), String>, + scopes: &mut Vec, + ts_default_exports: &TsDefaultExportTable, +) { + let source_path = node + .child_by_field_name("source") + .and_then(|n| n.utf8_text(source).ok()) + .unwrap_or("") + .trim_matches(|c: char| c == '\'' || c == '"'); + + if source_path.is_empty() { + return; + } + + let mut worklist = vec![node]; + while let Some(current) = worklist.pop() { + let mut cursor = current.walk(); + for child in current.named_children(&mut cursor) { + match child.kind() { + "export_specifier" => { + let original = child + .child_by_field_name("name") + .and_then(|n| n.utf8_text(source).ok()) + .unwrap_or(""); + let local = child + .child_by_field_name("alias") + .and_then(|n| n.utf8_text(source).ok()) + .unwrap_or(original); + + if original.is_empty() || local.is_empty() { + continue; + } + + if original == "default" { + resolve_default_import( + local, + source_path, + file_path, + JS_TS_EXTENSIONS, + ts_default_exports, + import_table, + scopes, + ); + } else { + resolve_import_name( + original, + local, + source_path, + file_path, + JS_TS_EXTENSIONS, symbol_table, entity_map, import_table, @@ -3283,6 +3746,10 @@ fn extract_ts_import( ); } } + "export_clause" | "namespace_export" => { + worklist.push(child); + } + _ => {} } } } @@ -3514,9 +3981,70 @@ fn resolve_import_name( } } +fn resolve_default_import( + local_name: &str, + source_path: &str, + file_path: &str, + extensions: &[&str], + default_exports: &TsDefaultExportTable, + import_table: &mut HashMap<(String, String), String>, + scopes: &mut Vec, +) { + let target = find_import_file( + &default_exports.sorted_files, + source_path, + file_path, + extensions, + ) + .and_then(|target_file| default_exports.exports_by_file.get(target_file)) + .cloned(); + + if let Some(target_id) = target { + import_table.insert( + (file_path.to_string(), local_name.to_string()), + target_id.clone(), + ); + if !scopes.is_empty() { + scopes[0].defs.insert(local_name.to_string(), target_id); + } + } +} + /// Register all entities from a source module under a namespace alias. /// For `import * as m from './module'`, all entities from the module /// are registered so that `m.foo()` resolves via the method call path. +fn register_ts_namespace_import( + alias: &str, + source_path: &str, + file_path: &str, + extensions: &[&str], + top_level_entities: &OnceLock, + symbol_table: &HashMap>, + entity_map: &HashMap, + import_table: &mut HashMap<(String, String), String>, + _scopes: &mut Vec, +) { + let top_level_entities = + top_level_entities.get_or_init(|| build_top_level_entity_index(symbol_table, entity_map)); + let Some(candidate_file) = find_import_file( + &top_level_entities.sorted_files, + source_path, + file_path, + extensions, + ) else { + return; + }; + let Some(entries) = top_level_entities.entities_by_file.get(candidate_file) else { + return; + }; + for (name, target_id) in entries { + let qualified_name = format!("{alias}.{name}"); + import_table + .entry((file_path.to_string(), qualified_name)) + .or_insert_with(|| target_id.clone()); + } +} + fn register_namespace_import( alias: &str, source_path: &str, @@ -4317,6 +4845,7 @@ fn resolve_ref( scopes: &[Scope], symbol_table: &HashMap>, class_members: &HashMap>, + owner_members: &HashMap>, import_table: &HashMap<(String, String), String>, instance_attr_types: &HashMap<(String, String), String>, entity_map: &HashMap, @@ -4388,10 +4917,8 @@ fn resolve_ref( } else { Vec::new() }; - if has_ambiguous_swift_signature_candidates( - &visible_targets, - swift_call_signatures, - ) { + if has_ambiguous_swift_signature_candidates(&visible_targets, swift_call_signatures) + { return None; } } @@ -4651,8 +5178,20 @@ fn resolve_ref( lookup_scope_chain_cached(scope_idx, scopes, receiver, lookup_cache) { if let Some(info) = entity_map.get(&class_id) { - if matches!(info.entity_type.as_str(), "class" | "struct" | "interface") + if matches!(info.entity_type.as_str(), "module" | "variable" | "object") && info.name == receiver + { + if let Some(mid) = + lookup_entity_member(owner_members, &class_id, method).or_else( + || lookup_owned_scope_member(scopes, &class_id, method), + ) + { + return Some((mid, RefType::Calls, "scope_chain")); + } + } else if matches!( + info.entity_type.as_str(), + "class" | "struct" | "interface" + ) && info.name == receiver { if let Some(members) = class_members.get(&info.name) { match select_member_candidate( @@ -4822,6 +5361,24 @@ fn is_simple_identifier_name(name: &str) -> bool { (first == '_' || first.is_alphabetic()) && chars.all(|ch| ch == '_' || ch.is_alphanumeric()) } +fn lookup_owned_scope_member(scopes: &[Scope], owner_id: &str, member: &str) -> Option { + scopes + .iter() + .find(|scope| scope.owner_id.as_deref() == Some(owner_id)) + .and_then(|scope| scope.defs.get(member).cloned()) +} + +fn lookup_entity_member( + owner_members: &HashMap>, + owner_id: &str, + member: &str, +) -> Option { + owner_members + .get(owner_id) + .and_then(|members| members.iter().find(|(name, _)| name == member)) + .map(|(_, id)| id.clone()) +} + /// Find the class name for the enclosing class scope. fn find_enclosing_class( start_scope: usize,