From b297ea45a02b90ee2ea3792c6dc2418ead78fea8 Mon Sep 17 00:00:00 2001 From: Hesham Salman Date: Mon, 1 Jun 2026 16:51:43 -0400 Subject: [PATCH] fix: extract TypeScript ambient function signatures --- .../parser/plugins/code/entity_extractor.rs | 64 +++++++- .../src/parser/plugins/code/languages.rs | 2 + .../sem-core/src/parser/plugins/code/mod.rs | 144 ++++++++++++++++++ 3 files changed, 209 insertions(+), 1 deletion(-) diff --git a/crates/sem-core/src/parser/plugins/code/entity_extractor.rs b/crates/sem-core/src/parser/plugins/code/entity_extractor.rs index f38973fe..553669c8 100644 --- a/crates/sem-core/src/parser/plugins/code/entity_extractor.rs +++ b/crates/sem-core/src/parser/plugins/code/entity_extractor.rs @@ -1,6 +1,6 @@ use tree_sitter::{Node, Tree}; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use crate::model::entity::{build_entity_id, build_entity_id_disambiguated, SemanticEntity}; use crate::utils::hash::{content_hash, structural_hash, structural_hash_excluding_range}; use super::languages::LanguageConfig; @@ -112,6 +112,8 @@ fn visit_node( root_parent_id.map(str::to_owned), root_suppression.map(str::to_owned), )]; + let mut ts_implementation_names_by_scope: HashMap<(usize, usize), HashSet> = + HashMap::new(); while let Some((node, pid_owned, sup_owned)) = worklist.pop() { let parent_id = pid_owned.as_deref(); @@ -383,6 +385,15 @@ fn visit_node( } } + if should_skip_ts_overload_signature( + node, + config, + source, + &mut ts_implementation_names_by_scope, + ) { + continue; + } + if config.entity_node_types.contains(&node_type) { if let Some(name) = extract_name(node, source) { let name = qualify_hcl_name(&name, node_type, parent_id, suppression_context); @@ -1501,6 +1512,57 @@ fn should_skip_entity( }) } +fn should_skip_ts_overload_signature( + node: Node, + config: &LanguageConfig, + source: &[u8], + implementation_names_by_scope: &mut HashMap<(usize, usize), HashSet>, +) -> bool { + if !matches!(config.id, "typescript" | "tsx") || node.kind() != "function_signature" { + return false; + } + + let Some(signature_name) = extract_name(node, source) else { + return false; + }; + + let anchor = match node.parent() { + Some(parent) if parent.kind() == "export_statement" => parent, + _ => node, + }; + + let Some(scope) = anchor.parent() else { + return false; + }; + + let scope_key = (scope.start_byte(), scope.end_byte()); + let implementation_names = implementation_names_by_scope + .entry(scope_key) + .or_insert_with(|| collect_ts_function_implementation_names(scope, source)); + + implementation_names.contains(&signature_name) +} + +fn collect_ts_function_implementation_names(scope: Node, source: &[u8]) -> HashSet { + let mut cursor = scope.walk(); + scope + .named_children(&mut cursor) + .filter_map(|sibling| ts_function_declaration_name(sibling, source)) + .collect() +} + +fn ts_function_declaration_name(node: Node, source: &[u8]) -> Option { + let declaration = if node.kind() == "export_statement" { + node.child_by_field_name("declaration")? + } else { + node + }; + + (declaration.kind() == "function_declaration") + .then(|| extract_name(declaration, source)) + .flatten() +} + /// Extract the name from a C declarator (handles pointer_declarator, function_declarator, etc.) fn extract_declarator_name(mut node: Node, source: &[u8]) -> Option { loop { diff --git a/crates/sem-core/src/parser/plugins/code/languages.rs b/crates/sem-core/src/parser/plugins/code/languages.rs index e809f7b2..a150a9ee 100644 --- a/crates/sem-core/src/parser/plugins/code/languages.rs +++ b/crates/sem-core/src/parser/plugins/code/languages.rs @@ -421,6 +421,7 @@ static TYPESCRIPT_CONFIG: LanguageConfig = LanguageConfig { "variable_declaration", "method_definition", "public_field_definition", + "function_signature", "method_signature", "property_signature", ], @@ -448,6 +449,7 @@ static TSX_CONFIG: LanguageConfig = LanguageConfig { "variable_declaration", "method_definition", "public_field_definition", + "function_signature", "method_signature", "property_signature", ], diff --git a/crates/sem-core/src/parser/plugins/code/mod.rs b/crates/sem-core/src/parser/plugins/code/mod.rs index 26233d27..41013943 100644 --- a/crates/sem-core/src/parser/plugins/code/mod.rs +++ b/crates/sem-core/src/parser/plugins/code/mod.rs @@ -558,6 +558,150 @@ export async function* streamUsers(): AsyncGenerator { assert_eq!(stream.unwrap().entity_type, "function"); } + #[test] + fn test_typescript_declare_function_signature_entity_extraction() { + let code = r#" +export declare function createClient(opts: ClientOptions): Client; +export declare class Client { + connect(): Promise; +} +export interface ClientOptions { host: string; } +export declare const VERSION: string; +export type Row = Record; +"#; + let plugin = CodeParserPlugin; + let entities = plugin.extract_entities(code, "types.d.ts"); + let names: Vec<&str> = entities.iter().map(|e| e.name.as_str()).collect(); + let create_client = entities.iter().find(|e| e.name == "createClient") + .unwrap_or_else(|| panic!("missing createClient, got: {:?}", names)); + + assert_eq!(create_client.entity_type, "function"); + assert!(names.contains(&"Client"), "Should find Client class, got: {:?}", names); + assert!(names.contains(&"ClientOptions"), "Should find ClientOptions interface, got: {:?}", names); + assert!(names.contains(&"VERSION"), "Should find VERSION const, got: {:?}", names); + assert!(names.contains(&"Row"), "Should find Row type, got: {:?}", names); + } + + #[test] + fn test_typescript_overload_signatures_do_not_duplicate_implementation() { + let code = r#" +export function combine(a: string, b: string): string; +export function combine(a: number, b: number): number; +export function combine(a: any, b: any): any { return a + b; } +declare function ambientFn(x: number): number; +"#; + let plugin = CodeParserPlugin; + let entities = plugin.extract_entities(code, "overloads.ts"); + let combine_entities: Vec<_> = entities.iter() + .filter(|e| e.name == "combine") + .collect(); + let ambient_fn = entities.iter().find(|e| e.name == "ambientFn") + .unwrap_or_else(|| panic!("missing ambientFn, got: {:?}", entities.iter().map(|e| &e.name).collect::>())); + + assert_eq!( + combine_entities.len(), + 1, + "Should only emit the overload implementation, got: {:?}", + combine_entities.iter().map(|e| (&e.name, &e.content)).collect::>() + ); + assert_eq!(combine_entities[0].entity_type, "function"); + assert!( + combine_entities[0].content.contains("return a + b"), + "Should keep the implementation entity" + ); + assert_eq!(ambient_fn.entity_type, "function"); + } + + #[test] + fn test_typescript_ambient_overload_signatures_remain_visible() { + let code = r#" +export declare function lookup(id: string): User; +export declare function lookup(id: number): User; +"#; + let plugin = CodeParserPlugin; + let entities = plugin.extract_entities(code, "ambient-overloads.d.ts"); + let lookup_entities: Vec<_> = entities.iter() + .filter(|e| e.name == "lookup") + .collect(); + + assert_eq!( + lookup_entities.len(), + 2, + "Should emit ambient overload signatures without an implementation, got: {:?}", + lookup_entities.iter().map(|e| (&e.name, &e.content)).collect::>() + ); + } + + #[test] + fn test_typescript_nested_overload_signatures_do_not_duplicate_implementation() { + let code = r#" +namespace Api { + export function request(path: string): string; + export function request(path: URL): string; + export function request(path: string | URL): string { + return path.toString(); + } +} +"#; + let plugin = CodeParserPlugin; + let entities = plugin.extract_entities(code, "namespace-overloads.ts"); + let request_entities: Vec<_> = entities.iter() + .filter(|e| e.name == "request") + .collect(); + + assert_eq!( + request_entities.len(), + 1, + "Should only emit the nested overload implementation, got: {:?}", + request_entities.iter().map(|e| (&e.name, &e.content)).collect::>() + ); + assert_eq!(request_entities[0].entity_type, "function"); + assert!( + request_entities[0].content.contains("return path.toString()"), + "Should keep the nested implementation entity" + ); + } + + #[test] + fn test_typescript_mixed_export_overload_signature_matches_implementation() { + let code = r#" +export function parse(input: string): string; +function parse(input: unknown): string { + return String(input); +} +"#; + let plugin = CodeParserPlugin; + let entities = plugin.extract_entities(code, "mixed-export-overloads.ts"); + let parse_entities: Vec<_> = entities.iter() + .filter(|e| e.name == "parse") + .collect(); + + assert_eq!( + parse_entities.len(), + 1, + "Should only emit the mixed-export overload implementation, got: {:?}", + parse_entities.iter().map(|e| (&e.name, &e.content)).collect::>() + ); + assert!( + parse_entities[0].content.contains("return String(input)"), + "Should keep the implementation entity" + ); + } + + #[test] + fn test_tsx_declare_function_signature_entity_extraction() { + let code = r#" +declare function useWidget(name: string): JSX.Element; +export const Widget = () =>
; +"#; + let plugin = CodeParserPlugin; + let entities = plugin.extract_entities(code, "widget.tsx"); + let use_widget = entities.iter().find(|e| e.name == "useWidget") + .unwrap_or_else(|| panic!("missing useWidget, got: {:?}", entities.iter().map(|e| &e.name).collect::>())); + + assert_eq!(use_widget.entity_type, "function"); + } + #[test] fn test_javascript_generator_function_entity_extraction() { let code = r#"