diff --git a/graphify/extract.py b/graphify/extract.py index fe2916ff6..6cd8228ae 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -126,9 +126,15 @@ def _read_tsconfig_aliases(tsconfig: Path, base_dir: Path, seen: set) -> dict[st for alias, targets in paths.items(): if not targets: continue - alias_prefix = alias.rstrip("/*") - target_base = targets[0].rstrip("/*") - aliases[alias_prefix] = str(base_dir / target_base) + target = targets[0] + # Preserve the wildcard in both alias and target so the resolver can + # substitute the captured segment back in. TypeScript's path mapping + # spec uses '*' as a single capture group; keeping it lets us tell + # 'wildcard alias' from 'exact alias' at resolve time. + if "*" in alias or "*" in target: + aliases[alias] = str(base_dir / target) if target else str(base_dir) + else: + aliases[alias.rstrip("/")] = str(base_dir / target.rstrip("/")) return aliases @@ -186,6 +192,12 @@ class LanguageConfig: # Import handler: called for import nodes instead of generic handling import_handler: Callable | None = None + # Re-export handler: called for `export ... from '...'` nodes to record + # barrel re-export relationships. Used post-extraction to redirect + # symbol-level imports through barrels to the symbol's defining file. + export_types: frozenset = frozenset() + export_handler: Callable | None = None + # Optional custom name resolver for functions (C, C++ declarator unwrapping) resolve_function_name_fn: Callable | None = None @@ -343,31 +355,151 @@ def _import_python(node, source: bytes, file_nid: str, stem: str, edges: list, s }) +def _resolved_path_to_nid(resolved: Path) -> str: + """Compute a node id from a resolved import target. + + The file extractor uses ``_make_id(str(path))`` against whatever Path + representation it was handed (usually project-relative). Alias resolution + builds an absolute path against the tsconfig's location, so we relativize + to cwd before computing the id — otherwise the edge target id never + matches the file node id and every alias-resolved import becomes a + phantom. + """ + try: + rel = resolved.relative_to(Path.cwd()) + return _make_id(str(rel)) + except ValueError: + return _make_id(str(resolved)) + + +def _alias_specificity(alias: str) -> "tuple[int, int]": + """Sort key so the most specific alias matches first. + + TypeScript's path-mapping spec is "longest matching prefix wins", with + exact (non-wildcard) entries winning over wildcard entries when both + would match. Returning a tuple lets us pass this straight to ``sorted``: + exact aliases get 0, wildcards 1, then negative-length so longer prefixes + sort first. + """ + if "*" in alias: + return (1, -len(alias.split("*", 1)[0])) + return (0, -len(alias)) + + def _resolve_js_import_target(raw: str, str_path: str) -> "tuple[str, Path | None] | None": """Resolve a JS/TS import path string to (target_nid, resolved_path). - Handles relative paths, tsconfig path aliases, and bare/scoped imports. - Returns None if `raw` is empty. + Handles relative paths, tsconfig path aliases (exact and wildcard), and + bare/scoped imports. Returns None if `raw` is empty. """ if not raw: return None if raw.startswith("."): resolved = Path(os.path.normpath(Path(str_path).parent / raw)) resolved = _resolve_js_module_path(resolved) - return _make_id(str(resolved)), resolved + return _resolved_path_to_nid(resolved), resolved aliases = _load_tsconfig_aliases(Path(str_path).parent) - for alias_prefix, alias_base in aliases.items(): - if raw == alias_prefix or raw.startswith(alias_prefix + "/"): - rest = raw[len(alias_prefix):].lstrip("/") - resolved_alias = Path(os.path.normpath(Path(alias_base) / rest)) - resolved_alias = _resolve_js_module_path(resolved_alias) - return _make_id(str(resolved_alias)), resolved_alias + for alias_pattern, target_pattern in sorted(aliases.items(), key=lambda kv: _alias_specificity(kv[0])): + if "*" in alias_pattern: + star_idx = alias_pattern.index("*") + prefix = alias_pattern[:star_idx] + suffix = alias_pattern[star_idx + 1:] + if ( + raw.startswith(prefix) + and raw.endswith(suffix) + and len(raw) > len(prefix) + len(suffix) + ): + captured = raw[len(prefix): len(raw) - len(suffix)] if suffix else raw[len(prefix):] + resolved_str = target_pattern.replace("*", captured) + resolved_alias = Path(os.path.normpath(resolved_str)) + resolved_alias = _resolve_js_module_path(resolved_alias) + return _resolved_path_to_nid(resolved_alias), resolved_alias + else: + if raw == alias_pattern or raw.startswith(alias_pattern + "/"): + rest = raw[len(alias_pattern):].lstrip("/") + resolved_alias = Path(os.path.normpath(Path(target_pattern) / rest)) + resolved_alias = _resolve_js_module_path(resolved_alias) + return _resolved_path_to_nid(resolved_alias), resolved_alias module_name = raw.split("/")[-1] if not module_name: return None return _make_id(module_name), None +def _export_js(node, source: bytes, file_nid: str, stem: str, str_path: str, reexports: list) -> None: + """Record `export ... from '...'` re-exports for post-extraction barrel resolution. + + Handles three forms: + export { X, Y as Z } from './mod' -> named re-exports + export * from './mod' -> star re-export + export * as ns from './mod' -> namespace re-export (treated like star) + + Plain local exports (without a `from` clause) are not re-exports — they + just declare what this file exposes — so they're skipped. + """ + from_string_node = None + export_clause_node = None + namespace_export = False + star_export = False + + for child in node.children: + ct = child.type + if ct == "string": + from_string_node = child + elif ct == "export_clause": + export_clause_node = child + elif ct == "namespace_export": + namespace_export = True + elif ct in ("*", "asterisk"): + star_export = True + + if from_string_node is None: + return + + raw_source = _read_text(from_string_node, source).strip("'\"` ") + if not raw_source: + return + + resolved = _resolve_js_import_target(raw_source, str_path) + if resolved is None: + return + _, resolved_path = resolved + if resolved_path is None: + return + + if star_export or namespace_export: + reexports.append({ + "from_path": str_path, + "to_path": str(resolved_path), + "kind": "star", + "exposed": None, + "original": None, + }) + return + + if export_clause_node is None: + return + + for spec in export_clause_node.children: + if spec.type != "export_specifier": + continue + name_node = spec.child_by_field_name("name") + alias_node = spec.child_by_field_name("alias") + if name_node is None: + continue + original = _read_text(name_node, source).strip() + exposed = _read_text(alias_node, source).strip() if alias_node else original + if not original or not exposed: + continue + reexports.append({ + "from_path": str_path, + "to_path": str(resolved_path), + "kind": "named", + "exposed": exposed, + "original": original, + }) + + def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str) -> None: resolved_path: "Path | None" = None for child in node.children: @@ -395,6 +527,7 @@ def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_p # defining the symbol, so these edges wire importers directly to existing symbol nodes. if resolved_path is not None: target_stem = _file_stem(resolved_path) + target_path_str = str(resolved_path) line = node.start_point[0] + 1 for child in node.children: if child.type == "import_clause": @@ -414,6 +547,13 @@ def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_p "source_file": str_path, "source_location": f"L{line}", "weight": 1.0, + # Metadata for post-extraction re-export resolution. + # If `target_path_str` is a barrel re-exporting `sym`, + # the post-process pass follows the chain to find the + # symbol's actual defining file and rewrites the + # target id. Stripped before graph.json is written. + "_reexport_target_path": target_path_str, + "_reexport_symbol": sym, }) @@ -900,6 +1040,8 @@ def _swift_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: s call_accessor_field="property", function_boundary_types=frozenset({"function_declaration", "arrow_function", "method_definition"}), import_handler=_import_js, + export_types=frozenset({"export_statement"}), + export_handler=_export_js, ) _TS_CONFIG = LanguageConfig( @@ -919,6 +1061,8 @@ def _swift_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: s call_accessor_field="property", function_boundary_types=frozenset({"function_declaration", "arrow_function", "method_definition"}), import_handler=_import_js, + export_types=frozenset({"export_statement"}), + export_handler=_export_js, ) # .tsx files must use the TSX grammar (JSX-aware), not the plain TypeScript grammar. @@ -1257,6 +1401,15 @@ def walk(node, parent_class_nid: str | None = None) -> None: config.import_handler(node, source, file_nid, stem, edges, str_path) return + # Re-export types (`export ... from '...'`). Recorded for the + # post-extraction barrel resolution pass in extract(). + if t in config.export_types: + if config.export_handler: + config.export_handler(node, source, file_nid, stem, str_path, reexports) + # Don't `return` here — children of export_statement may include + # class/function declarations (e.g. `export class Foo {}`) that + # we still want to walk normally. + # Class types if t in config.class_types: # Resolve class name @@ -1522,6 +1675,13 @@ def _emit_java_parent(base_name: str, rel: str, at_line: int) -> None: for child in node.children: walk(child, parent_class_nid=None) + # `reexports` is filled by config.export_handler during walk, so it must + # exist in the enclosing scope before walk runs. Other accumulators + # (raw_calls, etc.) are referenced only on conditional paths and survive + # being initialised later, but the export handler fires for every file + # with any `export ... from '...'` statement. + reexports: list[dict] = [] + walk(root) # ── Call-graph pass ─────────────────────────────────────────────────────── @@ -1858,7 +2018,7 @@ def walk_calls(node, caller_nid: str) -> None: if src in valid_ids and (tgt in valid_ids or edge["relation"] in ("imports", "imports_from")): clean_edges.append(edge) - return {"nodes": nodes, "edges": clean_edges, "raw_calls": raw_calls} + return {"nodes": nodes, "edges": clean_edges, "raw_calls": raw_calls, "reexports": reexports} # ── Python rationale extraction ─────────────────────────────────────────────── @@ -5902,6 +6062,88 @@ def extract( if e.get("target") in id_remap: e["target"] = id_remap[e["target"]] + # ── Barrel re-export resolution for JS/TS symbol-level imports ──────────── + # `import { Foo } from '@pillar/topic'` resolves the file-level edge to the + # topic's barrel index.ts, but the symbol-level edge ends up targeting a + # synthetic id like `_` that nothing actually defines — + # build_from_json drops it and the consumer ↔ definer coupling is lost. + # Follow the `export { Foo } from './foo'` / `export * from './foo'` chain + # collected per-file at extract time, then rewrite each symbol-level edge + # to point at the symbol's defining file. + named_reexports: dict[tuple[str, str], tuple[str, str]] = {} + star_reexports: dict[str, list[str]] = {} + for result in per_file: + if not result: + continue + for rx in result.get("reexports", ()): + kind = rx.get("kind") + frm = rx.get("from_path") + to = rx.get("to_path") + if not frm or not to: + continue + if kind == "named": + exposed = rx.get("exposed") + original = rx.get("original") + if exposed and original: + named_reexports[(frm, exposed)] = (to, original) + elif kind == "star": + star_reexports.setdefault(frm, []).append(to) + + def _resolve_reexport_chain(initial_path: str, initial_symbol: str, max_depth: int = 16) -> "tuple[str, str]": + visited: set[tuple[str, str]] = set() + cur_path, cur_symbol = initial_path, initial_symbol + for _ in range(max_depth): + key = (cur_path, cur_symbol) + if key in visited: + break + visited.add(key) + nxt = named_reexports.get(key) + if nxt is not None: + cur_path, cur_symbol = nxt + continue + stars = star_reexports.get(cur_path) + if stars: + advanced = False + for st in stars: + if (st, cur_symbol) in named_reexports or st in star_reexports: + cur_path = st + advanced = True + break + if advanced: + continue + break + return cur_path, cur_symbol + + if named_reexports or star_reexports: + existing_ids = {n["id"] for n in all_nodes} + reexport_rewrites = 0 + for e in all_edges: + tp = e.pop("_reexport_target_path", None) + sym = e.pop("_reexport_symbol", None) + if tp is None or sym is None: + continue + final_path, final_symbol = _resolve_reexport_chain(tp, sym) + if (final_path, final_symbol) == (tp, sym): + continue + new_target = _make_id(_file_stem(Path(final_path)), final_symbol) + if new_target in existing_ids: + e["target"] = new_target + reexport_rewrites += 1 + # Strip metadata from any remaining edges (e.g. when no chain applied). + for e in all_edges: + e.pop("_reexport_target_path", None) + e.pop("_reexport_symbol", None) + if reexport_rewrites: + import logging + logging.getLogger(__name__).info( + "Resolved %d symbol-level imports through barrel re-exports", + reexport_rewrites, + ) + else: + for e in all_edges: + e.pop("_reexport_target_path", None) + e.pop("_reexport_symbol", None) + # Add cross-file class-level edges (Python only - uses Python parser internally) py_paths = [p for p in paths if p.suffix == ".py"] if py_paths: