From cabe327d126507c3d2be07825972eef47d56bb84 Mon Sep 17 00:00:00 2001 From: ghaiat-yoobic Date: Mon, 18 May 2026 11:43:50 +0100 Subject: [PATCH 1/2] fix(extract): resolve TypeScript wildcard path aliases (@*, @*/foo) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related bugs in _read_tsconfig_aliases / _resolve_js_import_target that left wildcard aliases unusable on real-world Angular/Nx monorepos where most cross-pillar imports go through a generic catch-all alias. ## What was broken `_read_tsconfig_aliases` was stripping trailing /* from both the alias key and the target. Patterns like: "@*": ["features/*/src/"] "@*/interfaces": ["features/*/src/interfaces.ts"] became { "@": "/features" } in the aliases dict, losing the substitution group entirely. `_resolve_js_import_target` then matched aliases by literal prefix only — so `@communicate/documentv2` matched the bare `@` prefix and joined the rest directly to a stripped target. No wildcard substitution, no real file resolved, edge collapsed to bare basename `documentv2`. ## Fix 1. `_read_tsconfig_aliases` keeps both the alias key and the target verbatim when either contains *. Non-wildcard aliases still get the trailing-/ strip they had before. 2. `_resolve_js_import_target` splits wildcard aliases on the * into (prefix, suffix), matches the import as raw.startswith(prefix) + raw.endswith(suffix) with at least one captured char, then substitutes the captured segment into the target's *. 3. Aliases are sorted by specificity before matching, per the TypeScript path-mapping spec: exact (non-wildcard) entries win over wildcard entries; among wildcards, the longer non-wildcard prefix wins. So `@common/integration/*` beats `@*` when both could match. ## Phantom-node fix Even when alias resolution produced the right target Path, the node id was derived from the absolute path (`/Users/.../features/...`) while the file extractor uses `str(path)` against the project-relative path it was given. The ids never matched, so alias-resolved imports pointed at phantom nodes. New `_resolved_path_to_nid` helper relativizes the resolved Path to Path.cwd() before computing the node id, falling back to the absolute form only for paths outside cwd. ## Measured impact Same Nx/Angular monorepo (7,759 TS files, ~50 tsconfig path entries including `@*` catch-all and `@*/interfaces`/`@*/widgets`/`@*/test-data`): - AST edges: 164,041 → 193,970 (+30K wildcard-resolved imports) - Final graph edges (after build_from_json dedup): 99,871 → 107,369 - Specific failing case from triage: `import { FileChipComponent } from '@communicate/documentv2'` Before: zero imports_from edges on the importing file, all internal contains-only. After: 1 imports_from edge to `features_communicate_documentv2_src_index_ts` + 1 symbol-level imports edge to FileChipComponent. Bare/scoped imports (npm packages, unmatched aliases) keep the existing basename fallback so external imports continue to behave the same way. --- graphify/extract.py | 76 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 64 insertions(+), 12 deletions(-) diff --git a/graphify/extract.py b/graphify/extract.py index fe2916ff6..5d6b95610 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -126,9 +126,15 @@ def _read_tsconfig_aliases(tsconfig: Path, base_dir: Path, seen: set) -> dict[st for alias, targets in paths.items(): if not targets: continue - alias_prefix = alias.rstrip("/*") - target_base = targets[0].rstrip("/*") - aliases[alias_prefix] = str(base_dir / target_base) + target = targets[0] + # Preserve the wildcard in both alias and target so the resolver can + # substitute the captured segment back in. TypeScript's path mapping + # spec uses '*' as a single capture group; keeping it lets us tell + # 'wildcard alias' from 'exact alias' at resolve time. + if "*" in alias or "*" in target: + aliases[alias] = str(base_dir / target) if target else str(base_dir) + else: + aliases[alias.rstrip("/")] = str(base_dir / target.rstrip("/")) return aliases @@ -343,25 +349,71 @@ def _import_python(node, source: bytes, file_nid: str, stem: str, edges: list, s }) +def _resolved_path_to_nid(resolved: Path) -> str: + """Compute a node id from a resolved import target. + + The file extractor uses ``_make_id(str(path))`` against whatever Path + representation it was handed (usually project-relative). Alias resolution + builds an absolute path against the tsconfig's location, so we relativize + to cwd before computing the id — otherwise the edge target id never + matches the file node id and every alias-resolved import becomes a + phantom. + """ + try: + rel = resolved.relative_to(Path.cwd()) + return _make_id(str(rel)) + except ValueError: + return _make_id(str(resolved)) + + +def _alias_specificity(alias: str) -> "tuple[int, int]": + """Sort key so the most specific alias matches first. + + TypeScript's path-mapping spec is "longest matching prefix wins", with + exact (non-wildcard) entries winning over wildcard entries when both + would match. Returning a tuple lets us pass this straight to ``sorted``: + exact aliases get 0, wildcards 1, then negative-length so longer prefixes + sort first. + """ + if "*" in alias: + return (1, -len(alias.split("*", 1)[0])) + return (0, -len(alias)) + + def _resolve_js_import_target(raw: str, str_path: str) -> "tuple[str, Path | None] | None": """Resolve a JS/TS import path string to (target_nid, resolved_path). - Handles relative paths, tsconfig path aliases, and bare/scoped imports. - Returns None if `raw` is empty. + Handles relative paths, tsconfig path aliases (exact and wildcard), and + bare/scoped imports. Returns None if `raw` is empty. """ if not raw: return None if raw.startswith("."): resolved = Path(os.path.normpath(Path(str_path).parent / raw)) resolved = _resolve_js_module_path(resolved) - return _make_id(str(resolved)), resolved + return _resolved_path_to_nid(resolved), resolved aliases = _load_tsconfig_aliases(Path(str_path).parent) - for alias_prefix, alias_base in aliases.items(): - if raw == alias_prefix or raw.startswith(alias_prefix + "/"): - rest = raw[len(alias_prefix):].lstrip("/") - resolved_alias = Path(os.path.normpath(Path(alias_base) / rest)) - resolved_alias = _resolve_js_module_path(resolved_alias) - return _make_id(str(resolved_alias)), resolved_alias + for alias_pattern, target_pattern in sorted(aliases.items(), key=lambda kv: _alias_specificity(kv[0])): + if "*" in alias_pattern: + star_idx = alias_pattern.index("*") + prefix = alias_pattern[:star_idx] + suffix = alias_pattern[star_idx + 1:] + if ( + raw.startswith(prefix) + and raw.endswith(suffix) + and len(raw) > len(prefix) + len(suffix) + ): + captured = raw[len(prefix): len(raw) - len(suffix)] if suffix else raw[len(prefix):] + resolved_str = target_pattern.replace("*", captured) + resolved_alias = Path(os.path.normpath(resolved_str)) + resolved_alias = _resolve_js_module_path(resolved_alias) + return _resolved_path_to_nid(resolved_alias), resolved_alias + else: + if raw == alias_pattern or raw.startswith(alias_pattern + "/"): + rest = raw[len(alias_pattern):].lstrip("/") + resolved_alias = Path(os.path.normpath(Path(target_pattern) / rest)) + resolved_alias = _resolve_js_module_path(resolved_alias) + return _resolved_path_to_nid(resolved_alias), resolved_alias module_name = raw.split("/")[-1] if not module_name: return None From 285ef70cd0abe66c543971c76ff845f8ea50cf3f Mon Sep 17 00:00:00 2001 From: ghaiat-yoobic Date: Mon, 18 May 2026 13:43:36 +0100 Subject: [PATCH 2/2] fix(extract): chase barrel re-exports for JS/TS symbol-level imports MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When tsconfig aliases resolve through a barrel (`features//src/ index.ts` that does `export { X } from './x'`), the file-level `imports_from` edge correctly targets the barrel — but the symbol-level `imports` edge was synthesized as `_make_id(barrel_stem, symbol)`. That id matches no node (the symbol's real id is derived from the file where it's actually defined), so build_from_json silently dropped every barrel-mediated symbol edge. Common monorepo pattern (Nx, Angular, Lerna, Turborepo). On a 7,761-file Angular monorepo: every class consumed via `@pillar/topic` barrels stayed degree-1, masking the actual cross-pillar coupling that the graph's community detection and god-node analysis are meant to surface. ## Approach 1. New `_export_js` handler emits a record per `export ... from '...'` statement (named with optional aliasing, `export *`, `export * as ns`). Wired into JS_CONFIG / TS_CONFIG via new `export_types` / `export_handler` LanguageConfig fields. `_extract_generic` now exposes the export handler dispatch in its walk; the `reexports` accumulator must be initialised before `walk(root)` since every TS file with re-exports hits it. 2. `_import_js` attaches `_reexport_target_path` (the import target file's absolute path) and `_reexport_symbol` (the imported name) to every symbol-level `imports` edge as metadata. 3. `extract()` post-processes after id_remap: builds `named_reexports[(from_path, exposed)] = (to_path, original)` and `star_reexports[from_path] = [to_paths]`. For each symbol-level edge, calls `_resolve_reexport_chain` (visited set, max depth 16) to find the symbol's defining file. If the synthesized target matches an existing node id, the edge is rewritten. Metadata fields are stripped before the dict is serialised. ## Measured impact 7,761-file Angular monorepo, holding everything else constant: - Symbol-level imports edges rewritten through barrels: **19,108** - Total edges in final graph: 107,369 → 126,485 (+19K) - Communities: 2,902 → 2,692 (cross-pillar coupling collapses fragmented per-pillar islands into shared communities) - Top in-degree symbol nodes are now real architectural anchors: TranslatePipe (1,044 callers) YobiFlexDirective (983) TranslateService (746) YobiTextDirective (692) BrokerService (354) AuthenticationService (350) All these had degree 1 (just their own `contains` edge) before the barrel chase. Tally's specific failing case from triage: // features/cross/agent/src/context/context-providers.ts import { FileChipComponent } from '@communicate/documentv2'; // features/communicate/documentv2/src/index.ts export { FileChipComponent } from './file/file-chip/file-chip.component'; Before: context-providers ↔ FileChipComponent had no edge. After: 1 `imports` edge directly to `file_chip_file_chip_component_filechipcomponent` (the class node). ## Star re-exports `export * from './x'` is treated as a passthrough: when chasing a `(barrel, sym)` key, if no named re-export matches but the barrel has `*` re-exports, the chain advances to the first `*` target that itself either has a named re-export of `sym` or further `*` re-exports. Best-effort — ambiguous multi-`*` cases land on the first target. Test plan should include the multi-`*` case explicitly. --- graphify/extract.py | 192 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 191 insertions(+), 1 deletion(-) diff --git a/graphify/extract.py b/graphify/extract.py index 5d6b95610..6cd8228ae 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -192,6 +192,12 @@ class LanguageConfig: # Import handler: called for import nodes instead of generic handling import_handler: Callable | None = None + # Re-export handler: called for `export ... from '...'` nodes to record + # barrel re-export relationships. Used post-extraction to redirect + # symbol-level imports through barrels to the symbol's defining file. + export_types: frozenset = frozenset() + export_handler: Callable | None = None + # Optional custom name resolver for functions (C, C++ declarator unwrapping) resolve_function_name_fn: Callable | None = None @@ -420,6 +426,80 @@ def _resolve_js_import_target(raw: str, str_path: str) -> "tuple[str, Path | Non return _make_id(module_name), None +def _export_js(node, source: bytes, file_nid: str, stem: str, str_path: str, reexports: list) -> None: + """Record `export ... from '...'` re-exports for post-extraction barrel resolution. + + Handles three forms: + export { X, Y as Z } from './mod' -> named re-exports + export * from './mod' -> star re-export + export * as ns from './mod' -> namespace re-export (treated like star) + + Plain local exports (without a `from` clause) are not re-exports — they + just declare what this file exposes — so they're skipped. + """ + from_string_node = None + export_clause_node = None + namespace_export = False + star_export = False + + for child in node.children: + ct = child.type + if ct == "string": + from_string_node = child + elif ct == "export_clause": + export_clause_node = child + elif ct == "namespace_export": + namespace_export = True + elif ct in ("*", "asterisk"): + star_export = True + + if from_string_node is None: + return + + raw_source = _read_text(from_string_node, source).strip("'\"` ") + if not raw_source: + return + + resolved = _resolve_js_import_target(raw_source, str_path) + if resolved is None: + return + _, resolved_path = resolved + if resolved_path is None: + return + + if star_export or namespace_export: + reexports.append({ + "from_path": str_path, + "to_path": str(resolved_path), + "kind": "star", + "exposed": None, + "original": None, + }) + return + + if export_clause_node is None: + return + + for spec in export_clause_node.children: + if spec.type != "export_specifier": + continue + name_node = spec.child_by_field_name("name") + alias_node = spec.child_by_field_name("alias") + if name_node is None: + continue + original = _read_text(name_node, source).strip() + exposed = _read_text(alias_node, source).strip() if alias_node else original + if not original or not exposed: + continue + reexports.append({ + "from_path": str_path, + "to_path": str(resolved_path), + "kind": "named", + "exposed": exposed, + "original": original, + }) + + def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_path: str) -> None: resolved_path: "Path | None" = None for child in node.children: @@ -447,6 +527,7 @@ def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_p # defining the symbol, so these edges wire importers directly to existing symbol nodes. if resolved_path is not None: target_stem = _file_stem(resolved_path) + target_path_str = str(resolved_path) line = node.start_point[0] + 1 for child in node.children: if child.type == "import_clause": @@ -466,6 +547,13 @@ def _import_js(node, source: bytes, file_nid: str, stem: str, edges: list, str_p "source_file": str_path, "source_location": f"L{line}", "weight": 1.0, + # Metadata for post-extraction re-export resolution. + # If `target_path_str` is a barrel re-exporting `sym`, + # the post-process pass follows the chain to find the + # symbol's actual defining file and rewrites the + # target id. Stripped before graph.json is written. + "_reexport_target_path": target_path_str, + "_reexport_symbol": sym, }) @@ -952,6 +1040,8 @@ def _swift_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: s call_accessor_field="property", function_boundary_types=frozenset({"function_declaration", "arrow_function", "method_definition"}), import_handler=_import_js, + export_types=frozenset({"export_statement"}), + export_handler=_export_js, ) _TS_CONFIG = LanguageConfig( @@ -971,6 +1061,8 @@ def _swift_extra_walk(node, source: bytes, file_nid: str, stem: str, str_path: s call_accessor_field="property", function_boundary_types=frozenset({"function_declaration", "arrow_function", "method_definition"}), import_handler=_import_js, + export_types=frozenset({"export_statement"}), + export_handler=_export_js, ) # .tsx files must use the TSX grammar (JSX-aware), not the plain TypeScript grammar. @@ -1309,6 +1401,15 @@ def walk(node, parent_class_nid: str | None = None) -> None: config.import_handler(node, source, file_nid, stem, edges, str_path) return + # Re-export types (`export ... from '...'`). Recorded for the + # post-extraction barrel resolution pass in extract(). + if t in config.export_types: + if config.export_handler: + config.export_handler(node, source, file_nid, stem, str_path, reexports) + # Don't `return` here — children of export_statement may include + # class/function declarations (e.g. `export class Foo {}`) that + # we still want to walk normally. + # Class types if t in config.class_types: # Resolve class name @@ -1574,6 +1675,13 @@ def _emit_java_parent(base_name: str, rel: str, at_line: int) -> None: for child in node.children: walk(child, parent_class_nid=None) + # `reexports` is filled by config.export_handler during walk, so it must + # exist in the enclosing scope before walk runs. Other accumulators + # (raw_calls, etc.) are referenced only on conditional paths and survive + # being initialised later, but the export handler fires for every file + # with any `export ... from '...'` statement. + reexports: list[dict] = [] + walk(root) # ── Call-graph pass ─────────────────────────────────────────────────────── @@ -1910,7 +2018,7 @@ def walk_calls(node, caller_nid: str) -> None: if src in valid_ids and (tgt in valid_ids or edge["relation"] in ("imports", "imports_from")): clean_edges.append(edge) - return {"nodes": nodes, "edges": clean_edges, "raw_calls": raw_calls} + return {"nodes": nodes, "edges": clean_edges, "raw_calls": raw_calls, "reexports": reexports} # ── Python rationale extraction ─────────────────────────────────────────────── @@ -5954,6 +6062,88 @@ def extract( if e.get("target") in id_remap: e["target"] = id_remap[e["target"]] + # ── Barrel re-export resolution for JS/TS symbol-level imports ──────────── + # `import { Foo } from '@pillar/topic'` resolves the file-level edge to the + # topic's barrel index.ts, but the symbol-level edge ends up targeting a + # synthetic id like `_` that nothing actually defines — + # build_from_json drops it and the consumer ↔ definer coupling is lost. + # Follow the `export { Foo } from './foo'` / `export * from './foo'` chain + # collected per-file at extract time, then rewrite each symbol-level edge + # to point at the symbol's defining file. + named_reexports: dict[tuple[str, str], tuple[str, str]] = {} + star_reexports: dict[str, list[str]] = {} + for result in per_file: + if not result: + continue + for rx in result.get("reexports", ()): + kind = rx.get("kind") + frm = rx.get("from_path") + to = rx.get("to_path") + if not frm or not to: + continue + if kind == "named": + exposed = rx.get("exposed") + original = rx.get("original") + if exposed and original: + named_reexports[(frm, exposed)] = (to, original) + elif kind == "star": + star_reexports.setdefault(frm, []).append(to) + + def _resolve_reexport_chain(initial_path: str, initial_symbol: str, max_depth: int = 16) -> "tuple[str, str]": + visited: set[tuple[str, str]] = set() + cur_path, cur_symbol = initial_path, initial_symbol + for _ in range(max_depth): + key = (cur_path, cur_symbol) + if key in visited: + break + visited.add(key) + nxt = named_reexports.get(key) + if nxt is not None: + cur_path, cur_symbol = nxt + continue + stars = star_reexports.get(cur_path) + if stars: + advanced = False + for st in stars: + if (st, cur_symbol) in named_reexports or st in star_reexports: + cur_path = st + advanced = True + break + if advanced: + continue + break + return cur_path, cur_symbol + + if named_reexports or star_reexports: + existing_ids = {n["id"] for n in all_nodes} + reexport_rewrites = 0 + for e in all_edges: + tp = e.pop("_reexport_target_path", None) + sym = e.pop("_reexport_symbol", None) + if tp is None or sym is None: + continue + final_path, final_symbol = _resolve_reexport_chain(tp, sym) + if (final_path, final_symbol) == (tp, sym): + continue + new_target = _make_id(_file_stem(Path(final_path)), final_symbol) + if new_target in existing_ids: + e["target"] = new_target + reexport_rewrites += 1 + # Strip metadata from any remaining edges (e.g. when no chain applied). + for e in all_edges: + e.pop("_reexport_target_path", None) + e.pop("_reexport_symbol", None) + if reexport_rewrites: + import logging + logging.getLogger(__name__).info( + "Resolved %d symbol-level imports through barrel re-exports", + reexport_rewrites, + ) + else: + for e in all_edges: + e.pop("_reexport_target_path", None) + e.pop("_reexport_symbol", None) + # Add cross-file class-level edges (Python only - uses Python parser internally) py_paths = [p for p in paths if p.suffix == ".py"] if py_paths: