From da0d38ad410673914e7f245be55572c72340154b Mon Sep 17 00:00:00 2001 From: adityachaudhary99 Date: Thu, 28 May 2026 11:38:54 +0530 Subject: [PATCH] fix: prevent bare-name node ID collisions across files - ensure_named_node now always uses stem-qualified IDs - Same fix for superclass/inheritance resolution in walk() - Same fix for C#, Swift, C++, Java base type fallbacks - Removes bare-name fallback that caused cross-file collisions Previously, _make_id(name) (bare, no stem) was used as fallback when _make_id(stem, name) was not in the per-file seen_ids set, causing identically-named entities in different files to produce colliding IDs. This caused the second entity's node to overwrite the first in the NetworkX graph, losing one entity entirely (#952). --- graphify/extract.py | 111 +++++++++++++++++++------------------------- 1 file changed, 48 insertions(+), 63 deletions(-) diff --git a/graphify/extract.py b/graphify/extract.py index 20e697e9e..693d9b0a2 100644 --- a/graphify/extract.py +++ b/graphify/extract.py @@ -2056,9 +2056,6 @@ def add_edge(src: str, tgt: str, relation: str, line: int, def ensure_named_node(name: str, line: int) -> str: nid = _make_id(stem, name) - if nid in seen_ids: - return nid - nid = _make_id(name) if nid not in seen_ids: add_node(nid, name, line) return nid @@ -2114,16 +2111,14 @@ def walk(node, parent_class_nid: str | None = None) -> None: base = _read_text(arg, source) base_nid = _make_id(stem, base) if base_nid not in seen_ids: - base_nid = _make_id(base) - if base_nid not in seen_ids: - nodes.append({ - "id": base_nid, - "label": base, - "file_type": "code", - "source_file": "", - "source_location": "", - }) - seen_ids.add(base_nid) + nodes.append({ + "id": base_nid, + "label": base, + "file_type": "code", + "source_file": "", + "source_location": "", + }) + seen_ids.add(base_nid) add_edge(class_nid, base_nid, "inherits", line) # Swift-specific: conformance / inheritance @@ -2187,16 +2182,14 @@ def _php_emit_base(base_name: str, rel: str, at_line: int) -> None: return base_nid = _make_id(stem, base_name) if base_nid not in seen_ids: - base_nid = _make_id(base_name) - if base_nid not in seen_ids: - nodes.append({ - "id": base_nid, - "label": base_name, - "file_type": "code", - "source_file": "", - "source_location": "", - }) - seen_ids.add(base_nid) + nodes.append({ + "id": base_nid, + "label": base_name, + "file_type": "code", + "source_file": "", + "source_location": "", + }) + seen_ids.add(base_nid) add_edge(class_nid, base_nid, rel, at_line) for child in node.children: @@ -2253,16 +2246,14 @@ def _php_emit_base(base_name: str, rel: str, at_line: int) -> None: continue base_nid = _make_id(stem, base) if base_nid not in seen_ids: - base_nid = _make_id(base) - if base_nid not in seen_ids: - nodes.append({ - "id": base_nid, - "label": base, - "file_type": "code", - "source_file": "", - "source_location": "", - }) - seen_ids.add(base_nid) + nodes.append({ + "id": base_nid, + "label": base, + "file_type": "code", + "source_file": "", + "source_location": "", + }) + seen_ids.add(base_nid) add_edge(class_nid, base_nid, relation, line) for arg_child in user_type_node.children: if arg_child.type != "type_arguments": @@ -2301,16 +2292,14 @@ def _php_emit_base(base_name: str, rel: str, at_line: int) -> None: continue base_nid = _make_id(stem, base) if base_nid not in seen_ids: - base_nid = _make_id(base) - if base_nid not in seen_ids: - nodes.append({ - "id": base_nid, - "label": base, - "file_type": "code", - "source_file": "", - "source_location": "", - }) - seen_ids.add(base_nid) + nodes.append({ + "id": base_nid, + "label": base, + "file_type": "code", + "source_file": "", + "source_location": "", + }) + seen_ids.add(base_nid) relation = _csharp_classify_base(base, csharp_interface_names) add_edge(class_nid, base_nid, relation, line) if sub.type == "generic_name": @@ -2334,16 +2323,14 @@ def _emit_java_parent(base_name: str, rel: str, at_line: int) -> None: return base_nid = _make_id(stem, base_name) if base_nid not in seen_ids: - base_nid = _make_id(base_name) - if base_nid not in seen_ids: - nodes.append({ - "id": base_nid, - "label": base_name, - "file_type": "code", - "source_file": "", - "source_location": "", - }) - seen_ids.add(base_nid) + nodes.append({ + "id": base_nid, + "label": base_name, + "file_type": "code", + "source_file": "", + "source_location": "", + }) + seen_ids.add(base_nid) add_edge(class_nid, base_nid, rel, at_line) sup = node.child_by_field_name("superclass") @@ -2403,16 +2390,14 @@ def _emit_java_parent(base_name: str, rel: str, at_line: int) -> None: continue base_nid = _make_id(stem, base) if base_nid not in seen_ids: - base_nid = _make_id(base) - if base_nid not in seen_ids: - nodes.append({ - "id": base_nid, - "label": base, - "file_type": "code", - "source_file": "", - "source_location": "", - }) - seen_ids.add(base_nid) + nodes.append({ + "id": base_nid, + "label": base, + "file_type": "code", + "source_file": "", + "source_location": "", + }) + seen_ids.add(base_nid) add_edge(class_nid, base_nid, "inherits", line) # Find body and recurse