diff --git a/code_review_graph/changes.py b/code_review_graph/changes.py index 61099560..1bd97787 100644 --- a/code_review_graph/changes.py +++ b/code_review_graph/changes.py @@ -309,10 +309,15 @@ def analyze_changes( for fp in changed_files: changed_nodes.extend(store.get_nodes_by_file(fp)) - # Filter to functions/tests for risk scoring (skip File nodes). + # Filter to functions/tests for risk scoring (skip File nodes). Verilog/ + # SystemVerilog signal-level nodes (ports/nets/params, modeled as Function + # nodes) are declarations, not callable functions: they have no TESTED_BY + # edges, so including them would flood the output with bogus "test gap" + # entries on any .sv edit. changed_funcs = [ n for n in changed_nodes if n.kind in ("Function", "Test", "Class") + and not n.extra.get("verilog_kind") ] # Cap to prevent O(N*M) query explosion on large PRs. diff --git a/code_review_graph/flows.py b/code_review_graph/flows.py index fd12ade6..aa686550 100644 --- a/code_review_graph/flows.py +++ b/code_review_graph/flows.py @@ -177,6 +177,12 @@ def detect_entry_points( if not include_tests and (node.is_test or _is_test_file(node.file_path)): continue + # Skip Verilog/SystemVerilog signal-level nodes (ports/nets/params, + # modeled as Function nodes): they have no incoming CALLS edges and + # would otherwise all register as bogus entry-point roots. + if node.extra.get("verilog_kind"): + continue + is_entry = False # True root: no one calls this function. diff --git a/code_review_graph/graph.py b/code_review_graph/graph.py index bde3e8ec..5462e3ae 100644 --- a/code_review_graph/graph.py +++ b/code_review_graph/graph.py @@ -719,6 +719,14 @@ def get_impact_radius_sql( # Batch-fetch nodes changed_nodes = self._batch_get_nodes(seeds) impacted_nodes = self._batch_get_nodes(impacted_qns) + # Drop Verilog/SystemVerilog signal-level nodes (ports/nets/params, + # modeled as Function nodes). They are leaves reached via CONTAINS/ + # REFERENCES edges from a touched module, so excluding them removes no + # real downstream impact but stops every signal of that module from + # inflating impacted_files and the impact count. + impacted_nodes = [ + n for n in impacted_nodes if not n.extra.get("verilog_kind") + ] total_impacted = len(impacted_nodes) truncated = total_impacted > max_nodes @@ -830,9 +838,19 @@ def get_stats(self) -> GraphStats: total_nodes = self._conn.execute("SELECT COUNT(*) FROM nodes").fetchone()[0] total_edges = self._conn.execute("SELECT COUNT(*) FROM edges").fetchone()[0] + # Verilog/SystemVerilog signal-level nodes (ports/nets/params/etc.) are + # stored as kind='Function' with a ``verilog_kind`` discriminator in + # ``extra``. Re-bucket them under 'Signal' so they do not inflate the + # user-facing Function count. (LIKE on the JSON text avoids a json1 + # dependency; the key is a fixed literal, not user input.) nodes_by_kind: dict[str, int] = {} - for row in self._conn.execute("SELECT kind, COUNT(*) as cnt FROM nodes GROUP BY kind"): - nodes_by_kind[row["kind"]] = row["cnt"] + for row in self._conn.execute( + "SELECT CASE WHEN extra LIKE '%\"verilog_kind\"%' THEN 'Signal'" + " ELSE kind END AS k, COUNT(*) as cnt FROM nodes" + " GROUP BY CASE WHEN extra LIKE '%\"verilog_kind\"%' THEN 'Signal'" + " ELSE kind END" + ): + nodes_by_kind[row["k"]] = row["cnt"] edges_by_kind: dict[str, int] = {} for row in self._conn.execute("SELECT kind, COUNT(*) as cnt FROM edges GROUP BY kind"): @@ -884,6 +902,10 @@ def get_nodes_by_size( "line_start IS NOT NULL", "line_end IS NOT NULL", "(line_end - line_start + 1) >= ?", + # Exclude Verilog/SystemVerilog signal-level nodes (ports/nets/params, + # modeled as Function nodes): a multi-line covergroup/typedef is not + # an oversized function to decompose. + "extra NOT LIKE '%\"verilog_kind\"%'", ] params: list = [min_lines] diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py index f94519db..9841091b 100644 --- a/code_review_graph/parser.py +++ b/code_review_graph/parser.py @@ -229,7 +229,10 @@ class EdgeInfo: "julia": [ "struct_definition", "abstract_definition", "module_definition", ], - "verilog": ["module_declaration", "interface_declaration", "class_declaration"], + "verilog": [ + "module_declaration", "interface_declaration", "class_declaration", + "package_declaration", + ], # GDScript: inner classes use ``class Name:`` (class_definition); the # file-level ``class_name Name`` gives the script itself an identity. "gdscript": ["class_definition", "class_name_statement"], @@ -399,7 +402,8 @@ class EdgeInfo: "macrocall_expression", ], "verilog": [ - "module_instantiation", "function_subroutine_call", "subroutine_call", "system_tf_call" + "module_instantiation", "interface_instantiation", + "function_subroutine_call", "subroutine_call", "system_tf_call" ], # GDScript: bare calls produce ``call``; ``obj.method()`` is an # ``attribute`` node whose right-hand side is an ``attribute_call``. @@ -2352,6 +2356,13 @@ def _extract_from_tree( ): continue + # --- Verilog/SystemVerilog signal-level constructs --- + if language == "verilog" and self._extract_verilog_constructs( + child, node_type, source, file_path, nodes, edges, + enclosing_class, enclosing_func, + ): + continue + # Recurse for other node types self._extract_from_tree( child, source, language, file_path, nodes, edges, @@ -5178,6 +5189,323 @@ def _extract_solidity_constructs( return False + def _extract_verilog_constructs( + self, + child, + node_type: str, + source: bytes, + file_path: str, + nodes: list[NodeInfo], + edges: list[EdgeInfo], + enclosing_class: Optional[str], + enclosing_func: Optional[str], + ) -> bool: + """Handle Verilog/SystemVerilog signal-level constructs. + + Emits ``Function`` nodes for ports, nets/wires, and parameters, + each tagged with ``extra["verilog_kind"]`` and linked to its + enclosing module/interface via a ``CONTAINS`` edge (or the File + node at top level). Returns True if the child was fully handled + (skip default recursion); False to fall through to recursion. + """ + def _decode(node) -> str: + return node.text.decode("utf-8", errors="replace") + + def _find_simple_identifier(node) -> Optional[str]: + # Depth-first search for the first simple_identifier leaf. + if node.type == "simple_identifier": + return _decode(node) + for sub in node.children: + found = _find_simple_identifier(sub) + if found: + return found + return None + + def _emit( + name: Optional[str], src_node, kind: str, + modifiers: Optional[str], return_type: Optional[str], + default: Optional[str] = None, + ) -> None: + if not name: + return + qualified = self._qualify(name, file_path, enclosing_class) + extra: dict = {"verilog_kind": kind} + if default is not None: + extra["default"] = default + nodes.append(NodeInfo( + kind="Function", + name=name, + file_path=file_path, + line_start=src_node.start_point[0] + 1, + line_end=src_node.end_point[0] + 1, + language="verilog", + parent_name=enclosing_class, + return_type=return_type, + modifiers=modifiers, + extra=extra, + )) + container = ( + self._qualify(enclosing_class, file_path, None) + if enclosing_class else file_path + ) + edges.append(EdgeInfo( + kind="CONTAINS", + source=container, + target=qualified, + file_path=file_path, + line=src_node.start_point[0] + 1, + )) + + # --- ANSI ports: iterate the whole port-declaration list so that + # headerless trailing ports (``input logic a, b``) inherit the + # direction/datatype of the preceding declaration. --- + if node_type == "list_of_port_declarations": + last_dir: Optional[str] = None + last_type: Optional[str] = None + for port in child.children: + if port.type != "ansi_port_declaration": + continue + pdir = last_dir + ptype = last_type + pname = None + for sub in port.children: + if sub.type in ( + "variable_port_header", "net_port_header", + "net_port_header1", "interface_port_header", + ): + for hh in sub.children: + if hh.type == "port_direction": + pdir = _decode(hh).strip() or pdir + elif hh.type in ( + "data_type", "net_port_type1", + "variable_port_type", "data_type_or_implicit1", + ): + # Net-typed ports (``output wire [3:0] b``) keep + # their type under net_port_type1, not a direct + # data_type child — read both so the datatype is + # correct and not stale-inherited from the prior + # port. + ptype = _decode(hh) + elif sub.type == "port_identifier": + pname = _find_simple_identifier(sub) + if pname: + last_dir = pdir + last_type = ptype + _emit(pname, port, "port", pdir, ptype) + return True + + # --- Non-ANSI ports: input/output/inout declarations --- + if node_type in ( + "input_declaration", "output_declaration", "inout_declaration", + ): + direction = node_type.split("_", 1)[0] # input / output / inout + dtype = None + for sub in child.children: + if sub.type == "data_type": + dtype = _decode(sub) + # The identifier list uses different node types by form: + # ``input a;`` -> list_of_port_identifiers (port_identifier) + # ``input a, b;`` -> list_of_variable_identifiers (bare + # simple_identifier leaves) + # ``output c, d;`` -> list_of_variable_port_identifiers + # (port_identifier). Walk all three so + # multi-name declarations are not silently dropped. + for sub in child.children: + if sub.type in ( + "list_of_port_identifiers", + "list_of_variable_identifiers", + "list_of_variable_port_identifiers", + ): + for pid in sub.children: + if pid.type == "port_identifier": + _emit( + _find_simple_identifier(pid), pid, + "port", direction, dtype, + ) + elif pid.type == "simple_identifier": + _emit(_decode(pid), pid, "port", direction, dtype) + return True + + # --- Parameters / localparams (header #(...) and module body) --- + if node_type in ( + "parameter_declaration", "local_parameter_declaration", + ): + kind = ( + "localparam" + if node_type == "local_parameter_declaration" + else "parameter" + ) + dtype = None + for sub in child.children: + if sub.type in ("data_type_or_implicit1", "data_type"): + dtype = _decode(sub) + elif sub.type == "list_of_param_assignments": + for pa in sub.children: + if pa.type != "param_assignment": + continue + pname = None + default = None + for piece in pa.children: + if piece.type in ( + "parameter_identifier", "simple_identifier", + ): + pname = _find_simple_identifier(piece) + elif piece.type == "constant_param_expression": + default = _decode(piece) + _emit(pname, pa, kind, None, dtype, default) + return True + + # --- Nets / wires --- + if node_type == "net_declaration": + keyword = None + for sub in child.children: + if sub.type == "net_type": + keyword = _decode(sub).strip() + for sub in child.children: + if sub.type == "list_of_net_decl_assignments": + for na in sub.children: + if na.type == "net_decl_assignment": + _emit( + _find_simple_identifier(na), na, + "net", keyword, keyword, + ) + return True + + # --- Variables (logic / reg / var / bit / integer) --- + if node_type == "data_declaration": + # Skip package imports (handled as IMPORTS_FROM) and typedefs + # (Tier 2) — let those fall through to existing handling. + for sub in child.children: + if sub.type in ( + "package_import_declaration", "type_declaration", + ): + return False + dtype = None + for sub in child.children: + if sub.type == "data_type_or_implicit1": + dtype = _decode(sub) + keyword = dtype.split()[0] if dtype else None + for sub in child.children: + if sub.type == "list_of_variable_decl_assignments": + for va in sub.children: + if va.type == "variable_decl_assignment": + _emit( + _find_simple_identifier(va), va, + "net", keyword, dtype, + ) + return True + + # --- Typedefs / enums / structs --- + # Grammar: data_declaration > type_declaration > + # [typedef, data_type, simple_identifier, ;]. The typedef name is the + # DIRECT-child simple_identifier; a DFS would wrongly return the first + # enum member (e.g. IDLE) nested inside the data_type, so iterate direct + # children only. + if node_type == "type_declaration": + tname = None + ttype = None + for sub in child.children: + if sub.type == "simple_identifier": + tname = _decode(sub) + elif sub.type == "data_type": + ttype = _decode(sub) + _emit(tname, child, "typedef", None, ttype) + return True + + # --- Modports (inside an interface) --- + # Grammar: modport_declaration > modport_item > + # modport_identifier > modport_identifier > simple_identifier. + # One modport_declaration may hold several modport_item children. + if node_type == "modport_declaration": + for mi in child.children: + if mi.type != "modport_item": + continue + mname = None + for sub in mi.children: + if sub.type == "modport_identifier": + mname = _find_simple_identifier(sub) + break + _emit(mname, mi, "modport", None, None) + return True + + # --- Named port connections: REFERENCES edges (signal data flow) --- + # Grammar: named_port_connection > [., port_identifier, (, expression, )]. + # The signal(s) wired to the instance port are the simple_identifier + # leaves inside the expression. Empty maps (``.sum()``) have no + # expression child and emit nothing. The connection lives at module + # scope, so the edge source is the enclosing module (matching the + # CALLS-edge attribution in _extract_calls); targets resolve to the + # Tier-1 signal nodes of that same module. + if node_type == "named_port_connection": + expr = next( + (c for c in child.children if c.type == "expression"), None, + ) + if expr is None: + return True + seen: set[str] = set() + + def _collect_idents(node) -> None: + if node.type == "simple_identifier": + seen.add(_decode(node)) + return + for sub in node.children: + # Skip bit/part-select index expressions (e.g. the WIDTH in + # ``wr_ptr[WIDTH-1:0]``): those are sizing constants, not the + # net wired to the port. + if sub.type in ( + "select1", "select", "bit_select", + "constant_range", "constant_expression", + ): + continue + _collect_idents(sub) + + _collect_idents(expr) + source_name = ( + self._qualify(enclosing_class, file_path, None) + if enclosing_class else file_path + ) + for sig in seen: + edges.append(EdgeInfo( + kind="REFERENCES", + source=source_name, + target=self._qualify(sig, file_path, enclosing_class), + file_path=file_path, + line=child.start_point[0] + 1, + )) + return True + + # --- Verification: covergroups / properties (named identifier child) --- + if node_type in ("covergroup_declaration", "property_declaration"): + id_type = ( + "covergroup_identifier" + if node_type == "covergroup_declaration" + else "property_identifier" + ) + kind = node_type.split("_", 1)[0] # covergroup / property + vname = None + for sub in child.children: + if sub.type == id_type: + vname = _find_simple_identifier(sub) + break + _emit(vname, child, kind, None, None) + return True + + # --- Verification: sequences --- + # Grammar: sequence_declaration > [sequence, simple_identifier, ;, + # sequence_expr, ;, endsequence]. The name is the DIRECT-child + # simple_identifier; a DFS would wrongly grab a body signal + # (sequence_expr > ... > simple_identifier), mirroring the typedef trap. + if node_type == "sequence_declaration": + sname = None + for sub in child.children: + if sub.type == "simple_identifier": + sname = _decode(sub) + break + _emit(sname, child, "sequence", None, None) + return True + + return False + def _collect_file_scope( self, root, language: str, source: bytes, ) -> tuple[dict[str, str], set[str]]: @@ -5826,6 +6154,13 @@ def _leaf_name(qi): if ss.type == "simple_identifier": return ss.text.decode("utf-8", errors="replace") return sub.text.decode("utf-8", errors="replace") + # package_declaration: name is in package_identifier > simple_identifier + if node.type == "package_declaration": + for child in node.children: + if child.type == "package_identifier": + for ss in child.children: + if ss.type == "simple_identifier": + return ss.text.decode("utf-8", errors="replace") # task_declaration: name is in task_body_declaration > task_identifier if node.type == "task_declaration": for child in node.children: @@ -6432,11 +6767,24 @@ def _normalize_php_name(text: str) -> str: return txt or None return None - # Verilog/SystemVerilog: module_instantiation's first child is the module name + # Verilog/SystemVerilog: module_instantiation's first child is the + # module name. Note the tree-sitter-verilog grammar's GLR resolution + # parses a *parameterized* instantiation (``Foo #(.W(W)) inst (...)``) + # inside a generate block as interface_instantiation instead, wrapping + # the instantiated name in an interface_identifier — both forms are + # handled so generate-block instances still emit CALLS edges. if language == "verilog" and node.type == "module_instantiation": if first.type == "simple_identifier": return first.text.decode("utf-8", errors="replace") return None + if language == "verilog" and node.type == "interface_instantiation": + for sub in node.children: + if sub.type == "interface_identifier": + for leaf in sub.children: + if leaf.type == "simple_identifier": + return leaf.text.decode("utf-8", errors="replace") + return sub.text.decode("utf-8", errors="replace") + return None # Solidity wraps call targets in an 'expression' node – unwrap it if language == "solidity" and first.type == "expression" and first.children: diff --git a/code_review_graph/refactor.py b/code_review_graph/refactor.py index 938762f3..494bf793 100644 --- a/code_review_graph/refactor.py +++ b/code_review_graph/refactor.py @@ -376,6 +376,12 @@ def _is_plausible_caller( if _is_entry_point(node): continue + # Verilog/SystemVerilog signal-level nodes (ports/nets/params) are + # declarations modeled as Function nodes, not callable functions -- + # they have no CALLS edges and must never be flagged as "dead code". + if node.extra.get("verilog_kind"): + continue + # Check for callers (CALLS), test refs (TESTED_BY), importers (IMPORTS_FROM), # and value references (REFERENCES -- function-as-value in maps, arrays, etc.). @@ -624,7 +630,12 @@ def suggest_refactorings(store: GraphStore) -> list[dict[str, Any]]: } # Check functions called only by members of a different community. - all_funcs = store.get_nodes_by_kind(["Function"]) + # Skip Verilog/SystemVerilog signal-level nodes (ports/nets/params, + # modeled as Function nodes) -- "move wire X to community B" is nonsense. + all_funcs = [ + n for n in store.get_nodes_by_kind(["Function"]) + if not n.extra.get("verilog_kind") + ] for fnode in all_funcs: f_community = node_community.get(fnode.qualified_name) diff --git a/tests/fixtures/sample.sv b/tests/fixtures/sample.sv index 3cd22b5f..71719952 100644 --- a/tests/fixtures/sample.sv +++ b/tests/fixtures/sample.sv @@ -1,6 +1,11 @@ // sample.sv - SystemVerilog fixture for parser tests `timescale 1ns / 1ps +// Package definitions kept compact on single lines. These make the imports +// below resolve to real package nodes, and exercise typedefs. +package utils_pkg; typedef logic [7:0] data_byte_t; endpackage +package arith_pkg; typedef enum logic [1:0] { IDLE, RUNNING, DONE } counter_state_t; typedef logic [15:0] counter_t; endpackage + // File-level package import import utils_pkg::*; @@ -36,6 +41,8 @@ module FIFOController #(parameter int DEPTH = 16, parameter int WIDTH = 8) ( logic [WIDTH-1:0] mem [0:DEPTH-1]; logic [$clog2(DEPTH):0] wr_ptr, rd_ptr, count; + wire overflow_flag; + localparam int ALMOST_FULL = DEPTH - 1; // Module instantiation - creates CALLS edge from FIFOController to Adder Adder #(.WIDTH(WIDTH)) ptr_adder (.a(wr_ptr[WIDTH-1:0]), .b(rd_ptr[WIDTH-1:0]), .sum()); @@ -74,4 +81,25 @@ module FIFOController #(parameter int DEPTH = 16, parameter int WIDTH = 8) ( empty = (count == 0); end + // Tier 3 verification constructs (kept compact on single lines). Each + // becomes a Function node tagged with verilog_kind. + covergroup cg_count @(posedge clk); cp: coverpoint count; endgroup + property p_full; @(posedge clk) full |-> !empty; endproperty + sequence s_wr; wr_en ##1 !full; endsequence + +endmodule + +// Top-level wrapper: multi-module hierarchy with wire feedthrough. +// stage_data/stage_valid carry a signal from the Adder output into the +// FIFOController input. Instantiation port maps kept compact on single lines. +module Top #(parameter int WIDTH = 8) (input logic clk, input logic rst_n, input logic [WIDTH-1:0] din, output logic [WIDTH-1:0] dout); + wire [WIDTH-1:0] stage_data; + wire stage_valid; + localparam int STAGES = 2; + Adder #(.WIDTH(WIDTH)) u_add (.a(din), .b(din), .sum(stage_data)); + FIFOController #(.WIDTH(WIDTH)) u_fifo (.clk(clk), .rst_n(rst_n), .data_in(stage_data), .wr_en(stage_valid), .rd_en(stage_valid), .data_out(dout), .full(), .empty()); + // Tier 3: instantiation inside a generate block. Recursion must descend + // into the generate body to emit the CALLS edge to Adder and the + // REFERENCES edges for the named connections. + generate genvar gi; for (gi = 0; gi < STAGES; gi++) begin: gblk Adder #(.WIDTH(WIDTH)) g_add (.a(din), .b(din), .sum(stage_data)); end endgenerate endmodule diff --git a/tests/test_multilang.py b/tests/test_multilang.py index afda355e..b02511f2 100644 --- a/tests/test_multilang.py +++ b/tests/test_multilang.py @@ -2494,6 +2494,332 @@ def test_file_node_language(self): assert len(file_nodes) == 1 assert file_nodes[0].language == "verilog" + # --- Tier 1: ports, nets/wires, parameters ------------------------------- + + def _sig(self, name, parent=None): + """Return the signal-level Function node with the given name (and + optional parent module), or None.""" + for n in self.nodes: + if ( + n.name == name + and n.extra.get("verilog_kind") + and (parent is None or n.parent_name == parent) + ): + return n + return None + + def test_ansi_ports_have_direction_and_parent(self): + clk = self._sig("clk", "FIFOController") + dout = self._sig("data_out", "FIFOController") + assert clk is not None and clk.extra["verilog_kind"] == "port" + assert clk.modifiers == "input" + assert dout is not None and dout.extra["verilog_kind"] == "port" + assert dout.modifiers == "output" + + def test_header_parameters_with_defaults(self): + depth = self._sig("DEPTH", "FIFOController") + width = self._sig("WIDTH", "FIFOController") + assert depth is not None and depth.extra["verilog_kind"] == "parameter" + assert depth.extra.get("default") == "16" + assert width is not None and width.extra.get("default") == "8" + + def test_multi_identifier_nets(self): + # `logic [..] wr_ptr, rd_ptr, count;` -> three separate net nodes. + for nm in ("wr_ptr", "rd_ptr", "count"): + node = self._sig(nm, "FIFOController") + assert node is not None, f"missing net {nm}" + assert node.extra["verilog_kind"] == "net" + + def test_wire_net_and_localparam(self): + wire = self._sig("overflow_flag", "FIFOController") + assert wire is not None and wire.extra["verilog_kind"] == "net" + assert wire.modifiers == "wire" + lp = self._sig("ALMOST_FULL", "FIFOController") + assert lp is not None and lp.extra["verilog_kind"] == "localparam" + + def test_multi_module_signal_attribution(self): + # Signals must attribute to the correct module when several coexist. + for nm in ("stage_data", "stage_valid"): + net = self._sig(nm, "Top") + assert net is not None, f"missing feedthrough net {nm}" + assert net.extra["verilog_kind"] == "net" + assert net.modifiers == "wire" + stages = self._sig("STAGES", "Top") + assert stages is not None and stages.extra["verilog_kind"] == "localparam" + din = self._sig("din", "Top") + dout = self._sig("dout", "Top") + assert din is not None and din.extra["verilog_kind"] == "port" + assert din.modifiers == "input" + assert dout is not None and dout.modifiers == "output" + + def test_signal_nodes_have_contains_edge_from_parent(self): + contains = [e for e in self.edges if e.kind == "CONTAINS"] + targets = {e.target.split(".")[-1] for e in contains} + for nm in ("clk", "DEPTH", "overflow_flag", "stage_data", "STAGES"): + assert nm in targets, f"no CONTAINS edge for {nm}" + + def test_non_ansi_ports(self, tmp_path): + src = ( + "module M(a, b);\n" + " input a;\n" + " output b;\n" + "endmodule\n" + ) + f = tmp_path / "nonansi.sv" + f.write_text(src) + nodes, _ = self.parser.parse_file(f) + a = next((n for n in nodes if n.name == "a" + and n.extra.get("verilog_kind") == "port"), None) + b = next((n for n in nodes if n.name == "b" + and n.extra.get("verilog_kind") == "port"), None) + assert a is not None and a.modifiers == "input" + assert b is not None and b.modifiers == "output" + + # --- Tier 2: packages, typedefs, modports -------------------------------- + + def _kind(self, name, verilog_kind, parent=None): + """Return the Function node with the given name and verilog_kind (and + optional parent), or None.""" + for n in self.nodes: + if ( + n.name == name + and n.extra.get("verilog_kind") == verilog_kind + and (parent is None or n.parent_name == parent) + ): + return n + return None + + def test_package_definitions_are_class_nodes(self): + classes = { + n.name: n for n in self.nodes + if n.kind == "Class" and n.language == "verilog" + } + assert "arith_pkg" in classes + assert "utils_pkg" in classes + + def test_package_import_resolves_to_definition(self): + # The arith_pkg IMPORTS_FROM edge now coexists with a real package node. + import_targets = { + e.target for e in self.edges if e.kind == "IMPORTS_FROM" + } + assert "arith_pkg" in import_targets + pkg = next( + (n for n in self.nodes + if n.kind == "Class" and n.name == "arith_pkg"), + None, + ) + assert pkg is not None, "arith_pkg import has no matching package node" + + def test_typedefs_in_package(self): + counter_t = self._kind("counter_t", "typedef", "arith_pkg") + state_t = self._kind("counter_state_t", "typedef", "arith_pkg") + byte_t = self._kind("data_byte_t", "typedef", "utils_pkg") + assert counter_t is not None + assert state_t is not None + assert byte_t is not None + + def test_enum_members_are_not_typedefs(self): + # Regression guard: a DFS over type_declaration would wrongly grab the + # first enum member (IDLE) instead of the typedef name. Ensure enum + # members never surface as typedef nodes. + for member in ("IDLE", "RUNNING", "DONE"): + assert self._kind(member, "typedef") is None, ( + f"enum member {member} must not be a typedef node" + ) + + def test_modports_in_interface(self): + master = self._kind("master", "modport", "BusIf") + slave = self._kind("slave", "modport", "BusIf") + assert master is not None + assert slave is not None + + def test_typedef_and_modport_have_contains_edge(self): + contains = [e for e in self.edges if e.kind == "CONTAINS"] + targets = {e.target.split(".")[-1] for e in contains} + for nm in ("counter_t", "data_byte_t", "master", "slave"): + assert nm in targets, f"no CONTAINS edge for {nm}" + + # --- Tier 3: connection edges + verification constructs ------------------ + + def _ref_targets(self): + """Suffix names of all REFERENCES edge targets.""" + return { + e.target.split("::")[-1].split(".")[-1] + for e in self.edges + if e.kind == "REFERENCES" + } + + def test_named_port_references(self): + # Named port maps wire local module signals to instance ports, emitting + # REFERENCES edges from the enclosing module to those signals. + refs = [e for e in self.edges if e.kind == "REFERENCES"] + assert refs, "expected REFERENCES edges from named port connections" + targets = self._ref_targets() + # Top.u_add(.sum(stage_data)) and FIFOController.ptr_adder(.a(wr_ptr...)) + assert "stage_data" in targets + assert "wr_ptr" in targets and "rd_ptr" in targets + # Source is attributed to the enclosing module, not a function. + assert any("Top" in e.source for e in refs) + assert any("FIFOController" in e.source for e in refs) + + def test_empty_port_connection_no_edge(self): + # Empty maps — ptr_adder ``.sum()`` and u_fifo ``.full()``/``.empty()`` + # — have no expression and must not emit REFERENCES edges. The port + # names themselves must never surface as targets. + targets = self._ref_targets() + assert "sum" not in targets + assert "full" not in targets and "empty" not in targets + + def test_verification_constructs(self): + cg = self._kind("cg_count", "covergroup", "FIFOController") + prop = self._kind("p_full", "property", "FIFOController") + seq = self._kind("s_wr", "sequence", "FIFOController") + assert cg is not None + assert prop is not None + assert seq is not None + + def test_sequence_name_not_body_signal(self): + # Regression guard: the sequence name is the direct-child identifier, not + # a body signal. A DFS would wrongly grab ``wr_en`` / ``full``. + for signal in ("wr_en", "full"): + assert self._kind(signal, "sequence") is None, ( + f"body signal {signal} must not be a sequence node" + ) + + def test_generate_block_instantiation_calls(self): + # The g_add instantiation lives inside a generate block (parsed as + # interface_instantiation). Recursion must still descend to emit its + # CALLS edge, so Top instantiates Adder at least twice (u_add + g_add). + top_adder = [ + e for e in self.edges + if e.kind == "CALLS" and "Top" in e.source and "Adder" in e.target + ] + assert len(top_adder) >= 2, ( + "expected a CALLS edge for the generate-block Adder instantiation" + ) + + # --- Regression tests for review findings -------------------------------- + + def test_multiname_non_ansi_ports(self, tmp_path): + # Multi-name non-ANSI ports use list_of_variable_identifiers / + # list_of_variable_port_identifiers (not list_of_port_identifiers). + # Regression: these were previously dropped entirely. + src = ( + "module M(a, b, c, d);\n" + " input a, b;\n" + " output c, d;\n" + "endmodule\n" + ) + f = tmp_path / "multiname.sv" + f.write_text(src) + nodes, _ = self.parser.parse_file(f) + ports = { + n.name: n for n in nodes + if n.extra.get("verilog_kind") == "port" + } + for nm in ("a", "b", "c", "d"): + assert nm in ports, f"non-ANSI port {nm} was dropped" + assert ports["a"].modifiers == "input" + assert ports["b"].modifiers == "input" + assert ports["c"].modifiers == "output" + assert ports["d"].modifiers == "output" + + def test_net_typed_ansi_port_datatype(self, tmp_path): + # Net-typed ANSI ports keep their type under net_port_type1. Regression: + # the type was read as the stale preceding port's type, and that wrong + # type then poisoned following headerless ports. + src = ( + "module M(input logic [7:0] a, output wire [3:0] b, input c);\n" + "endmodule\n" + ) + f = tmp_path / "nettype.sv" + f.write_text(src) + nodes, _ = self.parser.parse_file(f) + ports = { + n.name: n for n in nodes + if n.extra.get("verilog_kind") == "port" + } + assert "wire" in (ports["b"].return_type or ""), ports["b"].return_type + assert "logic" not in (ports["b"].return_type or "") + + def test_multiline_constructs_parse(self, tmp_path): + # Regression guard: multi-line packages, port lists, and instantiation + # port maps parse the same as single-line. The fixture is single-line + # only for compactness, NOT because multi-line is unsupported. + src = ( + "package p;\n" + " typedef logic [7:0]\n" + " byte_t;\n" + "endpackage\n" + "module Sub(input logic [7:0] x, output logic [7:0] y);\n" + " assign y = x;\n" + "endmodule\n" + "module Top(\n" + " input logic [7:0] din,\n" + " output logic [7:0] dout\n" + ");\n" + " wire [7:0] mid;\n" + " Sub u_sub (\n" + " .x(din),\n" + " .y(mid)\n" + " );\n" + "endmodule\n" + ) + f = tmp_path / "multiline.sv" + f.write_text(src) + nodes, edges = self.parser.parse_file(f) + assert any(n.kind == "Class" and n.name == "p" for n in nodes) + ports = {n.name for n in nodes + if n.extra.get("verilog_kind") == "port"} + assert {"din", "dout", "x", "y"} <= ports + assert any( + e.kind == "CALLS" and "Top" in e.source and "Sub" in e.target + for e in edges + ), "multi-line instantiation lost its CALLS edge" + ref_targets = {e.target.split(".")[-1] for e in edges + if e.kind == "REFERENCES"} + assert "din" in ref_targets and "mid" in ref_targets + + def test_slice_index_not_referenced(self): + # Bit/part-select index constants (the WIDTH in wr_ptr[WIDTH-1:0]) must + # not surface as data-flow REFERENCES targets. + targets = self._ref_targets() + assert "wr_ptr" in targets + assert "WIDTH" not in targets + + def test_signals_excluded_from_dead_code_and_stats(self): + # Pollution guards: signal nodes (ports/nets/params, modeled as Function + # nodes) must not be flagged as dead code or counted as Functions. + from code_review_graph.graph import GraphStore + from code_review_graph.refactor import find_dead_code + store = GraphStore(":memory:") + store.store_file_nodes_edges( + str(FIXTURES / "sample.sv"), self.nodes, self.edges, + ) + dead_names = {d["name"] for d in find_dead_code(store)} + for sig in ("clk", "overflow_flag", "DEPTH", "stage_data"): + assert sig not in dead_names, f"signal {sig} wrongly flagged dead" + stats = store.get_stats() + assert stats.nodes_by_kind.get("Signal", 0) > 0 + assert "Function" in stats.nodes_by_kind # real funcs still counted + + def test_signals_excluded_from_flows_and_impact(self): + # Signal nodes must not register as flow entry points, nor inflate the + # impact radius of their enclosing module. + from code_review_graph.flows import detect_entry_points + from code_review_graph.graph import GraphStore + store = GraphStore(":memory:") + fixture = str(FIXTURES / "sample.sv") + store.store_file_nodes_edges(fixture, self.nodes, self.edges) + entry_names = {n.name for n in detect_entry_points(store)} + for sig in ("clk", "overflow_flag", "stage_data", "DEPTH"): + assert sig not in entry_names, f"signal {sig} wrongly an entry point" + impact = store.get_impact_radius([fixture]) + for n in impact["impacted_nodes"]: + assert not n.extra.get("verilog_kind"), ( + f"signal {n.name} wrongly in impact radius" + ) + class TestSQLParsing: def setup_method(self): self.parser = CodeParser()