From 0d67ff9aba8fdb4d44fa10ab56aeae51fc3df821 Mon Sep 17 00:00:00 2001
From: jhovanny linares
Date: Fri, 29 May 2026 12:21:03 -0600
Subject: [PATCH] feat: add Oracle PL/SQL parser with broad Oracle object
support
Adds a regex-based PL/SQL parser (_parse_plsql) that extracts Oracle
database objects without requiring tree-sitter grammar support.
Supported constructs:
- PACKAGE spec and PACKAGE BODY with member PROCEDURE/FUNCTION extraction
- Standalone PROCEDURE and FUNCTION (with or without CREATE OR REPLACE)
- TRIGGER with event and target table metadata
- TYPE definitions (AS OBJECT, TABLE OF, UNDER, etc.)
- IMPORTS_FROM edges for FROM/JOIN table references
- CALLS edges for inter-package calls; Oracle system packages suppressed
New Oracle file extensions: .plsql, .pks, .pkb, .prc, .fnc, .trg, .pls
Auto-detection: .sql files with Oracle headers routed to PL/SQL parser
Wrapped (obfuscated) Oracle files are silently skipped
24 new tests; tests/fixtures/sample.plsql fixture covers all constructs.
README: language list updated + Oracle PL/SQL usage collapsible section.
CHANGELOG: entry added under [Unreleased].
---
CHANGELOG.md | 15 ++
README.md | 16 +-
code_review_graph/parser.py | 416 +++++++++++++++++++++++++++++++++++-
tests/fixtures/sample.plsql | 85 ++++++++
tests/test_multilang.py | 167 +++++++++++++++
5 files changed, 695 insertions(+), 4 deletions(-)
create mode 100644 tests/fixtures/sample.plsql
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 38aa3610..e588c3ff 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,21 @@
## [Unreleased]
+### Added
+
+- **Oracle PL/SQL support**: `.pls`, `.pks`, `.pkb`, `.prc`, `.fnc`, `.trg`, and `.plsql`
+ file extensions are parsed via a dedicated regex parser (no tree-sitter grammar exists for
+ Oracle PL/SQL). Extracts PACKAGE specs, PACKAGE BODYs with member PROCEDURE/FUNCTION nodes,
+ standalone PROCEDUREs and FUNCTIONs, TRIGGERs with event and table metadata, and TYPE
+ definitions. Emits CALLS edges for inter-package calls (Oracle built-in system packages such
+ as `DBMS_*` and `UTL_*` are suppressed to reduce noise) and IMPORTS_FROM edges for FROM/JOIN
+ table references and trigger target tables. `.sql` files whose first 500 bytes match an Oracle
+ object keyword (`PACKAGE`, `TRIGGER`, `PROCEDURE`, `FUNCTION`, `TYPE`) are auto-routed to the
+ PL/SQL parser. Wrapped (obfuscated) Oracle files are silently skipped. Adds 24 tests and
+ `tests/fixtures/sample.plsql`. Files must be present on disk — no Oracle database connection
+ is required; export your objects from SQL Developer, TOAD, or version control before running
+ `code-review-graph build`.
+
## [2.3.5] - 2026-05-25
**Real-time token savings, visible to humans.** The estimated context-savings
diff --git a/README.md b/README.md
index 6396f788..d0a0a543 100644
--- a/README.md
+++ b/README.md
@@ -113,7 +113,7 @@ Large monorepos are where token waste is most painful. The graph cuts through th
-Parser support covers functions, classes, imports, call sites, inheritance, and test detection across the current parser surface, using Tree-sitter where available and targeted fallbacks where needed. Current support includes Python, JavaScript/TypeScript/TSX, Go, Rust, Java, C/C++, C#, Ruby, Kotlin, Swift, PHP, Scala, Solidity, Dart, R, Perl, Lua/Luau, Objective-C, shell scripts, Elixir, Zig, PowerShell, Julia, ReScript, GDScript, Nix, Verilog/SystemVerilog, SQL, Vue/Svelte SFCs, Astro files parsed through the TypeScript parser, Jupyter/Databricks notebooks (`.ipynb`), and Perl XS files (`.xs`).
+Parser support covers functions, classes, imports, call sites, inheritance, and test detection across the current parser surface, using Tree-sitter where available and targeted fallbacks where needed. Current support includes Python, JavaScript/TypeScript/TSX, Go, Rust, Java, C/C++, C#, Ruby, Kotlin, Swift, PHP, Scala, Solidity, Dart, R, Perl, Lua/Luau, Objective-C, shell scripts, Elixir, Zig, PowerShell, Julia, ReScript, GDScript, Nix, Verilog/SystemVerilog, SQL, Oracle PL/SQL, Vue/Svelte SFCs, Astro files parsed through the TypeScript parser, Jupyter/Databricks notebooks (`.ipynb`), and Perl XS files (`.xs`).
---
@@ -198,7 +198,7 @@ Blast-radius analysis reaches 100% recall on every one of the 13 evaluation comm
| Feature | Details |
|---------|---------|
| **Incremental updates** | Re-parses only changed files. Subsequent updates complete in under 2 seconds. |
-| **Broad language + notebook support** | Python, JavaScript/TypeScript/TSX, Go, Rust, Java, C/C++, C#, Ruby, Kotlin, Swift, PHP, Scala, Solidity, Dart, R, Perl, Lua/Luau, Objective-C, shell scripts, Elixir, Zig, PowerShell, Julia, ReScript, GDScript, Nix, Verilog/SystemVerilog, SQL, Vue/Svelte SFCs, Astro files parsed through the TypeScript parser, Jupyter/Databricks (.ipynb), and Perl XS (.xs) |
+| **Broad language + notebook support** | Python, JavaScript/TypeScript/TSX, Go, Rust, Java, C/C++, C#, Ruby, Kotlin, Swift, PHP, Scala, Solidity, Dart, R, Perl, Lua/Luau, Objective-C, shell scripts, Elixir, Zig, PowerShell, Julia, ReScript, GDScript, Nix, Verilog/SystemVerilog, SQL, Oracle PL/SQL, Vue/Svelte SFCs, Astro files parsed through the TypeScript parser, Jupyter/Databricks (.ipynb), and Perl XS (.xs) |
| **Blast-radius analysis** | Shows which functions, classes, and files are likely affected by a change |
| **Auto-update hooks** | Hooks and watch mode can update the graph on file saves and supported commit hooks |
| **Semantic search** | Optional vector embeddings via sentence-transformers, Google Gemini, MiniMax, or any OpenAI-compatible endpoint (real OpenAI, Azure, new-api, LiteLLM, vLLM, LocalAI) |
@@ -562,6 +562,18 @@ pip install -e ".[dev]"
pytest
```
+<details>
+<summary><strong>Oracle PL/SQL projects</strong></summary>
+<br>
+
+Oracle PL/SQL objects live inside the database, not the filesystem. To use `code-review-graph` with an Oracle codebase you need the source files checked out locally first — export them from **SQL Developer** (Tools → Export DDL), **TOAD** (Schema Browser → Script), or pull them from your version-control repository.
+
+The parser recognises these Oracle-specific extensions automatically: `.pks` (package spec), `.pkb` (package body), `.prc` (procedure), `.fnc` (function), `.trg` (trigger), `.pls` / `.plsql` (generic PL/SQL). Plain `.sql` files are also detected when their first lines contain an Oracle object keyword (`PACKAGE`, `TRIGGER`, `PROCEDURE`, `FUNCTION`, `TYPE`).
+
+Folder layout does not matter — any directory structure is scanned recursively. Wrapped (obfuscated) files are silently skipped. No Oracle database connection is required at any point.
+
+</details>
+
Adding a new language
diff --git a/code_review_graph/parser.py b/code_review_graph/parser.py
index f94519db..a42abc76 100644
--- a/code_review_graph/parser.py
+++ b/code_review_graph/parser.py
@@ -142,6 +142,14 @@ class EdgeInfo:
".v": "verilog",
".vh": "verilog",
".sql": "sql",
+ # Oracle PL/SQL: projects using Oracle-specific file extensions
+ ".plsql": "plsql", # generic PL/SQL (also used as test fixture extension)
+ ".pls": "plsql", # generic PL/SQL source
+ ".pks": "plsql", # package specification
+ ".pkb": "plsql", # package body
+ ".prc": "plsql", # stored procedure
+ ".fnc": "plsql", # function
+ ".trg": "plsql", # trigger
}
# Shebang interpreter → language mapping for extension-less Unix scripts.
@@ -235,6 +243,8 @@ class EdgeInfo:
"gdscript": ["class_definition", "class_name_statement"],
# SQL: CREATE TABLE / CREATE VIEW are handled via _parse_sql dispatch.
"sql": [],
+ # PL/SQL (Oracle): all constructs handled via _parse_plsql dispatch.
+ "plsql": [],
}
_FUNCTION_TYPES: dict[str, list[str]] = {
@@ -296,6 +306,8 @@ class EdgeInfo:
"gdscript": ["function_definition"],
# SQL: CREATE FUNCTION / CREATE PROCEDURE handled via _parse_sql dispatch.
"sql": [],
+ # PL/SQL (Oracle): all constructs handled via _parse_plsql dispatch.
+ "plsql": [],
}
_IMPORT_TYPES: dict[str, list[str]] = {
@@ -348,6 +360,8 @@ class EdgeInfo:
"gdscript": ["extends_statement"],
# SQL: table references extracted as IMPORTS_FROM via _parse_sql dispatch.
"sql": [],
+ # PL/SQL (Oracle): all constructs handled via _parse_plsql dispatch.
+ "plsql": [],
}
_CALL_TYPES: dict[str, list[str]] = {
@@ -406,6 +420,8 @@ class EdgeInfo:
"gdscript": ["call", "attribute_call"],
# SQL: no call edges extracted (grammar too unreliable for procedure calls).
"sql": [],
+ # PL/SQL (Oracle): all constructs handled via _parse_plsql dispatch.
+ "plsql": [],
}
# Patterns that indicate a test function
@@ -753,6 +769,33 @@ def _is_test_file(path: str) -> bool:
return any(p.search(path) for p in _TEST_FILE_PATTERNS)
+# ---------------------------------------------------------------------------
+# Oracle PL/SQL detection helpers (module-level, used by parse_bytes)
+# ---------------------------------------------------------------------------
+
+# Matches the opening of an Oracle PL/SQL object definition, with or without a
+# leading CREATE [OR REPLACE] [EDITIONABLE | NONEDITIONABLE] prefix. Anchored at
+# the start of the text (no MULTILINE): strip leading comments before matching.
+_ORACLE_HEADER_RE = re.compile(
+    r"^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?"
+    r"(?:PACKAGE(?:\s+BODY)?|TRIGGER|PROCEDURE|FUNCTION|TYPE(?:\s+BODY)?)\s+",
+    re.IGNORECASE,
+)
+
+# Whitespace, "--" line comments and "/* ... */" block comments that may precede
+# the object header at the top of a script.
+_ORACLE_LEADING_NOISE_RE = re.compile(r"\A(?:\s+|--[^\n]*|/\*.*?\*/)+", re.DOTALL)
+
+# Wrapped (obfuscated) units carry the literal word "wrapped" right after the
+# object name, e.g. "PACKAGE BODY blc_accounts wrapped". Requiring the object
+# header in front of it keeps an ordinary comment that merely mentions
+# "wrapped" from silently discarding a readable file.
+_ORACLE_WRAPPED_RE = re.compile(
+    r"^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?"
+    r'(?:PACKAGE(?:\s+BODY)?|PROCEDURE|FUNCTION|TYPE(?:\s+BODY)?)\s+[\w.$#"]+\s+wrapped\b',
+    re.IGNORECASE | re.MULTILINE,
+)
+
+
+def _is_oracle_plsql(source: bytes) -> bool:
+    """Return True if *source* opens with an Oracle PL/SQL object definition.
+
+    Only the first 500 bytes are inspected. Leading whitespace and SQL comments
+    (``--`` and ``/* ... */``) are skipped before the header is matched, so a
+    comment banner at the top of the file does not hide the object keyword.
+
+    Wrapped (obfuscated) files also match here — the caller must detect and
+    skip them separately.
+    """
+    head = source[:500].decode("utf-8", errors="replace")
+    return bool(_ORACLE_HEADER_RE.match(_ORACLE_LEADING_NOISE_RE.sub("", head, count=1)))
+
+
def _is_test_function(
name: str, file_path: str, decorators: tuple[str, ...] = (),
) -> bool:
@@ -934,11 +977,21 @@ def parse_bytes(self, path: Path, source: bytes) -> tuple[list[NodeInfo], list[E
if language == "rescript":
return self._parse_rescript(path, source)
- # SQL: dedicated parser — tree-sitter for tables/views/functions +
- # regex fallback for CREATE PROCEDURE (unsupported by the grammar).
+ # SQL / Oracle PL/SQL: route to the appropriate dedicated parser.
+ # Wrapped Oracle objects (obfuscated bytecode) are skipped entirely.
if language == "sql":
+ if _is_oracle_plsql(source):
+ if _ORACLE_WRAPPED_RE.search(source[:300].decode("utf-8", errors="replace")):
+ return [], []
+ return self._parse_plsql(path, source)
return self._parse_sql(path, source)
+ # PL/SQL via Oracle-specific file extensions (.pks, .pkb, .prc, etc.)
+ if language == "plsql":
+ if _ORACLE_WRAPPED_RE.search(source[:300].decode("utf-8", errors="replace")):
+ return [], []
+ return self._parse_plsql(path, source)
+
parser = self._get_parser(language)
if not parser:
return [], []
@@ -2110,6 +2163,365 @@ def _extract_sql_ddl(
line=line_start,
))
+ # ------------------------------------------------------------------
+ # PL/SQL parser (Oracle) — regex constants and parser method
+ # ------------------------------------------------------------------
+
+ # Every CREATE-prefixed pattern below accepts the optional
+ # EDITIONABLE / NONEDITIONABLE keyword that Oracle places between
+ # CREATE [OR REPLACE] and the object keyword.
+
+ # PACKAGE spec: "PACKAGE ["]name["]" with optional CREATE [OR REPLACE].
+ # IS/AS may appear on a later line after comments, so we only capture name.
+ _PLSQL_PACKAGE_SPEC_RE = re.compile(
+     r"^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?"
+     r"PACKAGE\s+(?!BODY\b)"
+     r'"?([\w.]+)"?',
+     re.IGNORECASE | re.MULTILINE,
+ )
+
+ # PACKAGE BODY: IS/AS may appear on the same or next line (\s+ covers both).
+ _PLSQL_PACKAGE_BODY_RE = re.compile(
+     r"^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?"
+     r"PACKAGE\s+BODY\s+"
+     r'"?([\w.]+)"?\s+(?:IS|AS)\b',
+     re.IGNORECASE | re.MULTILINE,
+ )
+
+ # Standalone PROCEDURE (with or without CREATE OR REPLACE / schema prefix).
+ _PLSQL_PROC_RE = re.compile(
+     r"^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?"
+     r"PROCEDURE\s+"
+     r'"?([\w.]+)"?',
+     re.IGNORECASE | re.MULTILINE,
+ )
+
+ # Standalone FUNCTION.
+ _PLSQL_FUNC_RE = re.compile(
+     r"^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?"
+     r"FUNCTION\s+"
+     r'"?([\w.]+)"?',
+     re.IGNORECASE | re.MULTILINE,
+ )
+
+ # TRIGGER with event and table captured.
+ # Groups: 1 = trigger name, 2 = timing, 3 = event list, 4 = target table.
+ _PLSQL_TRIGGER_RE = re.compile(
+     r"^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?"
+     r"TRIGGER\s+"
+     r'"?([\w.]+)"?'
+     r"\s+(BEFORE|AFTER|INSTEAD\s+OF)\s+"
+     r"((?:INSERT|UPDATE|DELETE)(?:\s+OR\s+(?:INSERT|UPDATE|DELETE))*)"
+     r"\s+ON\s+"
+     r'"?([\w.]+)"?',
+     re.IGNORECASE | re.MULTILINE,
+ )
+
+ # TYPE spec (TABLE OF, AS OBJECT, UNDER, etc.).
+ # FORCE follows the type name. The trailing EDITIONABLE/NONEDITIONABLE
+ # alternative is kept for backward compatibility; each optional keyword
+ # carries its own \s+ so that "FORCE AS" doesn't consume the space before AS.
+ _PLSQL_TYPE_RE = re.compile(
+     r"^\s*(?:CREATE\s+(?:OR\s+REPLACE\s+)?(?:(?:NON)?EDITIONABLE\s+)?)?"
+     r"TYPE\s+(?!BODY\b)"
+     r'"?([\w.]+)"?'
+     r"(?:\s+FORCE)?"
+     r"(?:\s+(?:EDITIONABLE|NONEDITIONABLE))?"
+     r"\s+(?:AS|IS|UNDER)\b",
+     re.IGNORECASE | re.MULTILINE,
+ )
+
+ # PROCEDURE/FUNCTION member inside a package body slice.
+ # Leading spaces/tabs are optional: members may sit at column 0 or be
+ # indented. [ \t]* (rather than \s*) keeps the match on the declaration's
+ # own line, so match.start() still maps to the right line number.
+ _PLSQL_MEMBER_RE = re.compile(
+     r"^[ \t]*(?:MEMBER\s+)?(PROCEDURE|FUNCTION)\s+(\w+)",
+     re.IGNORECASE | re.MULTILINE,
+ )
+
+ # Package-qualified call: PKG.PROC( or SCHEMA.PKG.PROC( (matches innermost pair)
+ _PLSQL_INTERCALL_RE = re.compile(
+     r"\b([A-Za-z_][A-Za-z0-9_$#]*)\.([A-Za-z_][A-Za-z0-9_$#]*)\s*\(",
+     re.IGNORECASE,
+ )
+
+ # Oracle built-in / system packages — suppress noisy but semantically
+ # uninteresting edges (infra calls rather than business-logic calls).
+ # Names are stored upper-case; callers compare against pkg.upper().
+ _ORACLE_SYSTEM_PKGS: frozenset[str] = frozenset({
+     "DBMS_OUTPUT", "DBMS_SCHEDULER", "DBMS_SQL", "DBMS_LOB", "DBMS_CRYPTO",
+     "DBMS_UTILITY", "DBMS_METADATA", "DBMS_STATS", "DBMS_LOCK", "DBMS_PIPE",
+     "DBMS_ALERT", "DBMS_TRANSACTION", "DBMS_SESSION", "DBMS_APPLICATION_INFO",
+     "DBMS_RANDOM", "DBMS_XMLGEN", "DBMS_ROWID", "DBMS_TYPES",
+     "UTL_FILE", "UTL_HTTP", "UTL_SMTP", "UTL_TCP", "UTL_RAW",
+     "UTL_I18N", "UTL_URL", "UTL_ENCODE",
+     "SYS", "STANDARD", "APEX_UTIL", "HTP", "OWA_UTIL",
+ })
+
+ def _extract_plsql_calls(
+     self,
+     body_text: str,
+     body_offset: int,
+     source_qname: str,
+     file_path_str: str,
+     edges: list[EdgeInfo],
+     line_of_fn,
+ ) -> None:
+     """Emit CALLS edges for package-qualified calls found in *body_text*.
+
+     Calls into Oracle-supplied packages are dropped: any qualifier starting
+     with ``DBMS_`` or ``UTL_``, plus the explicit names in
+     ``_ORACLE_SYSTEM_PKGS``. PL/SQL identifiers are case-insensitive, so
+     targets are de-duplicated ignoring case; the first spelling seen is the
+     one recorded on the edge. Each unique PKG.PROC target therefore
+     produces at most one edge per invocation.
+
+     Args:
+         body_text: Source text to scan (comments and literals are not stripped).
+         body_offset: Character offset of *body_text* within the full source,
+             used to compute correct line numbers.
+         source_qname: Qualified name recorded as the edge source.
+         file_path_str: File path recorded on every emitted edge.
+         edges: Output list; new edges are appended in place.
+         line_of_fn: Callable mapping an absolute character offset to a
+             1-based line number.
+     """
+     seen_calls: set[str] = set()
+     for cm in self._PLSQL_INTERCALL_RE.finditer(body_text):
+         pkg, proc = cm.group(1), cm.group(2)
+         pkg_upper = pkg.upper()
+         # The prefix test covers the whole DBMS_*/UTL_* family, not only the
+         # members that happen to be enumerated in _ORACLE_SYSTEM_PKGS.
+         if pkg_upper.startswith(("DBMS_", "UTL_")) or pkg_upper in self._ORACLE_SYSTEM_PKGS:
+             continue
+         dedup_key = f"{pkg_upper}.{proc.upper()}"
+         if dedup_key in seen_calls:
+             continue
+         seen_calls.add(dedup_key)
+         edges.append(EdgeInfo(
+             kind="CALLS",
+             source=source_qname,
+             target=f"{pkg}.{proc}",
+             file_path=file_path_str,
+             line=line_of_fn(body_offset + cm.start()),
+         ))
+
+ def _parse_plsql(
+     self, path: Path, source: bytes,
+ ) -> tuple[list[NodeInfo], list[EdgeInfo]]:
+     """Parse an Oracle PL/SQL file using regex (no tree-sitter grammar).
+
+     Extracts:
+     - PACKAGE specs → Class nodes, extra["plsql_kind"]="package"
+     - PACKAGE BODYs → Class nodes, extra["plsql_kind"]="package_body"
+       - member PROCEDURE/FUNCTION → Function nodes with parent_name=pkg
+     - Standalone PROCEDURE → Function nodes, extra["plsql_kind"]="procedure"
+     - Standalone FUNCTION → Function nodes, extra["plsql_kind"]="function"
+     - TRIGGER → Function nodes, extra["plsql_kind"]="trigger"
+     - TYPE → Class nodes, extra["plsql_kind"]="type"
+     - FROM/JOIN references → IMPORTS_FROM edges (reuses _SQL_TABLE_RE)
+     - Package-qualified calls → CALLS edges (via _extract_plsql_calls)
+
+     Reported line numbers point at the line holding the object keyword,
+     even when the regex match began on preceding blank lines. The call
+     scan for a unit stops at its ``END <name>;``. Without a named END it
+     stops at the end of the enclosing package body (members) or at the
+     next line holding only ``/`` (standalone units), rather than running
+     to the end of the file.
+
+     Wrapped (obfuscated) files must be rejected by the caller before reaching
+     this method — they are never passed in.
+
+     Args:
+         path: Location of the file; its string form keys every node and edge.
+         source: Raw file content, decoded as UTF-8 with replacement characters.
+
+     Returns:
+         ``(nodes, edges)``; the File node is always the first node.
+     """
+     text = source.decode("utf-8", errors="replace")
+     file_path_str = str(path)
+
+     nodes: list[NodeInfo] = [NodeInfo(
+         kind="File",
+         name=file_path_str,
+         file_path=file_path_str,
+         line_start=1,
+         line_end=text.count("\n") + 1,
+         language="plsql",
+         is_test=_is_test_file(file_path_str),
+     )]
+     edges: list[EdgeInfo] = []
+
+     seen: set[str] = set()
+     # Bare member names added via the package body loop — used to avoid
+     # re-extracting their CALLS in the standalone proc/func loop below.
+     member_bare_names: set[str] = set()
+     # A line holding only "/" ends a unit in SQL*Plus-style scripts.
+     terminator_re = re.compile(r"^[ \t]*/[ \t]*\r?$", re.MULTILINE)
+
+     def _strip_schema(raw: str) -> str:
+         # Keep the last dotted component and drop surrounding double quotes.
+         return raw.split(".")[-1].strip('"')
+
+     def _qualified(name: str) -> str:
+         return f"{file_path_str}::{name}"
+
+     def _line_of(offset: int) -> int:
+         return text[:offset].count("\n") + 1
+
+     def _decl_line(match: re.Match[str], base: int = 0) -> int:
+         # The patterns start with ^\s*, which under MULTILINE can begin on
+         # blank lines above the keyword; skip that leading whitespace so the
+         # line number is the keyword's own line.
+         matched = match.group(0)
+         lead = len(matched) - len(matched.lstrip())
+         return _line_of(base + match.start() + lead)
+
+     def _script_limit(start: int) -> int:
+         # Offset of the next "/" terminator line at or after *start*.
+         slash = terminator_re.search(text, start)
+         return slash.start() if slash else len(text)
+
+     def _unit_end(name: str, search_from: int, limit: int) -> int:
+         # Offset of "END <name>;" inside [search_from, limit), else *limit*.
+         closer = re.compile(
+             rf"^\s*END\s+{re.escape(name)}\s*;",
+             re.IGNORECASE | re.MULTILINE,
+         )
+         found = closer.search(text, search_from, limit)
+         return found.start() if found else limit
+
+     def _register(kind: str, name: str, line: int, extra: dict, parent: str = "") -> str:
+         # Append the node plus its CONTAINS edge; return the qualified name.
+         target = _qualified(f"{parent}.{name}" if parent else name)
+         nodes.append(NodeInfo(
+             kind=kind, name=name, file_path=file_path_str,
+             line_start=line, line_end=line, language="plsql",
+             parent_name=parent or None, extra=extra,
+         ))
+         edges.append(EdgeInfo(
+             kind="CONTAINS",
+             source=_qualified(parent) if parent else file_path_str,
+             target=target, file_path=file_path_str, line=line,
+         ))
+         return target
+
+     # --- PACKAGE BODY ---
+     # Spec and body use distinct seen-keys so both nodes coexist when a
+     # file contains both (common in Oracle shops).
+     for m in self._PLSQL_PACKAGE_BODY_RE.finditer(text):
+         name = _strip_schema(m.group(1))
+         body_key = _qualified(f"__body__{name}")
+         if body_key not in seen:
+             seen.add(body_key)
+             _register("Class", name, _decl_line(m), {"plsql_kind": "package_body"})
+
+         # Find the body boundary using the package name specifically to
+         # avoid stopping at inner END proc_name; closers.
+         body_start = m.end()
+         body_end = _unit_end(name, body_start, _script_limit(body_start))
+         for mm in self._PLSQL_MEMBER_RE.finditer(text[body_start:body_end]):
+             member_name = mm.group(2)
+             mq = _qualified(f"{name}.{member_name}")
+             if mq in seen:
+                 continue
+             seen.add(mq)
+             _register(
+                 "Function", member_name, _decl_line(mm, body_start),
+                 {"plsql_kind": mm.group(1).lower()}, parent=name,
+             )
+             member_bare_names.add(member_name)
+             # Extract inter-package CALLS from this member's body; the END
+             # search never leaves the enclosing package body.
+             mem_body_start = body_start + mm.end()
+             mem_body_end = _unit_end(member_name, body_start + mm.start(), body_end)
+             self._extract_plsql_calls(
+                 text[mem_body_start:mem_body_end],
+                 mem_body_start, mq, file_path_str, edges, _line_of,
+             )
+
+     # --- PACKAGE SPEC ---
+     for m in self._PLSQL_PACKAGE_SPEC_RE.finditer(text):
+         name = _strip_schema(m.group(1))
+         spec_key = _qualified(f"__spec__{name}")
+         if spec_key not in seen:
+             seen.add(spec_key)
+             _register("Class", name, _decl_line(m), {"plsql_kind": "package"})
+
+     # --- TRIGGERS ---
+     for m in self._PLSQL_TRIGGER_RE.finditer(text):
+         name = _strip_schema(m.group(1))
+         qname = _qualified(name)
+         if qname in seen:
+             continue
+         seen.add(qname)
+         table = _strip_schema(m.group(4))
+         line = _decl_line(m)
+         _register("Function", name, line, {
+             "plsql_kind": "trigger",
+             "trigger_event": f"{m.group(2).upper()} {m.group(3).upper()}",
+             "trigger_table": table,
+         })
+         edges.append(EdgeInfo(
+             kind="IMPORTS_FROM", source=file_path_str,
+             target=table, file_path=file_path_str, line=line,
+         ))
+
+     # --- STANDALONE PROCEDURES, then STANDALONE FUNCTIONS ---
+     standalone_patterns = (
+         (self._PLSQL_PROC_RE, "procedure"),
+         (self._PLSQL_FUNC_RE, "function"),
+     )
+     for pattern, plsql_kind in standalone_patterns:
+         for m in pattern.finditer(text):
+             name = _strip_schema(m.group(1))
+             qname = _qualified(name)
+             if qname in seen:
+                 continue
+             seen.add(qname)
+             _register("Function", name, _decl_line(m), {"plsql_kind": plsql_kind})
+             # Skip units already extracted as package body members to
+             # avoid emitting duplicate CALLS edges with a bare source name.
+             if name in member_bare_names:
+                 continue
+             unit_start = m.end()
+             unit_end = _unit_end(name, m.start(), _script_limit(unit_start))
+             self._extract_plsql_calls(
+                 text[unit_start:unit_end],
+                 unit_start, qname, file_path_str, edges, _line_of,
+             )
+
+     # --- TYPES ---
+     for m in self._PLSQL_TYPE_RE.finditer(text):
+         name = _strip_schema(m.group(1))
+         qname = _qualified(name)
+         if qname not in seen:
+             seen.add(qname)
+             _register("Class", name, _decl_line(m), {"plsql_kind": "type"})
+
+     # --- TABLE REFERENCES (FROM / JOIN) — reuse SQL regex ---
+     seen_refs: set[str] = set()
+     for m in _SQL_TABLE_RE.finditer(text):
+         ref = m.group(1).strip('"`').split(".")[-1]
+         if ref and ref.upper() not in _SQL_KEYWORDS and ref not in seen_refs:
+             seen_refs.add(ref)
+             edges.append(EdgeInfo(
+                 kind="IMPORTS_FROM", source=file_path_str,
+                 target=ref, file_path=file_path_str, line=_line_of(m.start()),
+             ))
+
+     return nodes, edges
+
def _resolve_call_targets(
self,
nodes: list[NodeInfo],
diff --git a/tests/fixtures/sample.plsql b/tests/fixtures/sample.plsql
new file mode 100644
index 00000000..d5304527
--- /dev/null
+++ b/tests/fixtures/sample.plsql
@@ -0,0 +1,85 @@
+-- Oracle PL/SQL fixture covering common real-world Oracle code patterns.
+-- Some files start directly with the object keyword (no CREATE OR REPLACE prefix).
+
+--------------------------------------------------------------------------------
+-- Package specification: PACKAGE name ... IS
+--------------------------------------------------------------------------------
+PACKAGE HR_PKG
+--------------------------------------------------------------------------------
+-- PACKAGE DESCRIPTION: Human resources utilities.
+--------------------------------------------------------------------------------
+IS
+ PROCEDURE hire_employee(p_name VARCHAR2, p_dept NUMBER, pio_Err IN OUT SrvErr);
+ FUNCTION get_salary(p_id NUMBER) RETURN NUMBER;
+END HR_PKG;
+/
+
+--------------------------------------------------------------------------------
+-- Package body: PACKAGE BODY name AS
+--------------------------------------------------------------------------------
+PACKAGE BODY HR_PKG AS
+
+PROCEDURE hire_employee(p_name VARCHAR2, p_dept NUMBER, pio_Err IN OUT SrvErr) IS
+BEGIN
+ INSERT INTO employees (emp_name, dept_id) VALUES (p_name, p_dept);
+ AUDIT_PKG.log_change('HIRE', p_name);
+END hire_employee;
+
+FUNCTION get_salary(p_id NUMBER) RETURN NUMBER IS
+ v_sal NUMBER;
+BEGIN
+ SELECT salary INTO v_sal FROM employees WHERE emp_id = p_id;
+ NOTIF_PKG.send_alert(p_id, v_sal);
+ RETURN v_sal;
+END get_salary;
+
+END HR_PKG;
+/
+
+--------------------------------------------------------------------------------
+-- Trigger: schema-qualified name and table (TRIGGER schema.name BEFORE ... ON schema.table)
+--------------------------------------------------------------------------------
+TRIGGER CURRENCIES.AUDIT_EMP_TRG
+ BEFORE
+ INSERT OR UPDATE
+ ON CURRENCIES.EMPLOYEES
+ REFERENCING OLD AS OLD NEW AS NEW
+ FOR EACH ROW
+BEGIN
+ :new.updated_at := SYSDATE;
+END;
+/
+
+--------------------------------------------------------------------------------
+-- Standalone procedure
+--------------------------------------------------------------------------------
+PROCEDURE standalone_proc(p_id IN NUMBER) IS
+BEGIN
+ UPDATE employees SET active = 0 WHERE emp_id = p_id;
+END;
+/
+
+--------------------------------------------------------------------------------
+-- Schema-qualified procedure with CREATE OR REPLACE (some files use this form), then a standalone function
+--------------------------------------------------------------------------------
+create or replace PROCEDURE HR_SCHEMA.standalone_func_alt(pi_msg VARCHAR2) IS
+BEGIN NULL; END;
+/
+
+FUNCTION standalone_func(p_id IN NUMBER) RETURN VARCHAR2 AS
+ v_name VARCHAR2(100);
+BEGIN
+ SELECT emp_name INTO v_name FROM employees WHERE emp_id = p_id;
+ RETURN v_name;
+END;
+/
+
+--------------------------------------------------------------------------------
+-- Type definition
+--------------------------------------------------------------------------------
+TYPE "ADDRESS_T" FORCE AS OBJECT(
+ street VARCHAR2(100),
+ city VARCHAR2(50),
+ MEMBER FUNCTION to_string RETURN VARCHAR2
+);
+/
diff --git a/tests/test_multilang.py b/tests/test_multilang.py
index afda355e..0db09870 100644
--- a/tests/test_multilang.py
+++ b/tests/test_multilang.py
@@ -2548,3 +2548,170 @@ def test_table_reference_edges(self):
targets = {e.target for e in imports}
# active_orders view and archive procedure both reference orders/users
assert "orders" in targets or "users" in targets
+
+
+# ---------------------------------------------------------------------------
+# Oracle PL/SQL
+# ---------------------------------------------------------------------------
+
+class TestPlsqlParsing:
+    """Regex-based Oracle PL/SQL parser, exercised mostly via sample.plsql."""
+
+    def setup_method(self):
+        self.parser = CodeParser()
+        self.nodes, self.edges = self.parser.parse_file(FIXTURES / "sample.plsql")
+
+    # --- language detection ---
+
+    def test_detects_language_by_extension(self):
+        """Every registered Oracle extension, including .plsql, maps to plsql."""
+        for ext in (".plsql", ".pks", ".pkb", ".prc", ".fnc", ".trg", ".pls"):
+            assert self.parser.detect_language(Path(f"hr{ext}")) == "plsql"
+
+    def test_sql_file_with_oracle_header_routes_to_plsql(self):
+        """A .sql file starting with PACKAGE/TRIGGER/etc. is parsed as PL/SQL."""
+        src = b"PACKAGE BODY hr_pkg AS\nPROCEDURE p IS BEGIN NULL; END;\nEND hr_pkg;\n"
+        nodes, _ = self.parser.parse_bytes(Path("hr.sql"), src)
+        langs = {n.language for n in nodes}
+        assert "plsql" in langs
+
+    def test_wrapped_sql_returns_empty(self):
+        """Wrapped (obfuscated) Oracle files are silently skipped."""
+        src = b"PACKAGE BODY blc_accounts wrapped \na000000\n1\nabcd\n"
+        nodes, edges = self.parser.parse_bytes(Path("blc.sql"), src)
+        assert nodes == [] and edges == []
+
+    def test_wrapped_plsql_extension_returns_empty(self):
+        """The wrapped check also applies to Oracle-specific extensions."""
+        src = b"PACKAGE BODY blc_accounts wrapped \na000000\nabcd\n"
+        nodes, edges = self.parser.parse_bytes(Path("blc.pkb"), src)
+        assert nodes == [] and edges == []
+
+    # --- file node ---
+
+    def test_file_node_exists_and_language(self):
+        file_nodes = [n for n in self.nodes if n.kind == "File"]
+        assert len(file_nodes) == 1
+        assert file_nodes[0].language == "plsql"
+
+    # --- package spec ---
+
+    def test_finds_package_spec(self):
+        pkgs = [n for n in self.nodes
+                if n.kind == "Class" and n.extra.get("plsql_kind") == "package"]
+        assert any(p.name == "HR_PKG" for p in pkgs)
+
+    def test_package_spec_contains_edge(self):
+        contains = [e for e in self.edges if e.kind == "CONTAINS"]
+        targets = {e.target.split("::")[-1] for e in contains}
+        assert "HR_PKG" in targets
+
+    # --- package body ---
+
+    def test_finds_package_body(self):
+        bodies = [n for n in self.nodes
+                  if n.extra.get("plsql_kind") == "package_body"]
+        assert any(b.name == "HR_PKG" for b in bodies)
+
+    def test_finds_package_members(self):
+        members = [n for n in self.nodes
+                   if n.kind == "Function" and n.parent_name == "HR_PKG"]
+        names = {m.name for m in members}
+        assert "hire_employee" in names
+        assert "get_salary" in names
+
+    def test_member_plsql_kind(self):
+        members = {n.name: n for n in self.nodes
+                   if n.kind == "Function" and n.parent_name == "HR_PKG"}
+        assert members["hire_employee"].extra["plsql_kind"] == "procedure"
+        assert members["get_salary"].extra["plsql_kind"] == "function"
+
+    def test_package_member_contains_edge(self):
+        contains = [e for e in self.edges if e.kind == "CONTAINS"]
+        # Edge source should reference the package name
+        pkg_contains = [e for e in contains if "HR_PKG" in e.source]
+        assert len(pkg_contains) >= 2
+
+    # --- trigger ---
+
+    def test_finds_trigger(self):
+        trigs = [n for n in self.nodes
+                 if n.extra.get("plsql_kind") == "trigger"]
+        assert any(t.name == "AUDIT_EMP_TRG" for t in trigs)
+
+    def test_trigger_event_metadata(self):
+        trig = next(n for n in self.nodes if n.extra.get("plsql_kind") == "trigger")
+        assert "trigger_event" in trig.extra
+        assert "trigger_table" in trig.extra
+        assert "INSERT" in trig.extra["trigger_event"]
+
+    def test_trigger_imports_table_edge(self):
+        imports = [e for e in self.edges if e.kind == "IMPORTS_FROM"]
+        targets = {e.target for e in imports}
+        assert "EMPLOYEES" in targets
+
+    # --- standalone procedure / function ---
+
+    def test_finds_standalone_procedure(self):
+        procs = [n for n in self.nodes
+                 if n.kind == "Function"
+                 and n.extra.get("plsql_kind") == "procedure"
+                 and n.parent_name is None]
+        assert any(p.name == "standalone_proc" for p in procs)
+
+    def test_finds_standalone_function(self):
+        funcs = [n for n in self.nodes
+                 if n.kind == "Function"
+                 and n.extra.get("plsql_kind") == "function"
+                 and n.parent_name is None]
+        assert any(f.name == "standalone_func" for f in funcs)
+
+    def test_create_or_replace_procedure_detected(self):
+        """Procedures using CREATE OR REPLACE prefix are also captured."""
+        procs = [n for n in self.nodes
+                 if n.kind == "Function" and n.extra.get("plsql_kind") == "procedure"]
+        names = {p.name for p in procs}
+        assert "standalone_func_alt" in names
+
+    # --- type ---
+
+    def test_finds_type(self):
+        types = [n for n in self.nodes
+                 if n.kind == "Class" and n.extra.get("plsql_kind") == "type"]
+        assert any(t.name == "ADDRESS_T" for t in types)
+
+    # --- table references ---
+
+    def test_table_reference_edges(self):
+        imports = [e for e in self.edges if e.kind == "IMPORTS_FROM"]
+        assert len(imports) >= 1
+
+    # --- inter-package CALLS edges ---
+
+    def test_calls_edges_emitted(self):
+        calls = [e for e in self.edges if e.kind == "CALLS"]
+        assert len(calls) >= 1, "Expected at least one CALLS edge from package body members"
+
+    def test_calls_edge_from_hire_employee_to_audit_pkg(self):
+        calls = [e for e in self.edges if e.kind == "CALLS"]
+        targets = {e.target for e in calls}
+        assert "AUDIT_PKG.log_change" in targets
+
+    def test_calls_edge_from_get_salary_to_notif_pkg(self):
+        calls = [e for e in self.edges if e.kind == "CALLS"]
+        targets = {e.target for e in calls}
+        assert "NOTIF_PKG.send_alert" in targets
+
+    def test_calls_edge_source_is_qualified_member(self):
+        """CALLS edge source must be the package-qualified member, not bare file."""
+        hire_calls = [
+            e for e in self.edges
+            if e.kind == "CALLS" and e.target == "AUDIT_PKG.log_change"
+        ]
+        assert len(hire_calls) == 1
+        assert "HR_PKG.hire_employee" in hire_calls[0].source
+
+    def test_system_pkg_calls_not_emitted(self):
+        """Calls to Oracle system packages (DBMS_*, UTL_*) are suppressed.
+
+        The shared fixture contains no system-package calls, so this test
+        parses a dedicated snippet that does; otherwise it would pass vacuously.
+        """
+        src = (
+            b"PROCEDURE log_it IS\n"
+            b"BEGIN\n"
+            b"  DBMS_OUTPUT.put_line('x');\n"
+            b"  UTL_FILE.put_line(v_file, 'x');\n"
+            b"  AUDIT_PKG.log_change('X', 'y');\n"
+            b"END log_it;\n"
+        )
+        _, edges = self.parser.parse_bytes(Path("log_it.prc"), src)
+        targets = {e.target for e in edges if e.kind == "CALLS"}
+        assert targets == {"AUDIT_PKG.log_change"}