Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,5 @@ __pycache__/
node_modules/
apps/openant-cli/bin/
libs/openant-core/parsers/go/go_parser/go_parser
libs/openant-core/parsers/javascript/.openant-npm-install.lock
# docs/
107 changes: 107 additions & 0 deletions libs/openant-core/core/parser_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
sys.path hacks in the original code.
"""

import contextlib
import json
import os
import shutil
import subprocess
import sys
from pathlib import Path
Expand All @@ -20,6 +22,9 @@
# Root of openant-core (where parsers/ lives)
_CORE_ROOT = Path(__file__).parent.parent

# JS parser directory (holds its own package.json / node_modules)
_JS_PARSER_DIR = _CORE_ROOT / "parsers" / "javascript"


def detect_language(repo_path: str) -> str:
"""Auto-detect the primary language of a repository.
Expand Down Expand Up @@ -374,12 +379,114 @@ def _parse_python(repo_path: str, output_dir: str, processing_level: str, skip_t
# JavaScript/TypeScript parser
# ---------------------------------------------------------------------------

def _js_deps_installed() -> bool:
    """Report whether the JS parser's ``node_modules`` looks fully installed.

    The mere presence of ``node_modules/`` is not trusted: an install that was
    interrupted part-way (Ctrl+C, OOM, disk full) can leave the directory
    behind. The check instead looks for ``node_modules/.package-lock.json``,
    the hidden lockfile npm is expected to write once an install has finished
    successfully.

    NOTE(review): the hidden lockfile is an npm v7+ feature -- confirm the
    minimum npm version this project supports.

    Returns:
        True if the sentinel exists as a regular file, False otherwise.
    """
    sentinel = _JS_PARSER_DIR / "node_modules" / ".package-lock.json"
    return sentinel.is_file()


def _ensure_js_parser_dependencies() -> None:
    """Bootstrap the JS parser's Node dependencies the first time they are needed.

    The first call runs ``npm install`` inside the JS parser directory; once
    the completion sentinel exists, later calls return immediately. Because
    this is only reached from the JavaScript parse path, users who never parse
    a JS repo never need npm. The original author notes this mirrors the Go
    CLI's venv bootstrap (apps/openant-cli/internal/python/runtime.go).

    Concurrency: the install is wrapped in a file lock so that two parallel
    parses do not both run ``npm install`` in the same directory.

    Raises:
        RuntimeError: if ``package.json`` is missing from the parser
            directory, if ``npm`` cannot be found on PATH, or if
            ``npm install`` exits with a non-zero status.
    """
    # Fast path: a previous install already completed.
    if _js_deps_installed():
        return

    manifest = _JS_PARSER_DIR / "package.json"
    if not manifest.is_file():
        raise RuntimeError(
            f"JS parser package.json not found at {manifest}. "
            "The openant-core install may be incomplete."
        )

    npm_executable = shutil.which("npm")
    if npm_executable is None:
        raise RuntimeError(
            "JavaScript parser dependencies are not installed and `npm` is not on PATH. "
            f"Install Node.js/npm, then run: npm install (from {_JS_PARSER_DIR})"
        )

    # The lockfile sits beside package.json, i.e. on the same filesystem as
    # the directory npm is about to populate.
    with _file_lock(_JS_PARSER_DIR / ".openant-npm-install.lock"):
        # Someone else may have completed the install while we were waiting
        # for the lock, so look again before doing any work.
        if _js_deps_installed():
            return

        print(
            "[Parser] Installing JS parser dependencies (first run, this may take a minute)...",
            file=sys.stderr,
        )
        # npm's own output is routed to our stderr so stdout stays clean.
        completed = subprocess.run(
            [npm_executable, "install"],
            cwd=str(_JS_PARSER_DIR),
            stdout=sys.stderr,
            stderr=sys.stderr,
        )
        exit_code = completed.returncode
        if exit_code == 0:
            return

        raise RuntimeError(
            f"`npm install` failed in {_JS_PARSER_DIR} with exit code "
            f"{exit_code}. See npm output above for details; you can "
            f"reproduce with: npm install (from {_JS_PARSER_DIR})"
        )


@contextlib.contextmanager
def _file_lock(lock_path: Path):
    """Hold an exclusive, cross-platform, OS-level lock on *lock_path*.

    Uses ``msvcrt.locking`` on Windows and ``fcntl.flock`` elsewhere. Entering
    the context blocks until the lock is acquired; leaving it (normally or via
    an exception) releases the lock and closes the file. The lockfile itself
    is left on disk -- only the OS-level lock provides mutual exclusion.

    Args:
        lock_path: Path of the lockfile. Missing parent directories are
            created.

    Raises:
        OSError: if the lockfile cannot be opened, or if locking fails for a
            reason other than the lock currently being held by someone else.
    """
    lock_path.parent.mkdir(parents=True, exist_ok=True)
    # "w" (not "a+") so the file pointer starts at byte 0 -- msvcrt.locking
    # locks a range starting at the *current* file position, so different
    # positions would mean non-overlapping (i.e. non-exclusive) locks.
    with open(lock_path, "w") as handle:
        if os.name == "nt":
            import errno
            import msvcrt

            # LK_LOCK is NOT an unbounded wait: the CRT retries once per
            # second and gives up after 10 attempts with EDEADLOCK. An
            # ``npm install`` routinely outlasts that, so keep retrying on
            # that specific error to actually block until the holder is done.
            lock_timeout_errno = getattr(errno, "EDEADLOCK", errno.EDEADLK)
            while True:
                handle.seek(0)
                try:
                    msvcrt.locking(handle.fileno(), msvcrt.LK_LOCK, 1)
                except OSError as exc:
                    if exc.errno != lock_timeout_errno:
                        raise
                else:
                    break
            try:
                yield
            finally:
                # Unlock the same byte range that was locked (offset 0).
                handle.seek(0)
                msvcrt.locking(handle.fileno(), msvcrt.LK_UNLCK, 1)
        else:
            import fcntl

            fcntl.flock(handle.fileno(), fcntl.LOCK_EX)
            try:
                yield
            finally:
                fcntl.flock(handle.fileno(), fcntl.LOCK_UN)


def _parse_javascript(repo_path: str, output_dir: str, processing_level: str, skip_tests: bool = True, name: str = None) -> ParseResult:
"""Invoke the JavaScript/TypeScript parser.

The JS parser is a PipelineTest class that runs Node.js subprocesses.
We invoke it via subprocess to avoid the sys.path hacks.
"""
_ensure_js_parser_dependencies()

print("[Parser] Running JavaScript parser...", file=sys.stderr)

parser_script = _CORE_ROOT / "parsers" / "javascript" / "test_pipeline.py"
Expand Down
181 changes: 181 additions & 0 deletions libs/openant-core/tests/test_js_parser_bootstrap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
"""Tests for the JS parser's lazy npm-install bootstrap.

Covers `_ensure_js_parser_dependencies` in core.parser_adapter: behavior when
node_modules is present, missing, partially installed, npm is unavailable, or
`npm install` fails. These tests monkeypatch subprocess and shutil.which so
they don't need Node.
"""
from pathlib import Path

import pytest

from core import parser_adapter


@pytest.fixture
def fake_parser_dir(tmp_path, monkeypatch):
    """Redirect ``parser_adapter._JS_PARSER_DIR`` to a throwaway directory.

    The temporary directory is seeded with a minimal ``package.json`` so the
    bootstrap gets past its manifest check; a test that wants to exercise the
    missing-manifest branch can delete the file itself. Returns the directory.
    """
    manifest = tmp_path / "package.json"
    manifest.write_text('{"name": "fake"}')
    # Keep the tests away from the real parser directory.
    monkeypatch.setattr(parser_adapter, "_JS_PARSER_DIR", tmp_path)
    return tmp_path


def _mark_installed(parser_dir: Path) -> None:
    """Fake a completed install by writing ``node_modules/.package-lock.json``.

    ``node_modules`` is created under *parser_dir* if it is not already
    there, and the sentinel file is (re)written with an empty JSON object.
    """
    sentinel = parser_dir / "node_modules" / ".package-lock.json"
    sentinel.parent.mkdir(exist_ok=True)
    sentinel.write_text("{}")


def test_skips_install_when_deps_already_installed(fake_parser_dir, monkeypatch):
    """With the sentinel already present, the bootstrap must not call
    ``subprocess.run`` at all."""
    _mark_installed(fake_parser_dir)
    recorded = []

    def _record_run(*args, **kwargs):
        recorded.append((args, kwargs))

    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
    monkeypatch.setattr(parser_adapter.subprocess, "run", _record_run)

    parser_adapter._ensure_js_parser_dependencies()

    assert not recorded


def test_retries_install_when_node_modules_partially_installed(fake_parser_dir, monkeypatch):
    """An interrupted earlier install leaves ``node_modules/`` behind without
    the ``.package-lock.json`` sentinel; the bootstrap has to install again
    instead of treating the directory as complete."""

    class _Succeeded:
        returncode = 0

    invocations = []

    def _install_stub(cmd, **kwargs):
        invocations.append((cmd, kwargs))
        # Behave like a finished npm run: drop the sentinel in place.
        _mark_installed(fake_parser_dir)
        return _Succeeded()

    # Directory exists but the sentinel does not -> looks like a partial install.
    partial_dir = fake_parser_dir / "node_modules"
    partial_dir.mkdir()

    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
    monkeypatch.setattr(parser_adapter.subprocess, "run", _install_stub)

    parser_adapter._ensure_js_parser_dependencies()

    assert len(invocations) == 1, "Partial node_modules should trigger a re-install"


def test_runs_npm_install_when_node_modules_missing(fake_parser_dir, monkeypatch):
    """Without any ``node_modules``, the bootstrap runs exactly one
    ``npm install`` using the resolved npm path, from the parser directory."""

    class _Succeeded:
        returncode = 0

    invocations = []

    def _install_stub(cmd, **kwargs):
        invocations.append((cmd, kwargs))
        return _Succeeded()

    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
    monkeypatch.setattr(parser_adapter.subprocess, "run", _install_stub)

    parser_adapter._ensure_js_parser_dependencies()

    assert len(invocations) == 1
    command, options = invocations[0]
    assert command == ["/usr/bin/npm", "install"]
    assert options["cwd"] == str(fake_parser_dir)


def test_raises_when_npm_not_on_path(fake_parser_dir, monkeypatch):
    """If ``shutil.which`` cannot find npm, the bootstrap raises a
    RuntimeError that mentions npm."""

    def _npm_missing(name):
        return None

    monkeypatch.setattr(parser_adapter.shutil, "which", _npm_missing)

    with pytest.raises(RuntimeError, match="npm"):
        parser_adapter._ensure_js_parser_dependencies()


def test_raises_when_package_json_missing(fake_parser_dir, monkeypatch):
    """A parser directory without ``package.json`` must produce a clear
    error instead of letting npm silently create an empty install."""
    manifest = fake_parser_dir / "package.json"
    manifest.unlink()

    def _npm_found(name):
        return "/usr/bin/npm"

    monkeypatch.setattr(parser_adapter.shutil, "which", _npm_found)

    with pytest.raises(RuntimeError, match="package.json not found"):
        parser_adapter._ensure_js_parser_dependencies()


def test_raises_when_npm_install_fails(fake_parser_dir, monkeypatch):
    """A non-zero exit status from ``npm install`` is surfaced as a
    RuntimeError that reports the exit code."""

    class _Failed:
        returncode = 1

    def _failing_run(*args, **kwargs):
        return _Failed()

    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
    monkeypatch.setattr(parser_adapter.subprocess, "run", _failing_run)

    with pytest.raises(RuntimeError, match="npm install.*exit code 1"):
        parser_adapter._ensure_js_parser_dependencies()


def test_install_failure_message_includes_repro_command(fake_parser_dir, monkeypatch):
    """The failure message has to name both the command and the directory so
    the user can rerun the install by hand and read npm's diagnostics."""

    class _Failed:
        returncode = 1

    def _failing_run(*args, **kwargs):
        return _Failed()

    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
    monkeypatch.setattr(parser_adapter.subprocess, "run", _failing_run)

    with pytest.raises(RuntimeError) as caught:
        parser_adapter._ensure_js_parser_dependencies()

    message = str(caught.value)
    for expected_fragment in ("npm install", str(fake_parser_dir)):
        assert expected_fragment in message


def test_parse_javascript_surfaces_bootstrap_error(fake_parser_dir, monkeypatch):
    """A bootstrap failure must propagate out of ``_parse_javascript`` before
    any subprocess (i.e. the Node-based parser) is launched."""
    launched = []

    def _record_run(*args, **kwargs):
        launched.append((args, kwargs))

    # npm is "not installed", so the bootstrap fails before reaching the parser.
    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: None)
    monkeypatch.setattr(parser_adapter.subprocess, "run", _record_run)

    parse_kwargs = {
        "repo_path": "/tmp/fake-repo",
        "output_dir": "/tmp/fake-out",
        "processing_level": "all",
    }
    with pytest.raises(RuntimeError, match="npm"):
        parser_adapter._parse_javascript(**parse_kwargs)

    assert launched == [], "Node subprocess should not run when bootstrap fails"


def test_concurrent_bootstrap_serialized_by_lock(fake_parser_dir, monkeypatch):
    """Only one install may run, whether the second caller arrives after the
    first has finished or is blocked behind it on the lock.

    Phase 1 makes two sequential calls: the first installs and writes the
    sentinel, the second returns on the fast-path check.

    Phase 2 covers the re-check performed *under the lock*, which two
    sequential calls never reach: the sentinel is removed and ``_file_lock``
    is replaced by a stand-in that writes the sentinel while "waiting",
    simulating another process completing the install in the meantime. The
    caller must then skip its own install.
    """
    import contextlib

    install_count = 0
    lock_acquisitions = []

    class _Ok:
        returncode = 0

    def _fake_run(cmd, **kwargs):
        nonlocal install_count
        install_count += 1
        _mark_installed(fake_parser_dir)
        return _Ok()

    @contextlib.contextmanager
    def _lock_after_other_process_installed(lock_path):
        # By the time the lock is "granted", the competing process has
        # already finished its install and written the sentinel.
        lock_acquisitions.append(lock_path)
        _mark_installed(fake_parser_dir)
        yield

    monkeypatch.setattr(parser_adapter.subprocess, "run", _fake_run)
    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")

    # Phase 1: sequential calls in one process, using the real lock.
    parser_adapter._ensure_js_parser_dependencies()
    parser_adapter._ensure_js_parser_dependencies()
    assert install_count == 1

    # Phase 2: no sentinel at entry, but it appears while we wait on the lock.
    (fake_parser_dir / "node_modules" / ".package-lock.json").unlink()
    monkeypatch.setattr(parser_adapter, "_file_lock", _lock_after_other_process_installed)

    parser_adapter._ensure_js_parser_dependencies()

    assert len(lock_acquisitions) == 1, "Bootstrap should have gone through the lock"
    assert install_count == 1, "Re-check under the lock should skip the install"
Loading