diff --git a/.gitignore b/.gitignore
index 5aa0e7b..bfa93f3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,5 @@ __pycache__/
 node_modules/
 apps/openant-cli/bin/
 libs/openant-core/parsers/go/go_parser/go_parser
+libs/openant-core/parsers/javascript/.openant-npm-install.lock
 # docs/
diff --git a/libs/openant-core/core/parser_adapter.py b/libs/openant-core/core/parser_adapter.py
index 314d470..8ef12ce 100644
--- a/libs/openant-core/core/parser_adapter.py
+++ b/libs/openant-core/core/parser_adapter.py
@@ -9,8 +9,10 @@
 sys.path hacks in the original code.
 """
 
+import contextlib
 import json
 import os
+import shutil
 import subprocess
 import sys
 from pathlib import Path
@@ -20,6 +22,9 @@
 # Root of openant-core (where parsers/ lives)
 _CORE_ROOT = Path(__file__).parent.parent
 
+# JS parser directory (holds its own package.json / node_modules)
+_JS_PARSER_DIR = _CORE_ROOT / "parsers" / "javascript"
+
 
 def detect_language(repo_path: str) -> str:
     """Auto-detect the primary language of a repository.
@@ -374,12 +379,124 @@ def _parse_python(repo_path: str, output_dir: str, processing_level: str, skip_t
 # JavaScript/TypeScript parser
 # ---------------------------------------------------------------------------
 
+def _js_deps_installed() -> bool:
+    """Return True only if a *complete* npm install has previously succeeded.
+
+    Checking that ``node_modules/`` exists is not enough: a prior install that
+    was killed (Ctrl+C, OOM, disk full) leaves a partial directory. npm writes
+    ``node_modules/.package-lock.json`` at the *end* of a successful install,
+    so we use that as the completion sentinel.
+    """
+    return (_JS_PARSER_DIR / "node_modules" / ".package-lock.json").is_file()
+
+
+def _ensure_js_parser_dependencies() -> None:
+    """Install the JS parser's Node dependencies on first use.
+
+    Mirrors the Go CLI's venv bootstrap (apps/openant-cli/internal/python/runtime.go):
+    the first invocation installs, subsequent invocations are a no-op. Runs only
+    when a JS repo is actually being parsed, so Python/Go-only users never need npm.
+
+    Concurrency: uses a lockfile so two parallel parses don't both run
+    ``npm install`` in the same directory (which can corrupt node_modules).
+    """
+    if _js_deps_installed():
+        return
+
+    if not (_JS_PARSER_DIR / "package.json").is_file():
+        raise RuntimeError(
+            f"JS parser package.json not found at {_JS_PARSER_DIR / 'package.json'}. "
+            "The openant-core install may be incomplete."
+        )
+
+    npm = shutil.which("npm")
+    if npm is None:
+        raise RuntimeError(
+            "JavaScript parser dependencies are not installed and `npm` is not on PATH. "
+            f"Install Node.js/npm, then run: npm install (from {_JS_PARSER_DIR})"
+        )
+
+    # Serialize concurrent bootstraps. The lockfile lives next to package.json so
+    # it's always on the same filesystem as the install target.
+    lock_path = _JS_PARSER_DIR / ".openant-npm-install.lock"
+    with _file_lock(lock_path):
+        # Re-check under the lock: another process may have finished while we waited.
+        if _js_deps_installed():
+            return
+
+        print(
+            "[Parser] Installing JS parser dependencies (first run, this may take a minute)...",
+            file=sys.stderr,
+        )
+        result = subprocess.run(
+            [npm, "install"],
+            cwd=str(_JS_PARSER_DIR),
+            stdout=sys.stderr,
+            stderr=sys.stderr,
+        )
+        if result.returncode != 0:
+            raise RuntimeError(
+                f"`npm install` failed in {_JS_PARSER_DIR} with exit code "
+                f"{result.returncode}. See npm output above for details; you can "
+                f"reproduce with: npm install (from {_JS_PARSER_DIR})"
+            )
+
+
+@contextlib.contextmanager
+def _file_lock(lock_path: Path):
+    """Cross-platform exclusive file lock as a context manager.
+
+    Uses ``msvcrt`` on Windows and ``fcntl`` elsewhere. Blocks until the lock is
+    acquired, releases on exit. The lockfile itself is left in place; only the
+    OS-level lock matters for mutual exclusion.
+    """
+    lock_path.parent.mkdir(parents=True, exist_ok=True)
+    # "w" (not "a+") so the file pointer is at byte 0 — msvcrt.locking locks a
+    # range starting at the *current* file position, so different positions
+    # would mean non-overlapping (i.e. non-exclusive) locks.
+    f = open(lock_path, "w")
+    try:
+        if os.name == "nt":
+            import errno
+            import msvcrt
+
+            # LK_LOCK is not an unbounded wait: it retries once a second and
+            # raises OSError after 10 failed attempts. `npm install` routinely
+            # holds the lock longer than that, so keep retrying on contention
+            # and only propagate errors that are not "still locked".
+            while True:
+                f.seek(0)
+                try:
+                    msvcrt.locking(f.fileno(), msvcrt.LK_LOCK, 1)
+                    break
+                except OSError as exc:
+                    if exc.errno not in (errno.EACCES, errno.EDEADLK):
+                        raise
+            try:
+                yield
+            finally:
+                f.seek(0)
+                msvcrt.locking(f.fileno(), msvcrt.LK_UNLCK, 1)
+        else:
+            import fcntl
+
+            fcntl.flock(f.fileno(), fcntl.LOCK_EX)
+            try:
+                yield
+            finally:
+                fcntl.flock(f.fileno(), fcntl.LOCK_UN)
+    finally:
+        f.close()
+
+
 def _parse_javascript(repo_path: str, output_dir: str, processing_level: str, skip_tests: bool = True, name: str = None) -> ParseResult:
     """Invoke the JavaScript/TypeScript parser.
 
     The JS parser is a PipelineTest class that runs Node.js subprocesses.
     We invoke it via subprocess to avoid the sys.path hacks.
     """
+    _ensure_js_parser_dependencies()
+
     print("[Parser] Running JavaScript parser...", file=sys.stderr)
 
     parser_script = _CORE_ROOT / "parsers" / "javascript" / "test_pipeline.py"
diff --git a/libs/openant-core/tests/test_js_parser_bootstrap.py b/libs/openant-core/tests/test_js_parser_bootstrap.py
new file mode 100644
index 0000000..a6c5b99
--- /dev/null
+++ b/libs/openant-core/tests/test_js_parser_bootstrap.py
@@ -0,0 +1,181 @@
+"""Tests for the JS parser's lazy npm-install bootstrap.
+
+Covers `_ensure_js_parser_dependencies` in core.parser_adapter: behavior when
+node_modules is present, missing, partially installed, npm is unavailable, or
+`npm install` fails. These tests monkeypatch subprocess and shutil.which so
+they don't need Node.
+"""
+from pathlib import Path
+
+import pytest
+
+from core import parser_adapter
+
+
+@pytest.fixture
+def fake_parser_dir(tmp_path, monkeypatch):
+    """Point _JS_PARSER_DIR at a tmp dir (with package.json) so tests don't
+    touch the real one."""
+    monkeypatch.setattr(parser_adapter, "_JS_PARSER_DIR", tmp_path)
+    # All happy-path tests assume package.json exists. Tests that need to
+    # exercise the missing-package.json branch can delete it.
+    (tmp_path / "package.json").write_text('{"name": "fake"}')
+    return tmp_path
+
+
+def _mark_installed(parser_dir: Path) -> None:
+    """Create the success sentinel npm writes after a complete install."""
+    nm = parser_dir / "node_modules"
+    nm.mkdir(exist_ok=True)
+    (nm / ".package-lock.json").write_text("{}")
+
+
+def test_skips_install_when_deps_already_installed(fake_parser_dir, monkeypatch):
+    _mark_installed(fake_parser_dir)
+
+    calls = []
+    monkeypatch.setattr(parser_adapter.subprocess, "run", lambda *a, **kw: calls.append((a, kw)))
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
+
+    parser_adapter._ensure_js_parser_dependencies()
+
+    assert calls == []
+
+
+def test_retries_install_when_node_modules_partially_installed(fake_parser_dir, monkeypatch):
+    """A killed prior install leaves node_modules/ but no .package-lock.json
+    sentinel. The bootstrap must retry rather than skip."""
+    (fake_parser_dir / "node_modules").mkdir()  # no .package-lock.json -> partial
+
+    calls = []
+
+    class _Ok:
+        returncode = 0
+
+    def _fake_run(cmd, **kwargs):
+        calls.append((cmd, kwargs))
+        # Simulate npm completing the install by writing the sentinel.
+        _mark_installed(fake_parser_dir)
+        return _Ok()
+
+    monkeypatch.setattr(parser_adapter.subprocess, "run", _fake_run)
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
+
+    parser_adapter._ensure_js_parser_dependencies()
+
+    assert len(calls) == 1, "Partial node_modules should trigger a re-install"
+
+
+def test_runs_npm_install_when_node_modules_missing(fake_parser_dir, monkeypatch):
+    calls = []
+
+    class _Ok:
+        returncode = 0
+
+    def _fake_run(cmd, **kwargs):
+        calls.append((cmd, kwargs))
+        return _Ok()
+
+    monkeypatch.setattr(parser_adapter.subprocess, "run", _fake_run)
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
+
+    parser_adapter._ensure_js_parser_dependencies()
+
+    assert len(calls) == 1
+    cmd, kwargs = calls[0]
+    assert cmd == ["/usr/bin/npm", "install"]
+    assert kwargs["cwd"] == str(fake_parser_dir)
+
+
+def test_raises_when_npm_not_on_path(fake_parser_dir, monkeypatch):
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: None)
+
+    with pytest.raises(RuntimeError, match="npm"):
+        parser_adapter._ensure_js_parser_dependencies()
+
+
+def test_raises_when_package_json_missing(fake_parser_dir, monkeypatch):
+    """If the JS parser dir has no package.json, surface a clear error rather
+    than silently letting npm create an empty install."""
+    (fake_parser_dir / "package.json").unlink()
+
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
+
+    with pytest.raises(RuntimeError, match="package.json not found"):
+        parser_adapter._ensure_js_parser_dependencies()
+
+
+def test_raises_when_npm_install_fails(fake_parser_dir, monkeypatch):
+    class _Fail:
+        returncode = 1
+
+    monkeypatch.setattr(parser_adapter.subprocess, "run", lambda *a, **kw: _Fail())
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
+
+    with pytest.raises(RuntimeError, match="npm install.*exit code 1"):
+        parser_adapter._ensure_js_parser_dependencies()
+
+
+def test_install_failure_message_includes_repro_command(fake_parser_dir, monkeypatch):
+    """The error message must tell the user how to reproduce the install
+    locally so they can read npm's diagnostics."""
+    class _Fail:
+        returncode = 1
+
+    monkeypatch.setattr(parser_adapter.subprocess, "run", lambda *a, **kw: _Fail())
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
+
+    with pytest.raises(RuntimeError) as exc_info:
+        parser_adapter._ensure_js_parser_dependencies()
+
+    msg = str(exc_info.value)
+    assert "npm install" in msg
+    assert str(fake_parser_dir) in msg
+
+
+def test_parse_javascript_surfaces_bootstrap_error(fake_parser_dir, monkeypatch):
+    """When bootstrap fails, _parse_javascript must not run the Node subprocess."""
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: None)
+
+    ran_node = []
+    monkeypatch.setattr(
+        parser_adapter.subprocess,
+        "run",
+        lambda *a, **kw: ran_node.append((a, kw)),
+    )
+
+    with pytest.raises(RuntimeError, match="npm"):
+        parser_adapter._parse_javascript(
+            repo_path="/tmp/fake-repo",
+            output_dir="/tmp/fake-out",
+            processing_level="all",
+        )
+
+    assert ran_node == [], "Node subprocess should not run when bootstrap fails"
+
+
+def test_concurrent_bootstrap_serialized_by_lock(fake_parser_dir, monkeypatch):
+    """The lockfile must serialize installs: the second caller, blocked behind
+    the first, must observe the sentinel on entry and skip its own install."""
+    install_count = 0
+
+    class _Ok:
+        returncode = 0
+
+    def _fake_run(cmd, **kwargs):
+        nonlocal install_count
+        install_count += 1
+        _mark_installed(fake_parser_dir)
+        return _Ok()
+
+    monkeypatch.setattr(parser_adapter.subprocess, "run", _fake_run)
+    monkeypatch.setattr(parser_adapter.shutil, "which", lambda name: "/usr/bin/npm")
+
+    # Two sequential calls in the same process: first installs and writes the
+    # sentinel, second sees the sentinel and is a no-op. (True multi-process
+    # concurrency is exercised by the OS lock; we just verify the
+    # re-check-under-lock + sentinel logic.)
+    parser_adapter._ensure_js_parser_dependencies()
+    parser_adapter._ensure_js_parser_dependencies()
+
+    assert install_count == 1