From 1207bbb5ee4ba45e583e5e0dc531792823f771d8 Mon Sep 17 00:00:00 2001 From: joshbouncesecurity Date: Mon, 4 May 2026 21:42:31 +0300 Subject: [PATCH 1/3] fix: extract Express.js anonymous route handler callbacks The JS/TS parser pipeline did not extract anonymous arrow function callbacks used as Express route handlers, so most application code in a typical Express app was invisible to analysis. This change adds detection in typescript_analyzer.js: - Recognises Express-style call sites (`<receiver>.<verb>(<path>, ...callbacks)` with verb in `get|post|put|patch|delete|options|head|all|use`). - Filters the receiver to plausibly-Express identifiers (`app`, `router`, `routes`, `server` and chained `.route(...)`/`.Router()`) so generic `.get(...)` calls on caches/clients aren't misread as routes. - Extracts anonymous arrow / function expressions in callback positions as units, marking the last as `route_handler` (with `is_entry_point: true`) and earlier callbacks as `route_middleware`. - Adds metadata: `http_method`, `http_path`, `callback_index`, `named_middleware`. - Records explicit call-graph edges from each anonymous callback to named middleware identifiers in the same call (e.g. `authenticateToken`); dependency_resolver.js merges these with the body-text regex edges so reachability/upstream-deps work. - unit_generator.js surfaces the metadata on the unit (`route`, `is_entry_point`, `http_method`, `http_path`, `callback_index`). Addresses #21. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../parsers/javascript/dependency_resolver.js | 15 ++ .../parsers/javascript/typescript_analyzer.js | 214 ++++++++++++++++ .../parsers/javascript/unit_generator.js | 17 ++ .../tests/parsers/javascript/__init__.py | 0 .../javascript/test_express_route_handlers.py | 234 ++++++++++++++++++ 5 files changed, 480 insertions(+) create mode 100644 libs/openant-core/tests/parsers/javascript/__init__.py create mode 100644 libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py diff --git a/libs/openant-core/parsers/javascript/dependency_resolver.js b/libs/openant-core/parsers/javascript/dependency_resolver.js index 52d130e..30ab96f 100644 --- a/libs/openant-core/parsers/javascript/dependency_resolver.js +++ b/libs/openant-core/parsers/javascript/dependency_resolver.js @@ -60,6 +60,21 @@ class DependencyResolver { buildCallGraph() { for (const [funcId, funcData] of Object.entries(this.functions)) { const calls = this._extractCalls(funcData.code, funcId); + + // Merge in any explicit call edges declared by the analyzer. + // This is used for cases the body-text regex can't see — e.g. 
+ // Express middleware identifiers passed as sibling args: + // app.post('/x', authenticateToken, async (req,res) => {...}) + const explicitCalls = funcData.explicitCalls || []; + const callerFile = funcId.split(':')[0]; + for (const name of explicitCalls) { + if (!name) continue; + const resolved = this._resolveCall(name, callerFile, funcId); + if (resolved && !calls.includes(resolved)) { + calls.push(resolved); + } + } + this.callGraph[funcId] = calls; // Build reverse graph diff --git a/libs/openant-core/parsers/javascript/typescript_analyzer.js b/libs/openant-core/parsers/javascript/typescript_analyzer.js index a41a80d..97abec1 100644 --- a/libs/openant-core/parsers/javascript/typescript_analyzer.js +++ b/libs/openant-core/parsers/javascript/typescript_analyzer.js @@ -240,6 +240,220 @@ class TypeScriptAnalyzer { // Extract functions from module.exports.propertyName = function() {...} // Pattern used by DVNA and similar CommonJS codebases this._extractModuleExportsPropertyFunctions(sourceFile, relativePath); + + // Extract anonymous callbacks used as Express route handlers / middleware + // Pattern: app.get('/x', auth, async (req, res) => {...}) + this._extractExpressRouteCallbacks(sourceFile, relativePath); + } + + /** + * Express HTTP verbs we recognise on a router/app object. + * `use` is included to pick up middleware-mount callbacks. + */ + static EXPRESS_VERBS = new Set([ + "get", + "post", + "put", + "patch", + "delete", + "options", + "head", + "all", + "use", + ]); + + /** + * Walk a source file looking for Express-style route registrations and + * emit a synthetic function entry for each anonymous arrow / function + * expression used as a callback. + * + * Recognises patterns of the form: + * <receiver>.<verb>(<path>, ...callbacks) + * <receiver>.<verb>(...callbacks) // only for `use` + * where `<verb>` is one of the Express HTTP verbs (or `use`) and the + * first argument (when present) is a string-literal path. 
+ * + * For each anonymous callback (after the path, if any) we synthesise a function + * entry. The last anonymous-or-named callback is treated as the route + * handler; earlier callbacks are middleware. Named identifiers in + * callback positions are recorded as explicit call edges from the + * synthesised callbacks (e.g. `authenticateToken` becomes an upstream + * dependency of the handler so call-graph based analyses see the + * relationship). + */ + /** + * Heuristic: does `receiver` look like an Express app / router? + * + * We accept identifiers whose name ends with `app`, `router`, `routes`, or + * `server`, or has one of those as a `_`-delimited segment (case-insensitive), and any chained `.route(...)` / `.Router()` call such as + * `app.route(...)`. We deliberately reject other receivers so generic + * `.get(...)` calls on caches / clients / query-builders aren't misread + * as routes. + */ + _isPlausibleExpressReceiver(receiver) { + if (!receiver) return false; + const kind = receiver.getKindName(); + + if (kind === "Identifier") { + const name = receiver.getText().toLowerCase(); + return /(^|_)(app|router|routes|server)(\d|$|_)/.test(name) + || /app$|router$|routes$|server$/.test(name) + || name === "app" + || name === "router" + || name === "routes" + || name === "server"; + } + if (kind === "CallExpression") { + // e.g. app.route('/x').get(...) or express.Router().get(...) — receiver is the .route() / .Router() call + const inner = receiver.getExpression && receiver.getExpression(); + if (inner && inner.getKindName && inner.getKindName() === "PropertyAccessExpression") { + const innerName = inner.getName && inner.getName(); + if (innerName === "route" || innerName === "Router") return true; + } + return false; + } + if (kind === "PropertyAccessExpression") { + // e.g. this.app.get(...) or ctx.router.get(...) — accept when + // the trailing property name ends with one of the accepted names. 
+ const trailing = receiver.getName && receiver.getName(); + if (!trailing) return false; + const lower = trailing.toLowerCase(); + return ["app", "router", "routes", "server"].some((s) => lower.endsWith(s)); + } + return false; + } + + _extractExpressRouteCallbacks(sourceFile, relativePath) { + const callExpressions = sourceFile + .getDescendantsOfKind(ts.SyntaxKind.CallExpression); + + for (const callExpr of callExpressions) { + const expression = callExpr.getExpression(); + if (!expression || expression.getKindName() !== "PropertyAccessExpression") { + continue; + } + + const methodName = expression.getName ? expression.getName() : null; + if (!methodName || !TypeScriptAnalyzer.EXPRESS_VERBS.has(methodName)) { + continue; + } + + // Filter to plausibly-Express receivers. Without this we'd match any + // `foo.get('x', () => {})` style call (e.g. cache lookups, query + // builders) and synthesise bogus route units. + const receiver = expression.getExpression + ? expression.getExpression() + : null; + if (!this._isPlausibleExpressReceiver(receiver)) { + continue; + } + + const args = callExpr.getArguments(); + if (args.length === 0) continue; + + // Determine whether the first argument is a path string literal. + const firstArg = args[0]; + const firstKind = firstArg.getKindName(); + let httpPath = null; + let callbackStartIndex = 0; + if (firstKind === "StringLiteral" || firstKind === "NoSubstitutionTemplateLiteral") { + httpPath = firstArg.getLiteralValue + ? firstArg.getLiteralValue() + : firstArg.getText().slice(1, -1); + callbackStartIndex = 1; + } else if (methodName === "use") { + // `app.use(middleware)` — no path, all args are callbacks. + httpPath = null; + callbackStartIndex = 0; + } else { + // Not an Express-shaped call (no string path and not `use`). + continue; + } + + // Gather the callback arguments (functions + named identifiers). 
+ const callbacks = args.slice(callbackStartIndex); + if (callbacks.length === 0) continue; + + // We only emit units when at least one callback is an inline + // anonymous function. Otherwise the existing extraction logic + // already handles named handlers. + const hasInline = callbacks.some((a) => { + const k = a.getKindName(); + return k === "ArrowFunction" || k === "FunctionExpression"; + }); + if (!hasInline) continue; + + const httpMethod = methodName.toUpperCase(); + const lastCallbackIndex = callbacks.length - 1; + + // Collect named middleware identifiers (Identifier / PropertyAccess) + // that appear as siblings in the args list. They become explicit + // call-graph edges from each synthesised callback. + const namedMiddleware = []; + for (let i = 0; i < callbacks.length; i++) { + const arg = callbacks[i]; + const k = arg.getKindName(); + if (k === "Identifier") { + namedMiddleware.push(arg.getText()); + } else if (k === "PropertyAccessExpression") { + // e.g. middleware.auth — keep the trailing name + const name = arg.getName ? arg.getName() : arg.getText(); + namedMiddleware.push(name); + } + } + + for (let i = 0; i < callbacks.length; i++) { + const arg = callbacks[i]; + const k = arg.getKindName(); + if (k !== "ArrowFunction" && k !== "FunctionExpression") continue; + + // Only emit for *anonymous* function expressions. A function + // expression with a name like `function named(req,res){}` is + // already extracted elsewhere. + if (k === "FunctionExpression" && arg.getName && arg.getName()) { + continue; + } + + const isHandler = i === lastCallbackIndex; + const role = isHandler ? "handler" : `middleware:${i}`; + const pathLabel = httpPath !== null ? httpPath : ""; + const baseName = pathLabel + ? 
`${httpMethod} ${pathLabel} [${role}]` + : `${httpMethod} [${role}]`; + const synthName = baseName; + + const code = arg.getFullText(); + const startLine = arg.getStartLineNumber(); + const endLine = arg.getEndLineNumber(); + // Synthesise an ID that's stable per file/line so two routes on + // the same line+path don't collide. + const idSuffix = `${httpMethod}:${pathLabel}:${startLine}:${i}`; + const functionId = `${relativePath}:express(${idSuffix})`; + + if (this.functions[functionId]) continue; + + const unitType = isHandler ? "route_handler" : "route_middleware"; + const explicitCalls = namedMiddleware.filter((n) => n && n !== synthName); + + this.functions[functionId] = { + name: synthName, + code: code, + isExported: false, + unitType: unitType, + startLine: startLine, + endLine: endLine, + isEntryPoint: isHandler, + routeMetadata: { + http_method: httpMethod, + http_path: httpPath, + callback_index: i, + total_callbacks: callbacks.length, + named_middleware: explicitCalls, + }, + explicitCalls: explicitCalls, + }; + } + } } /** diff --git a/libs/openant-core/parsers/javascript/unit_generator.js b/libs/openant-core/parsers/javascript/unit_generator.js index 3650792..7b76219 100644 --- a/libs/openant-core/parsers/javascript/unit_generator.js +++ b/libs/openant-core/parsers/javascript/unit_generator.js @@ -239,6 +239,19 @@ class UnitGenerator { unitType = 'route_handler'; } + // If the analyzer attached Express route metadata directly to the + // function (anonymous arrow handler / middleware), surface it on the + // unit's `route` field even when no external routes.json was given. 
+ if (!routeData && funcData.routeMetadata) { + const meta = funcData.routeMetadata; + routeData = { + method: meta.http_method, + path: meta.http_path, + handler: funcData.name, + middleware: meta.named_middleware || [], + }; + } + // Get upstream dependencies (functions this calls) const upstreamIds = this.resolver.getDependencies(functionId); const upstreamDependencies = []; @@ -314,6 +327,10 @@ class UnitGenerator { handler: routeData.handler, middleware: routeData.middleware || [] } : null, + is_entry_point: funcData.isEntryPoint === true ? true : undefined, + http_method: funcData.routeMetadata ? funcData.routeMetadata.http_method : undefined, + http_path: funcData.routeMetadata ? funcData.routeMetadata.http_path : undefined, + callback_index: funcData.routeMetadata ? funcData.routeMetadata.callback_index : undefined, ground_truth: { status: 'UNKNOWN', vulnerability_types: [], diff --git a/libs/openant-core/tests/parsers/javascript/__init__.py b/libs/openant-core/tests/parsers/javascript/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py b/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py new file mode 100644 index 0000000..cb1e182 --- /dev/null +++ b/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py @@ -0,0 +1,234 @@ +"""Tests for Express anonymous route handler extraction in the JS parser. + +These exercise the typescript_analyzer.js + unit_generator.js pipeline by +running the Node.js scripts as subprocesses (mirroring tests/test_js_parser.py). + +Skips when Node.js or the parser's npm dependencies aren't installed. 
+""" +import json +import shutil +import subprocess +from pathlib import Path + +import pytest + + +PARSERS_JS_DIR = Path(__file__).parent.parent.parent.parent / "parsers" / "javascript" +NODE_MODULES = PARSERS_JS_DIR / "node_modules" + +pytestmark = pytest.mark.skipif( + not shutil.which("node") or not NODE_MODULES.exists(), + reason="Node.js or JS parser npm dependencies not available", +) + + +def _run_node(script_name, *args): + cmd = ["node", str(PARSERS_JS_DIR / script_name)] + list(args) + return subprocess.run(cmd, capture_output=True, text=True, timeout=30) + + +def _analyze(repo_path, file_path): + """Run the analyzer on a single file and return parsed output.""" + result = _run_node("typescript_analyzer.js", str(repo_path), str(file_path)) + assert result.returncode == 0, ( + f"analyzer failed:\nstdout={result.stdout}\nstderr={result.stderr}" + ) + return json.loads(result.stdout) + + +def _generate_units(analyzer_output_path, dataset_output_path): + result = _run_node( + "unit_generator.js", + str(analyzer_output_path), + "--output", str(dataset_output_path), + ) + assert result.returncode == 0, ( + f"unit_generator failed:\nstdout={result.stdout}\nstderr={result.stderr}" + ) + return json.loads(Path(dataset_output_path).read_text()) + + +def _write_fixture(tmp_path: Path, name: str, content: str) -> Path: + repo = tmp_path / name + repo.mkdir(parents=True, exist_ok=True) + file_path = repo / "server.js" + file_path.write_text(content) + return file_path + + +def _express_units(dataset): + return [u for u in dataset["units"] if "express(" in u["id"]] + + +def test_anonymous_handler_with_named_middleware(tmp_path): + """router.post(path, namedMiddleware, async (req, res) => {...}).""" + file_path = _write_fixture( + tmp_path, + "anon_with_mw", + """ +const express = require('express'); +const router = express.Router(); + +function authenticateToken(req, res, next) { next(); } + +router.post('/orders', authenticateToken, async (req, res) => { + const { 
productId, quantity } = req.body; + res.json({ productId, quantity }); +}); + +module.exports = router; +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert len(express_funcs) == 1, f"expected 1 anon handler, got {express_funcs}" + + fid, fdata = next(iter(express_funcs.items())) + assert fdata["unitType"] == "route_handler" + assert fdata["isEntryPoint"] is True + meta = fdata["routeMetadata"] + assert meta["http_method"] == "POST" + assert meta["http_path"] == "/orders" + assert meta["named_middleware"] == ["authenticateToken"] + + # Run unit_generator and verify the call-graph edge to authenticateToken. + analyzer_path = tmp_path / "analyzer.json" + analyzer_path.write_text(json.dumps(out)) + dataset_path = tmp_path / "dataset.json" + dataset = _generate_units(analyzer_path, dataset_path) + + handler_unit = next(u for u in dataset["units"] if u["id"] == fid) + assert handler_unit["unit_type"] == "route_handler" + assert handler_unit["is_entry_point"] is True + assert handler_unit["http_method"] == "POST" + assert handler_unit["http_path"] == "/orders" + assert handler_unit["route"]["method"] == "POST" + assert handler_unit["route"]["path"] == "/orders" + assert handler_unit["route"]["middleware"] == ["authenticateToken"] + + # Call-graph edge: handler -> authenticateToken + upstream_ids = handler_unit["metadata"]["direct_calls"] + auth_id = "server.js:authenticateToken" + assert auth_id in upstream_ids, ( + f"expected handler to call authenticateToken; direct_calls={upstream_ids}" + ) + + +def test_handler_no_middleware(tmp_path): + """app.get(path, (req, res) => res.json([])) — no extra edges.""" + file_path = _write_fixture( + tmp_path, + "no_mw", + """ +const express = require('express'); +const app = express(); +app.get('/users', (req, res) => res.json([])); +module.exports = app; +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + 
express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert len(express_funcs) == 1 + fid, fdata = next(iter(express_funcs.items())) + meta = fdata["routeMetadata"] + assert meta["http_method"] == "GET" + assert meta["http_path"] == "/users" + assert meta["named_middleware"] == [] + assert fdata["isEntryPoint"] is True + + +def test_use_with_multiple_anonymous_callbacks(tmp_path): + """router.use(path, anonMw1, anonMw2, anonHandler) — + one route_handler + two route_middleware units.""" + file_path = _write_fixture( + tmp_path, + "use_multi", + """ +const express = require('express'); +const router = express.Router(); + +router.use('/api', + (req, res, next) => { req.start = Date.now(); next(); }, + (req, res, next) => { console.log(req.path); next(); }, + async (req, res, next) => { + if (!req.headers.authorization) return res.status(401).end(); + next(); + } +); + +module.exports = router; +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert len(express_funcs) == 3, f"expected 3 callbacks, got {list(express_funcs)}" + + by_type = {} + for fdata in express_funcs.values(): + by_type.setdefault(fdata["unitType"], []).append(fdata) + + assert len(by_type.get("route_handler", [])) == 1 + assert len(by_type.get("route_middleware", [])) == 2 + + handler = by_type["route_handler"][0] + assert handler["isEntryPoint"] is True + assert handler["routeMetadata"]["http_method"] == "USE" + assert handler["routeMetadata"]["http_path"] == "/api" + + for mw in by_type["route_middleware"]: + assert mw["isEntryPoint"] is False or mw.get("isEntryPoint") is None + assert mw["routeMetadata"]["http_method"] == "USE" + assert mw["routeMetadata"]["http_path"] == "/api" + assert mw["routeMetadata"]["callback_index"] < 2 + + +def test_non_express_call_is_skipped(tmp_path): + """myCache.get('foo', () => {}) must not be claimed as a route.""" + file_path = 
_write_fixture( + tmp_path, + "non_express", + """ +const myCache = makeCache(); +myCache.get('foo', () => { return 1; }); +const queryBuilder = makeBuilder(); +queryBuilder.post('users', () => {}); +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert express_funcs == {}, ( + f"non-Express receivers must not be extracted; got {list(express_funcs)}" + ) + + +def test_named_handler_no_anonymous_unit(tmp_path): + """router.get('/x', namedHandler) — no anon unit synthesised.""" + file_path = _write_fixture( + tmp_path, + "named", + """ +const express = require('express'); +const router = express.Router(); + +function namedHandler(req, res) { res.send('ok'); } + +router.get('/x', namedHandler); + +module.exports = router; +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert express_funcs == {}, ( + f"named-only callbacks must not synthesise anon units; got {list(express_funcs)}" + ) + # namedHandler should still be picked up by the regular extractor. + assert any( + f.get("name") == "namedHandler" for f in out["functions"].values() + ) From f1f37417245f97cc85e870504273a339e5e0eff4 Mon Sep 17 00:00:00 2001 From: joshbouncesecurity Date: Mon, 4 May 2026 22:18:06 +0300 Subject: [PATCH 2/3] fix: include synthetic Express handlers in callGraph keys The Express anonymous route handler extraction added synthetic entries to `data["functions"]` but not to `data["callGraph"]`, breaking the existing invariant `len(callGraph) == len(functions)` exercised by `test_js_parser.TestTypeScriptAnalyzer::test_builds_call_graph`. Emit a callGraph entry for each synthesised route_handler / route_middleware unit, capturing inline call expressions from the callback body. 
Named middleware identifiers continue to flow through `explicitCalls` and are merged downstream by `dependency_resolver.js`. Add a regression test asserting the callGraph/functions invariant for the synthetic units. --- .../parsers/javascript/typescript_analyzer.js | 11 +++++ .../javascript/test_express_route_handlers.py | 43 +++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/libs/openant-core/parsers/javascript/typescript_analyzer.js b/libs/openant-core/parsers/javascript/typescript_analyzer.js index 97abec1..c565590 100644 --- a/libs/openant-core/parsers/javascript/typescript_analyzer.js +++ b/libs/openant-core/parsers/javascript/typescript_analyzer.js @@ -452,6 +452,17 @@ class TypeScriptAnalyzer { }, explicitCalls: explicitCalls, }; + + // Emit a callGraph entry for the synthesised callback so the + // invariant `callGraph keys ≡ functions keys` holds. The named + // middleware identifiers are recorded as upstream dependencies via + // explicitCalls (merged downstream by dependency_resolver.js); here + // we capture any inline call expressions from the callback body so + // call-graph based analyses can see them too. + this.callGraph[functionId] = this.extractCallsFromFunction( + arg, + relativePath, + ); } } } diff --git a/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py b/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py index cb1e182..43cab83 100644 --- a/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py +++ b/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py @@ -206,6 +206,49 @@ def test_non_express_call_is_skipped(tmp_path): ) +def test_synthetic_handlers_have_call_graph_entries(tmp_path): + """Synthetic Express handlers must also appear as callGraph keys. + + Regression for the invariant `len(callGraph) == len(functions)` that + other tests (e.g. test_js_parser.test_builds_call_graph) rely on. 
+ """ + file_path = _write_fixture( + tmp_path, + "callgraph_invariant", + """ +const express = require('express'); +const router = express.Router(); + +function authenticateToken(req, res, next) { next(); } + +router.post('/orders', authenticateToken, async (req, res) => { + const { productId, quantity } = req.body; + res.json({ productId, quantity }); +}); + +module.exports = router; +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert len(express_funcs) == 1 + + # Every synthetic Express function must have a callGraph entry. + for fid in express_funcs: + assert fid in out["callGraph"], ( + f"synthetic function {fid} missing from callGraph; " + f"callGraph keys={list(out['callGraph'])}" + ) + + # Global invariant: callGraph keys ≡ functions keys. + assert len(out["callGraph"]) == len(out["functions"]), ( + f"callGraph/functions size mismatch: " + f"{len(out['callGraph'])} vs {len(out['functions'])}" + ) + + def test_named_handler_no_anonymous_unit(tmp_path): """router.get('/x', namedHandler) — no anon unit synthesised.""" file_path = _write_fixture( From b3c1fe03b5a5a7f0d8643fe36aa90483df55d93d Mon Sep 17 00:00:00 2001 From: joshbouncesecurity Date: Mon, 4 May 2026 23:03:30 +0300 Subject: [PATCH 3/3] test: cover TS-typed, dynamic-path, no-path, and mixed Express callbacks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds four regression tests for `_extractExpressRouteCallbacks`: - TypeScript-annotated callbacks `(req: Request, res: Response) => {...}` parse correctly and produce the expected synthetic handler unit. - `app.get('/' + prefix, handler)` (dynamic path) is skipped without throwing — confirms the StringLiteral check is a hard gate. - `app.use((req, res, next) => {...})` with no path produces a single unit with http_path=null and http_method='USE'. 
- `app.get(path, anonMw, namedHandler)` (anon middleware before named handler): the anon callback gets a route_middleware unit with named_middleware=['namedHandler'], and the named handler is left to the regular extractor. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../javascript/test_express_route_handlers.py | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) diff --git a/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py b/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py index 43cab83..804e207 100644 --- a/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py +++ b/libs/openant-core/tests/parsers/javascript/test_express_route_handlers.py @@ -249,6 +249,129 @@ def test_synthetic_handlers_have_call_graph_entries(tmp_path): ) +def test_typescript_typed_callback(tmp_path): + """TS callback with type annotations: + `(req: Request, res: Response, next: NextFunction) => {...}`. + + Type annotations on the parameters and return type must not prevent + the AST walk from recognising the callback as an ArrowFunction. 
+ """ + repo = tmp_path / "ts_typed" + repo.mkdir(parents=True, exist_ok=True) + file_path = repo / "server.ts" + file_path.write_text( + """ +import express, { Request, Response, NextFunction } from 'express'; +const app = express(); + +function authenticateToken(req: Request, res: Response, next: NextFunction): void { next(); } + +app.post('/orders', authenticateToken, async (req: Request, res: Response): Promise => { + const { productId, quantity } = req.body; + res.json({ productId, quantity }); +}); + +export default app; +""" + ) + out = _analyze(repo, file_path) + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert len(express_funcs) == 1, ( + f"expected 1 anon TS handler, got {express_funcs}" + ) + fid, fdata = next(iter(express_funcs.items())) + assert fdata["unitType"] == "route_handler" + assert fdata["isEntryPoint"] is True + meta = fdata["routeMetadata"] + assert meta["http_method"] == "POST" + assert meta["http_path"] == "/orders" + assert meta["named_middleware"] == ["authenticateToken"] + + +def test_dynamic_path_does_not_crash(tmp_path): + """`app.get('/' + prefix, handler)` — first arg isn't a string literal. + + The extractor should skip such calls without throwing. We can't + reliably extract a path from a runtime-built expression. + """ + file_path = _write_fixture( + tmp_path, + "dynamic_path", + """ +const express = require('express'); +const app = express(); +const prefix = 'foo'; +app.get('/' + prefix, (req, res) => res.send('ok')); +module.exports = app; +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert express_funcs == {}, ( + f"dynamic path should be skipped, got {list(express_funcs)}" + ) + + +def test_use_no_path_anonymous_middleware(tmp_path): + """`app.use((req, res, next) => {...})` — middleware with no path. 
+ + The synthetic unit should be emitted with http_path=null and + http_method='USE'. + """ + file_path = _write_fixture( + tmp_path, + "use_no_path", + """ +const express = require('express'); +const app = express(); +app.use((req, res, next) => { req.start = Date.now(); next(); }); +module.exports = app; +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert len(express_funcs) == 1, ( + f"expected 1 anon middleware unit, got {list(express_funcs)}" + ) + fid, fdata = next(iter(express_funcs.items())) + meta = fdata["routeMetadata"] + assert meta["http_method"] == "USE" + assert meta["http_path"] is None + + +def test_anon_middleware_named_handler_mixed(tmp_path): + """`app.get(path, anonMw, namedHandler)` — anon middleware before + named handler. Anon gets a route_middleware unit; the named handler + is left to the regular extractor (no synthetic unit for it).""" + file_path = _write_fixture( + tmp_path, + "mixed", + """ +const express = require('express'); +const app = express(); +function namedHandler(req, res) { res.send('ok'); } +app.get('/x', (req, res, next) => { console.log('mw'); next(); }, namedHandler); +module.exports = app; +""", + ) + repo = file_path.parent + out = _analyze(repo, file_path) + express_funcs = {k: v for k, v in out["functions"].items() if "express(" in k} + assert len(express_funcs) == 1, ( + f"expected 1 anon middleware unit, got {list(express_funcs)}" + ) + fid, fdata = next(iter(express_funcs.items())) + assert fdata["unitType"] == "route_middleware" + # named_middleware should include the namedHandler sibling + assert fdata["routeMetadata"]["named_middleware"] == ["namedHandler"] + # namedHandler must still be extracted normally + assert any( + f.get("name") == "namedHandler" for f in out["functions"].values() + ) + + def test_named_handler_no_anonymous_unit(tmp_path): """router.get('/x', namedHandler) — no anon unit 
synthesised.""" file_path = _write_fixture(