knostic · ar7casper · May 13, 2026 · May 4, 2026 · May 4, 2026 · May 4, 2026
diff --git a/libs/openant-core/parsers/javascript/dependency_resolver.js b/libs/openant-core/parsers/javascript/dependency_resolver.js
@@ -60,6 +60,21 @@ class DependencyResolver {
   buildCallGraph() {
     for (const [funcId, funcData] of Object.entries(this.functions)) {
       const calls = this._extractCalls(funcData.code, funcId);
+
+      // Merge in any explicit call edges declared by the analyzer.
+      // This is used for cases the body-text regex can't see — e.g.
+      // Express middleware identifiers passed as sibling args:
+      //   app.post('/x', authenticateToken, async (req,res) => {...})
+      const explicitCalls = funcData.explicitCalls || [];
+      const callerFile = funcId.split(':')[0];
+      for (const name of explicitCalls) {
+        if (!name) continue;
+        const resolved = this._resolveCall(name, callerFile, funcId);
+        if (resolved && !calls.includes(resolved)) {
+          calls.push(resolved);
+        }
+      }
+
       this.callGraph[funcId] = calls;
 
       // Build reverse graph

diff --git a/libs/openant-core/parsers/javascript/typescript_analyzer.js b/libs/openant-core/parsers/javascript/typescript_analyzer.js
@@ -240,6 +240,231 @@ class TypeScriptAnalyzer {
     // Extract functions from module.exports.propertyName = function() {...}
     // Pattern used by DVNA and similar CommonJS codebases
     this._extractModuleExportsPropertyFunctions(sourceFile, relativePath);
+
+    // Extract anonymous callbacks used as Express route handlers / middleware
+    // Pattern: app.get('/x', auth, async (req, res) => {...})
+    this._extractExpressRouteCallbacks(sourceFile, relativePath);
+  }
+
+  /**
+   * Express HTTP verbs we recognise on a router/app object.
+   * `use` is included to pick up middleware-mount callbacks.
+   */
+  static EXPRESS_VERBS = new Set([
+    "get",
+    "post",
+    "put",
+    "patch",
+    "delete",
+    "options",
+    "head",
+    "all",
+    "use",
+  ]);
+
+  /**
+   * Walk a source file looking for Express-style route registrations and
+   * emit a synthetic function entry for each anonymous arrow / function
+   * expression used as a callback.
+   *
+   * Recognises patterns of the form:
+   *   <obj>.<verb>(<path>, ...callbacks)
+   *   <obj>.<verb>(...callbacks)         // only for `use`
+   * where `<verb>` is one of the Express HTTP verbs (or `use`) and the
+   * first argument (when present) is a string-literal path.
+   *
+   * For each anonymous callback at index >= 1 we synthesise a function
+   * entry. The last anonymous-or-named callback is treated as the route
+   * handler; earlier callbacks are middleware. Named identifiers in
+   * callback positions are recorded as explicit call edges from the
+   * synthesised callbacks (e.g. `authenticateToken` becomes an upstream
+   * dependency of the handler so call-graph based analyses see the
+   * relationship).
+   */
+  /**
+   * Heuristic: does `receiver` look like an Express app / router?
+   *
+   * We accept identifiers whose name contains `app`, `router`, `routes`, or
+   * `server` (case-insensitive), and chained calls like `app.route(...)` or
+   * `router.route(...)`. We deliberately reject other receivers so generic
+   * `.get(...)` calls on caches / clients / query-builders aren't misread
+   * as routes.
+   */
+  _isPlausibleExpressReceiver(receiver) {
+    if (!receiver) return false;
+    const kind = receiver.getKindName();
+
+    if (kind === "Identifier") {
+      const name = receiver.getText().toLowerCase();
+      return /(^|_)(app|router|routes|server)(\d|$|_)/.test(name)
+        || /app$|router$|routes$|server$/.test(name)
+        || name === "app"
+        || name === "router"
+        || name === "routes"
+        || name === "server";
+    }
+    if (kind === "CallExpression") {
+      // e.g. app.route('/x').get(...) — receiver is the .route() call
+      const inner = receiver.getExpression && receiver.getExpression();
+      if (inner && inner.getKindName && inner.getKindName() === "PropertyAccessExpression") {
+        const innerName = inner.getName && inner.getName();
+        if (innerName === "route" || innerName === "Router") return true;
+      }
+      return false;
+    }
+    if (kind === "PropertyAccessExpression") {
+      // e.g. this.app.get(...) or express.Router().get(...) — accept when
+      // the trailing identifier matches our identifier pattern.
+      const trailing = receiver.getName && receiver.getName();
+      if (!trailing) return false;
+      const lower = trailing.toLowerCase();
+      return ["app", "router", "routes", "server"].some((s) => lower.endsWith(s));
+    }
+    return false;
+  }
+
+  _extractExpressRouteCallbacks(sourceFile, relativePath) {
+    const callExpressions = sourceFile
+      .getDescendantsOfKind(ts.SyntaxKind.CallExpression);
+
+    for (const callExpr of callExpressions) {
+      const expression = callExpr.getExpression();
+      if (!expression || expression.getKindName() !== "PropertyAccessExpression") {
+        continue;
+      }
+
+      const methodName = expression.getName ? expression.getName() : null;
+      if (!methodName || !TypeScriptAnalyzer.EXPRESS_VERBS.has(methodName)) {
+        continue;
+      }
+
+      // Filter to plausibly-Express receivers. Without this we'd match any
+      // `foo.get('x', () => {})` style call (e.g. cache lookups, query
+      // builders) and synthesise bogus route units.
+      const receiver = expression.getExpression
+        ? expression.getExpression()
+        : null;
+      if (!this._isPlausibleExpressReceiver(receiver)) {
+        continue;
+      }
+
+      const args = callExpr.getArguments();
+      if (args.length === 0) continue;
+
+      // Determine whether the first argument is a path string literal.
+      const firstArg = args[0];
+      const firstKind = firstArg.getKindName();
+      let httpPath = null;
+      let callbackStartIndex = 0;
+      if (firstKind === "StringLiteral" || firstKind === "NoSubstitutionTemplateLiteral") {
+        httpPath = firstArg.getLiteralValue
+          ? firstArg.getLiteralValue()
+          : firstArg.getText().slice(1, -1);
+        callbackStartIndex = 1;
+      } else if (methodName === "use") {
+        // `app.use(middleware)` — no path, all args are callbacks.
+        httpPath = null;
+        callbackStartIndex = 0;
+      } else {
+        // Not an Express-shaped call (no string path and not `use`).
+        continue;
+      }
+
+      // Gather the callback arguments (functions + named identifiers).
+      const callbacks = args.slice(callbackStartIndex);
+      if (callbacks.length === 0) continue;
+
+      // We only emit units when at least one callback is an inline
+      // anonymous function. Otherwise the existing extraction logic
+      // already handles named handlers.
+      const hasInline = callbacks.some((a) => {
+        const k = a.getKindName();
+        return k === "ArrowFunction" || k === "FunctionExpression";
+      });
+      if (!hasInline) continue;
+
+      const httpMethod = methodName.toUpperCase();
+      const lastCallbackIndex = callbacks.length - 1;
+
+      // Collect named middleware identifiers (Identifier / PropertyAccess)
+      // that appear as siblings in the args list. They become explicit
+      // call-graph edges from each synthesised callback.
+      const namedMiddleware = [];
+      for (let i = 0; i < callbacks.length; i++) {
+        const arg = callbacks[i];
+        const k = arg.getKindName();
+        if (k === "Identifier") {
+          namedMiddleware.push(arg.getText());
+        } else if (k === "PropertyAccessExpression") {
+          // e.g. middleware.auth — keep the trailing name
+          const name = arg.getName ? arg.getName() : arg.getText();
+          namedMiddleware.push(name);
+        }
+      }
+
+      for (let i = 0; i < callbacks.length; i++) {
+        const arg = callbacks[i];
+        const k = arg.getKindName();
+        if (k !== "ArrowFunction" && k !== "FunctionExpression") continue;
+
+        // Only emit for *anonymous* function expressions. A function
+        // expression with a name like `function named(req,res){}` is
+        // already extracted elsewhere.
+        if (k === "FunctionExpression" && arg.getName && arg.getName()) {
+          continue;
+        }
+
+        const isHandler = i === lastCallbackIndex;
+        const role = isHandler ? "handler" : `middleware:${i}`;
+        const pathLabel = httpPath !== null ? httpPath : "";
+        const baseName = pathLabel
+          ? `${httpMethod} ${pathLabel} [${role}]`
+          : `${httpMethod} [${role}]`;
+        const synthName = baseName;
+
+        const code = arg.getFullText();
+        const startLine = arg.getStartLineNumber();
+        const endLine = arg.getEndLineNumber();
+        // Synthesise an ID that's stable per file/line so two routes on
+        // the same line+path don't collide.
+        const idSuffix = `${httpMethod}:${pathLabel}:${startLine}:${i}`;
+        const functionId = `${relativePath}:express(${idSuffix})`;
+
+        if (this.functions[functionId]) continue;
+
+        const unitType = isHandler ? "route_handler" : "route_middleware";
+        const explicitCalls = namedMiddleware.filter((n) => n && n !== synthName);
+
+        this.functions[functionId] = {
+          name: synthName,
+          code: code,
+          isExported: false,
+          unitType: unitType,
+          startLine: startLine,
+          endLine: endLine,
+          isEntryPoint: isHandler,
+          routeMetadata: {
+            http_method: httpMethod,
+            http_path: httpPath,
+            callback_index: i,
+            total_callbacks: callbacks.length,
+            named_middleware: explicitCalls,
+          },
+          explicitCalls: explicitCalls,
+        };
+
+        // Emit a callGraph entry for the synthesised callback so the
+        // invariant `callGraph keys ≡ functions keys` holds. The named
+        // middleware identifiers are recorded as upstream dependencies via
+        // explicitCalls (merged downstream by dependency_resolver.js); here
+        // we capture any inline call expressions from the callback body so
+        // call-graph based analyses can see them too.
+        this.callGraph[functionId] = this.extractCallsFromFunction(
+          arg,
+          relativePath,
+        );
+      }
+    }
   }
 
   /**

diff --git a/libs/openant-core/parsers/javascript/unit_generator.js b/libs/openant-core/parsers/javascript/unit_generator.js
@@ -239,6 +239,19 @@ class UnitGenerator {
             unitType = 'route_handler';
         }
 
+        // If the analyzer attached Express route metadata directly to the
+        // function (anonymous arrow handler / middleware), surface it on the
+        // unit's `route` field even when no external routes.json was given.
+        if (!routeData && funcData.routeMetadata) {
+            const meta = funcData.routeMetadata;
+            routeData = {
+                method: meta.http_method,
+                path: meta.http_path,
+                handler: funcData.name,
+                middleware: meta.named_middleware || [],
+            };
+        }
+
         // Get upstream dependencies (functions this calls)
         const upstreamIds = this.resolver.getDependencies(functionId);
         const upstreamDependencies = [];
@@ -314,6 +327,10 @@ class UnitGenerator {
                 handler: routeData.handler,
                 middleware: routeData.middleware || []
             } : null,
+            is_entry_point: funcData.isEntryPoint === true ? true : undefined,
+            http_method: funcData.routeMetadata ? funcData.routeMetadata.http_method : undefined,
+            http_path: funcData.routeMetadata ? funcData.routeMetadata.http_path : undefined,
+            callback_index: funcData.routeMetadata ? funcData.routeMetadata.callback_index : undefined,
             ground_truth: {
                 status: 'UNKNOWN',
                 vulnerability_types: [],

diff --git a/libs/openant-core/tests/parsers/javascript/__init__.py b/libs/openant-core/tests/parsers/javascript/__init__.py