diff --git a/web/packages/lex/index.js b/web/packages/lex/index.js index 72be63f0bac7..2c92a6faae7d 100644 --- a/web/packages/lex/index.js +++ b/web/packages/lex/index.js @@ -7,152 +7,186 @@ */ /** - * @typedef {(this: Lexer, chr: string) => any} DefunctFunction + * A token produced by a {@link LexerAction}. The lexer is agnostic to the + * concrete token shape; consumers pick whatever representation suits them. + * + * @typedef {unknown} Token */ /** - * @typedef {(this: Lexer, ...args: RegExpExecArray) => string | string[] | undefined} RuleAction + * A rule action. Invoked with the regex match (full match followed by capture + * groups) bound to the owning {@link Lexer} so it can read or set `state`, + * `index`, and `reject`. + * + * Return values: + * - `null` (or `undefined` from an implicit return) — discard the match and continue scanning. + * - a single token — yield it from {@link Lexer.lex}. + * - an array of tokens — yield the first; queue the rest for subsequent calls. + * + * @callback LexerAction + * @this {Lexer} + * @param {...string} match + * @returns {Token | Token[] | null | void} */ /** - * @typedef {Object} Rule - * @property {RegExp} pattern - * @property {boolean} global - * @property {RuleAction} action - * @property {number[]} start + * @typedef {object} LexerRule + * @property {RegExp} pattern Sticky-compiled pattern used to probe the input. + * @property {boolean} global Whether the user-supplied pattern was global. + * @property {LexerAction} action + * @property {number[]} start States in which the rule is active. `[0]` is the default state; an empty array means "any state". */ /** - * @typedef {Object} Match + * @typedef {object} LexerMatch * @property {RegExpExecArray} result - * @property {RuleAction} action + * @property {LexerAction} action * @property {number} length + * @property {boolean} global Whether the producing rule was declared with the `g` flag. 
+ */ + +/** + * Handler invoked when no rule matches at the current position. + * + * @callback DefunctHandler + * @this {Lexer} + * @param {string} chr The unexpected character. + * @returns {Token | Token[] | null | void} */ +/** + * @type {DefunctHandler} + */ +function defaultDefunct(chr) { + throw new Error(`Unexpected character at index ${this.index - 1}: ${chr}`); +} + /** * Lexer class for tokenizing input strings. */ export class Lexer { /** - * @type {string[]} - */ - tokens = []; - /** - * @type {Rule[]} - */ - rules = []; - /** - * @type {number} - */ - remove = 0; - /** + * Current lexer state. Rules whose `start` array contains this value (or + * is empty) are eligible to match. Odd-numbered states are also matched + * by rules declared with `start: [0]`, mirroring flex's inclusive states. + * * @type {number} */ state = 0; - /** - * @type {number} - */ + + /** @type {number} */ index = 0; + + /** @type {string} */ + input = ""; + /** - * @type {string} + * When set to `true` from inside an action, the current match is rolled + * back and the next-best match is tried instead. + * + * @type {boolean} */ - input = ""; + reject = false; + + /** @type {LexerRule[]} */ + #rules = []; + + /** @type {Token[]} */ + #tokens = []; + + /** @type {number} */ + #remove = 0; + + /** @type {DefunctHandler} */ + #defunct; /** - * @param {DefunctFunction} [defunct] + * @param {DefunctHandler} [defunct] Optional handler for unexpected characters. */ constructor(defunct) { - defunct ||= function (chr) { - throw new Error("Unexpected character at index " + (this.index - 1) + ": " + chr); - }; - - this.defunct = defunct; + this.#defunct = typeof defunct === "function" ? defunct : defaultDefunct; } /** - * Add a lexing rule. + * Register a tokenization rule. * * @param {RegExp} pattern - * @param {RuleAction} action - * @param {number[]} [start] - * @returns {Lexer} + * @param {LexerAction} action + * @param {number[]} [start] States in which the rule is active. 
Defaults to `[0]`.
+     * @returns {this}
      */
-    addRule = (pattern, action, start) => {
+    addRule(pattern, action, start) {
         const global = pattern.global;
 
         if (!global || !pattern.sticky) {
-            let flags = "gy";
-            if (pattern.multiline) flags += "m";
+            let flags = pattern.multiline ? "gym" : "gy";
             if (pattern.ignoreCase) flags += "i";
             if (pattern.unicode) flags += "u";
             pattern = new RegExp(pattern.source, flags);
         }
 
-        if (!Array.isArray(start)) start = [0];
-
-        this.rules.push({
-            pattern: pattern,
-            global: global,
-            action: action,
-            start: start,
+        this.#rules.push({
+            pattern,
+            global,
+            action,
+            start: Array.isArray(start) ? start : [0],
         });
 
         return this;
-    };
+    }
 
     /**
-     * Set the input string for lexing.
+     * Reset the lexer and load a new input string.
      *
      * @param {string} input
-     * @returns {Lexer}
+     * @returns {this}
      */
-    setInput = (input) => {
-        this.remove = 0;
+    setInput(input) {
+        this.#remove = 0;
         this.state = 0;
         this.index = 0;
-        this.tokens.length = 0;
+        this.#tokens.length = 0;
         this.input = input;
 
         return this;
-    };
+    }
 
     /**
-     * Lex the next token from the input.
+     * Produce the next token from the input, or `null` once exhausted.
* - * @returns {string | string[] | undefined} + * @returns {Token | null} */ - lex = () => { - if (this.tokens.length) return this.tokens.shift(); + lex() { + if (this.#tokens.length) return /** @type {Token} */ (this.#tokens.shift()); this.reject = true; while (this.index <= this.input.length) { - const matches = this.scan().splice(this.remove); + const matches = this.#scan().splice(this.#remove); const index = this.index; while (matches.length) { - if (!this.reject) { - break; - } - const match = matches.shift(); - - if (!match) break; + if (!this.reject) break; - const result = match.result; - const length = match.length; + const match = /** @type {LexerMatch} */ (matches.shift()); + const { result, length } = match; this.index += length; this.reject = false; - this.remove++; + this.#remove++; - let token = match.action.apply(this, result); + let token = match.action.apply( + this, + /** @type {string[]} */ (/** @type {unknown} */ (result)), + ); if (this.reject) { this.index = result.index; - } else if (Array.isArray(token)) { - this.tokens = token.slice(1); - token = token[0]; - } else { - if (length) this.remove = 0; + } else if (token !== null && token !== undefined) { + if (Array.isArray(token)) { + this.#tokens = token.slice(1); + token = token[0]; + } + if (length) this.#remove = 0; return token; } } @@ -161,79 +195,82 @@ export class Lexer { if (index < input.length) { if (this.reject) { - this.remove = 0; - const token = this.defunct(input.charAt(this.index++)); - if (typeof token !== "undefined") { + this.#remove = 0; + const token = this.#defunct(input.charAt(this.index++)); + if (token !== null && token !== undefined) { if (Array.isArray(token)) { - this.tokens = token.slice(1); + this.#tokens = token.slice(1); return token[0]; } - return token; } } else { - if (this.index !== index) this.remove = 0; + if (this.index !== index) this.#remove = 0; this.reject = true; } - } else if (matches.length) this.reject = true; - else break; + } else if 
(matches.length) { + this.reject = true; + } else { + break; + } } - }; + + return null; + } /** - * Scan the input for matches. + * Probe every state-eligible rule at the current position, returning the + * matches sorted by length (longest first), with global rules pinned + * after non-global ones to preserve flex's "longest non-global wins" + * tie-breaking. * - * @returns {Match[]} + * @returns {LexerMatch[]} */ - scan = () => { - /** - * @type {Match[]} - */ + #scan() { + /** @type {LexerMatch[]} */ const matches = []; - let index = 0; const state = this.state; const lastIndex = this.index; const input = this.input; - for (let i = 0, length = this.rules.length; i < length; i++) { - const rule = this.rules[i]; + for (const rule of this.#rules) { const start = rule.start; const states = start.length; + const eligible = + !states || start.indexOf(state) >= 0 || (state % 2 && states === 1 && !start[0]); - if (!states || start.indexOf(state) >= 0 || (state % 2 && states === 1 && !start[0])) { - const pattern = rule.pattern; - pattern.lastIndex = lastIndex; - const result = pattern.exec(input); + if (!eligible) continue; - if (!result || result.index !== lastIndex) { - continue; - } + const pattern = rule.pattern; + pattern.lastIndex = lastIndex; + const result = pattern.exec(input); - let j = matches.push({ - result: result, - action: rule.action, - length: result[0].length, - }); + if (!result || result.index !== lastIndex) continue; - if (rule.global) { - index = j; - } + let j = matches.push({ + result, + action: rule.action, + length: result[0].length, + global: rule.global, + }); - while (--j > index) { - const k = j - 1; + while (--j > 0) { + const k = j - 1; + const cur = matches[j]; + const prev = matches[k]; + const longer = cur.length > prev.length; + const tieFavorsCur = cur.length === prev.length && prev.global && !cur.global; - if (matches[j].length > matches[k].length) { - const temple = matches[j]; - matches[j] = matches[k]; - matches[k] = temple; - 
}
-            }
+                if (!longer && !tieFavorsCur) break;
+
+                matches[j] = prev;
+                matches[k] = cur;
             }
         }
 
         return matches;
-    };
+    }
 }
 
 export default Lexer;
diff --git a/web/src/admin/brands/BrandForm.ts b/web/src/admin/brands/BrandForm.ts
index 0fb21346812e..1b949d8f809a 100644
--- a/web/src/admin/brands/BrandForm.ts
+++ b/web/src/admin/brands/BrandForm.ts
@@ -22,6 +22,7 @@ import { certificateProvider, certificateSelector } from "#admin/brands/Certific
 import {
     AdminFileListUsageEnum,
     Application,
+    AuthenticationEnum,
     Brand,
     CoreApi,
     CoreApplicationsListRequest,
diff --git a/web/src/admin/flows/FlowForm.ts b/web/src/admin/flows/FlowForm.ts
index dd4091c7404e..709dcdfcca6c 100644
--- a/web/src/admin/flows/FlowForm.ts
+++ b/web/src/admin/flows/FlowForm.ts
@@ -14,14 +14,14 @@ import { DesignationToLabel, LayoutToLabel } from "#admin/flows/utils";
 import { policyEngineModes } from "#admin/policies/PolicyEngineModes";
 import {
     AdminFileListUsageEnum,
+    AuthenticationEnum,
     DeniedActionEnum,
     Flow,
     FlowDesignationEnum,
     FlowLayoutEnum,
     FlowsApi,
 } from "@goauthentik/api";
-import { AuthenticationEnum } from "@goauthentik/api/dist/models/AuthenticationEnum.js";
 import { msg } from "@lit/localize";
 import { html, TemplateResult } from "lit";
diff --git a/web/src/admin/stages/BaseStageForm.ts b/web/src/admin/stages/BaseStageForm.ts
index 8d4ed091d32d..21c665bb4656 100644
--- a/web/src/admin/stages/BaseStageForm.ts
+++ b/web/src/admin/stages/BaseStageForm.ts
@@ -1,5 +1,7 @@
 import { ModelForm } from "#elements/forms/ModelForm";
+import type { Stage } from "@goauthentik/api";
+
 import { msg } from "@lit/localize";
 
 export abstract class BaseStageForm extends ModelForm {
diff --git a/web/src/admin/stages/user_write/UserWriteStageForm.ts b/web/src/admin/stages/user_write/UserWriteStageForm.ts
index b327f8a34e15..35497fb752a1 100644
--- a/web/src/admin/stages/user_write/UserWriteStageForm.ts
+++ b/web/src/admin/stages/user_write/UserWriteStageForm.ts
@@ -13,10 +13,10 @@ import {
     CoreGroupsListRequest,
     Group,
     StagesApi,
+    UserCreationModeEnum,
     UserTypeEnum,
     UserWriteStage,
 } from "@goauthentik/api";
-import { UserCreationModeEnum } from "@goauthentik/api/dist/models/UserCreationModeEnum.js";
 import { msg } from "@lit/localize";
 
 import { html, TemplateResult } from "lit";
diff --git a/web/unit/authenticator-validate-challenge-selection.test.ts b/web/test/unit/authenticator-validate-challenge-selection.test.ts
similarity index 100%
rename from web/unit/authenticator-validate-challenge-selection.test.ts
rename to web/test/unit/authenticator-validate-challenge-selection.test.ts
diff --git a/web/test/unit/lexer.test.ts b/web/test/unit/lexer.test.ts
new file mode 100644
index 000000000000..e1f8d0a6fe0d
--- /dev/null
+++ b/web/test/unit/lexer.test.ts
@@ -0,0 +1,317 @@
+/* eslint-disable func-names */
+import { Lexer } from "lex";
+import { describe, expect, it, vi } from "vitest";
+
+const drain = (lexer: Lexer): unknown[] => {
+    const out: unknown[] = [];
+    let token: unknown;
+
+    while ((token = lexer.lex()) !== null) {
+        out.push(token);
+    }
+    return out;
+};
+
+describe("Lexer", () => {
+    describe("addRule", () => {
+        it("returns the lexer for chaining", () => {
+            const lexer = new Lexer();
+            expect(lexer.addRule(/a/, () => "a")).toBe(lexer);
+        });
+
+        it("preserves multiline, ignoreCase, and unicode flags when re-compiling", () => {
+            const lexer = new Lexer(() => null);
+            const seen: string[] = [];
+
+            lexer.addRule(/^a/im, (m) => {
+                seen.push(m);
+            });
+            lexer.setInput("A\nA");
+
+            drain(lexer);
+
expect(seen).toEqual(["A", "A"]); + }); + + it("matches unicode patterns", () => { + const lexer = new Lexer(); + lexer.addRule(/\p{Letter}+/u, (m) => m); + lexer.setInput("café"); + + expect(lexer.lex()).toBe("café"); + }); + }); + + describe("setInput", () => { + it("resets state, index, and pending tokens", () => { + const lexer = new Lexer(); + lexer.addRule(/./, (c) => c); + + lexer.setInput("ab"); + expect(lexer.lex()).toBe("a"); + lexer.state = 7; + + lexer.setInput("xy"); + expect(lexer.state).toBe(0); + expect(lexer.index).toBe(0); + expect(lexer.lex()).toBe("x"); + expect(lexer.lex()).toBe("y"); + }); + + it("returns the lexer for chaining", () => { + const lexer = new Lexer(); + expect(lexer.setInput("")).toBe(lexer); + }); + }); + + describe("tokenization", () => { + it("tokenizes a simple expression", () => { + const lexer = new Lexer(); + lexer + .addRule(/\s+/, () => null) + .addRule(/[a-zA-Z]+/, (m) => ({ type: "ident", value: m })) + .addRule(/\d+/, (m) => ({ type: "num", value: Number(m) })) + .addRule(/[+\-*/]/, (m) => ({ type: "op", value: m })); + + lexer.setInput("foo + 12 * bar"); + expect(drain(lexer)).toEqual([ + { type: "ident", value: "foo" }, + { type: "op", value: "+" }, + { type: "num", value: 12 }, + { type: "op", value: "*" }, + { type: "ident", value: "bar" }, + ]); + }); + + it("skips matches whose action returns null", () => { + const lexer = new Lexer(); + lexer.addRule(/\s+/, () => null).addRule(/\S+/, (m) => m); + + lexer.setInput(" foo bar "); + expect(drain(lexer)).toEqual(["foo", "bar"]); + }); + + it("returns null once the input is exhausted", () => { + const lexer = new Lexer(); + lexer.addRule(/./, (c) => c); + lexer.setInput("a"); + + expect(lexer.lex()).toBe("a"); + expect(lexer.lex()).toBeNull(); + expect(lexer.lex()).toBeNull(); + }); + + it("passes capture groups to the action", () => { + const lexer = new Lexer(); + const calls: string[][] = []; + + lexer.addRule(/(\w+)=(\w+)/, (...args) => { + calls.push(args); + 
return args[0]; + }); + + lexer.setInput("foo=bar"); + lexer.lex(); + expect(calls).toEqual([["foo=bar", "foo", "bar"]]); + }); + + it("binds `this` to the lexer inside the action", () => { + const lexer = new Lexer(); + let captured: Lexer | undefined; + + lexer.addRule(/a/, function () { + // eslint-disable-next-line consistent-this, @typescript-eslint/no-this-alias + captured = this; + return "a"; + }); + + lexer.setInput("a"); + lexer.lex(); + expect(captured).toBe(lexer); + }); + }); + + describe("longest-match tie-breaking", () => { + it("prefers the longer non-global match", () => { + const lexer = new Lexer(); + lexer.addRule(/if/, () => "KW_IF").addRule(/iffy/, () => "IDENT_IFFY"); + + lexer.setInput("iffy"); + expect(lexer.lex()).toBe("IDENT_IFFY"); + }); + + it("treats global rules as fallbacks behind non-global rules of the same length", () => { + const lexer = new Lexer(); + lexer.addRule(/[a-z]+/g, (m) => `g:${m}`).addRule(/foo/, (m) => `s:${m}`); + + lexer.setInput("foo"); + expect(lexer.lex()).toBe("s:foo"); + }); + }); + + describe("multi-token return", () => { + it("yields the first token immediately and queues the rest", () => { + const lexer = new Lexer(); + lexer.addRule(/a/, () => ["A1", "A2", "A3"]); + + lexer.setInput("a"); + expect(lexer.lex()).toBe("A1"); + expect(lexer.lex()).toBe("A2"); + expect(lexer.lex()).toBe("A3"); + expect(lexer.lex()).toBeNull(); + }); + + it("drains the queue before scanning further input", () => { + const lexer = new Lexer(); + lexer.addRule(/a/, () => ["A1", "A2"]).addRule(/b/, () => "B"); + + lexer.setInput("ab"); + expect(drain(lexer)).toEqual(["A1", "A2", "B"]); + }); + }); + + describe("reject", () => { + it("falls through to the next-best match when an action sets reject", () => { + const lexer = new Lexer(); + const order: string[] = []; + + lexer + .addRule(/foo/, function () { + order.push("first"); + this.reject = true; + }) + .addRule(/foo/, () => { + order.push("second"); + return "FOO"; + }); + + 
lexer.setInput("foo"); + expect(lexer.lex()).toBe("FOO"); + expect(order).toEqual(["first", "second"]); + }); + + it("rolls back the lexer index when an action rejects", () => { + const lexer = new Lexer(); + + lexer + .addRule(/abc/, function () { + this.reject = true; + }) + .addRule(/a/, (m) => m); + + lexer.setInput("abc"); + expect(lexer.lex()).toBe("a"); + expect(lexer.index).toBe(1); + }); + }); + + describe("defunct handling", () => { + it("throws by default on unexpected characters", () => { + const lexer = new Lexer(); + lexer.addRule(/a/, (m) => m); + + lexer.setInput("a@"); + expect(lexer.lex()).toBe("a"); + expect(() => lexer.lex()).toThrow(/Unexpected character at index 1: @/); + }); + + it("invokes a custom defunct handler with the offending character", () => { + const defunct = vi.fn((chr: string) => `?${chr}`); + const lexer = new Lexer(defunct); + lexer.addRule(/a/, (m) => m); + + lexer.setInput("a@b"); + expect(drain(lexer)).toEqual(["a", "?@", "?b"]); + expect(defunct).toHaveBeenCalledTimes(2); + expect(defunct.mock.calls[0]?.[0]).toBe("@"); + }); + + it("ignores defunct return values that are null", () => { + const lexer = new Lexer((_chr) => null); + lexer.addRule(/a/, (m) => m); + + lexer.setInput("@@a"); + expect(lexer.lex()).toBe("a"); + expect(lexer.lex()).toBeNull(); + }); + + it("supports array returns from the defunct handler", () => { + const lexer = new Lexer((chr) => [`bad:${chr}`, "extra"]); + lexer.addRule(/a/, (m) => m); + + lexer.setInput("@"); + expect(lexer.lex()).toBe("bad:@"); + expect(lexer.lex()).toBe("extra"); + }); + + it("falls back to the default handler when given a non-function", () => { + // @ts-expect-error — exercising the runtime guard + const lexer = new Lexer("not a function"); + lexer.setInput("@"); + expect(() => lexer.lex()).toThrow(/Unexpected character/); + }); + }); + + describe("states", () => { + it("only fires rules whose start array includes the current state", () => { + const lexer = new Lexer(); + + 
lexer + .addRule(/"/, function () { + this.state = 2; + }) + .addRule( + /"/, + function () { + this.state = 0; + }, + [2], + ) + .addRule(/[^"]+/, (m) => `STR:${m}`, [2]) + .addRule(/[a-z]+/, (m) => `ID:${m}`); + + lexer.setInput('foo"hello"bar'); + expect(drain(lexer)).toEqual(["ID:foo", "STR:hello", "ID:bar"]); + }); + + it("treats an empty start array as 'active in any state'", () => { + const lexer = new Lexer(); + + lexer + .addRule(/!/, function () { + this.state = 5; + return "BANG"; + }) + .addRule(/./, (m) => m, []); + + lexer.setInput("a!b"); + expect(drain(lexer)).toEqual(["a", "BANG", "b"]); + }); + + it("matches inclusive `[0]` rules from odd-numbered states", () => { + const lexer = new Lexer(); + + lexer + .addRule(/#/, function () { + this.state = 1; + }) + .addRule(/[a-z]+/, (m) => m); + + lexer.setInput("ab#cd"); + expect(drain(lexer)).toEqual(["ab", "cd"]); + }); + + it("does not match `[0]` rules from even non-zero states", () => { + const lexer = new Lexer(); + + lexer + .addRule(/#/, function () { + this.state = 2; + }) + .addRule(/[a-z]+/, (m) => m); + + lexer.setInput("ab#cd"); + expect(lexer.lex()).toBe("ab"); + expect(() => lexer.lex()).toThrow(/Unexpected character/); + }); + }); +}); diff --git a/web/test/unit/tsconfig.json b/web/test/unit/tsconfig.json new file mode 100644 index 000000000000..59b690efbf20 --- /dev/null +++ b/web/test/unit/tsconfig.json @@ -0,0 +1,35 @@ +// @file TSConfig used by the web package during development. 
+{
+    "extends": "@goauthentik/tsconfig",
+    "compilerOptions": {
+        "types": ["node"],
+        "checkJs": true,
+        "allowJs": true,
+        "composite": true,
+        "resolveJsonModule": true,
+        "allowSyntheticDefaultImports": true,
+        "emitDeclarationOnly": true,
+        "target": "esnext",
+        "module": "preserve",
+        "moduleResolution": "bundler",
+        "lib": ["DOM", "DOM.Iterable", "ESNext"],
+
+        "noUncheckedIndexedAccess": true
+    },
+    "include": ["./**/*", "../**/*"],
+    "exclude": [
+        // ---
+        "**/out/**/*",
+        "**/dist/**/*",
+        "storybook-static",
+        // TODO: @lit/localize-tools v0.8.0 has a nullish coalescing typing error.
+        // Remove when we upgrade past that.
+        "scripts/pseudolocalize.mjs",
+        "scripts/build-locales.mjs"
+    ],
+    "references": [
+        {
+            "path": "../.."
+        }
+    ]
+}
diff --git a/web/vite.config.js b/web/vite.config.js
index 52a3c55694a7..5c8bcdeb9680 100644
--- a/web/vite.config.js
+++ b/web/vite.config.js
@@ -41,9 +41,12 @@ export default defineConfig({
     projects: [
         {
             test: {
-                include: ["./unit/**/*.{test,spec}.ts", "**/*.unit.{test,spec}.ts"],
-                name: "unit",
+                include: ["./test/unit/**/*.{test,spec}.ts", "**/*.unit.{test,spec}.ts"],
+                name: "Unit Tests",
                 environment: "node",
+                typecheck: {
+                    tsconfig: "./test/unit/tsconfig.json",
+                },
             },
         },
         {
@@ -51,7 +54,7 @@ export default defineConfig({
             setupFiles: ["./test/lit/setup.js"],
             include: ["./browser/**/*.{test,spec}.ts", "**/*.browser.{test,spec}.ts"],
-            name: "browser",
+            name: "Browser Tests",
             browser: {
                 enabled: true,
                 provider: playwright(),