diff --git a/src/generators/addon-verify/constants.mjs b/src/generators/addon-verify/constants.mjs deleted file mode 100644 index 50d8d178..00000000 --- a/src/generators/addon-verify/constants.mjs +++ /dev/null @@ -1 +0,0 @@ -export const EXTRACT_CODE_FILENAME_COMMENT = /^\/\/\s+(.*\.(?:cc|h|js))[\r\n]/; diff --git a/src/generators/addon-verify/index.mjs b/src/generators/addon-verify/index.mjs index 1b5c02eb..44d1ec37 100644 --- a/src/generators/addon-verify/index.mjs +++ b/src/generators/addon-verify/index.mjs @@ -5,13 +5,13 @@ import { join } from 'node:path'; import { visit } from 'unist-util-visit'; -import { EXTRACT_CODE_FILENAME_COMMENT } from './constants.mjs'; import { generateFileList } from './utils/generateFileList.mjs'; import { generateSectionFolderName, isBuildableSection, normalizeSectionName, } from './utils/section.mjs'; +import { EXTRACT_CODE_FILENAME_COMMENT } from '../../utils/queries/regex.mjs'; /** * This generator generates a file list from code blocks extracted from diff --git a/src/generators/api-links/constants.mjs b/src/generators/api-links/constants.mjs deleted file mode 100644 index bff60720..00000000 --- a/src/generators/api-links/constants.mjs +++ /dev/null @@ -1,4 +0,0 @@ -'use strict'; - -// Checks if a string is a valid name for a constructor in JavaScript -export const CONSTRUCTOR_EXPRESSION = /^[A-Z]/; diff --git a/src/generators/api-links/utils/extractExports.mjs b/src/generators/api-links/utils/extractExports.mjs index 60746eca..71a97f02 100644 --- a/src/generators/api-links/utils/extractExports.mjs +++ b/src/generators/api-links/utils/extractExports.mjs @@ -2,8 +2,6 @@ import { visit } from 'estree-util-visit'; -import { CONSTRUCTOR_EXPRESSION } from '../constants.mjs'; - /** * @see https://github.com/estree/estree/blob/master/es5.md#assignmentexpression * @@ -99,7 +97,7 @@ function handleExpression(node, basename, nameToLineNumberMap) { case 'Identifier': { exports.identifiers.push(value.name); - if (CONSTRUCTOR_EXPRESSION.test(value.name[0])) { + if (/^[A-Z]/.test(value.name)) { exports.ctors.push(value.name); } diff --git a/src/generators/json-simple/index.mjs b/src/generators/json-simple/index.mjs index 423223f5..5d6b5c1b 100644 --- a/src/generators/json-simple/index.mjs +++ b/src/generators/json-simple/index.mjs @@ -5,7 +5,7 @@ import { join } from 'node:path'; import { remove } from 'unist-util-remove'; -import createQueries from '../../utils/queries/index.mjs'; +import { isHeading, isStabilityNode } from '../../utils/queries/unist.mjs'; /** * This generator generates a simplified JSON version of the API docs and returns it as a string @@ -41,10 +41,7 @@ export default { // Removes numerous nodes from the content that should not be on the "body" // of the JSON version of the API docs as they are already represented in the metadata - remove(content, [ - createQueries.UNIST.isStabilityNode, - createQueries.UNIST.isHeading, - ]); + remove(content, [isStabilityNode, isHeading]); return { ...node, content }; }); diff --git a/src/generators/jsx-ast/utils/buildContent.mjs b/src/generators/jsx-ast/utils/buildContent.mjs index 17b48bf9..b366609b 100644 --- a/src/generators/jsx-ast/utils/buildContent.mjs +++ b/src/generators/jsx-ast/utils/buildContent.mjs @@ -9,7 +9,11 @@ import createPropertyTable from './buildPropertyTable.mjs'; import { DOC_NODE_BLOB_BASE_URL } from '../../../constants.mjs'; import { enforceArray } from '../../../utils/array.mjs'; import { sortChanges } from '../../../utils/generators.mjs'; -import createQueries from '../../../utils/queries/index.mjs'; +import { + isStabilityNode, + isHeading, + isTypedList, +} from '../../../utils/queries/unist.mjs'; import { JSX_IMPORTS } from '../../web/constants.mjs'; import { STABILITY_LEVELS, @@ -219,17 +223,17 @@ export const processEntry = (entry, remark) => { const content = structuredClone(entry.content); // Visit and transform stability nodes - visit(content, createQueries.UNIST.isStabilityNode, transformStabilityNode); + visit(content, isStabilityNode, transformStabilityNode); // Visit and transform headings with metadata and links - visit(content, createQueries.UNIST.isHeading, (...args) => + visit(content, isHeading, (...args) => transformHeadingNode(entry, remark, ...args) ); // Transform typed lists into property tables visit( content, - createQueries.UNIST.isTypedList, + isTypedList, (node, idx, parent) => (parent.children[idx] = createPropertyTable(node)) ); diff --git a/src/generators/jsx-ast/utils/buildPropertyTable.mjs b/src/generators/jsx-ast/utils/buildPropertyTable.mjs index e5d41c9d..6fc32525 100644 --- a/src/generators/jsx-ast/utils/buildPropertyTable.mjs +++ b/src/generators/jsx-ast/utils/buildPropertyTable.mjs @@ -1,6 +1,7 @@ import { h as createElement } from 'hastscript'; -import createQueries from '../../../utils/queries/index.mjs'; +import { TYPED_LIST_STARTERS } from '../../../utils/queries/regex.mjs'; +import { isTypedList } from '../../../utils/queries/unist.mjs'; /** * Determines if a node looks like part of a type annotation. @@ -51,9 +52,7 @@ export const extractPropertyName = children => { // Text with a prefix like "Type:", "Param:", etc. if (first.type === 'text') { - const starterMatch = first.value.match( - createQueries.QUERIES.typedListStarters - ); + const starterMatch = first.value.match(TYPED_LIST_STARTERS); if (starterMatch) { // If the starter is 'Type', we don't have a property. const label = starterMatch[1] !== 'Type' && starterMatch[1]; @@ -133,7 +132,7 @@ export const parseListIntoProperties = node => { // The remaining children are the description desc: children, // Is there a list within this list? - sublist: sublists.find(createQueries.UNIST.isTypedList), + sublist: sublists.find(isTypedList), }); } diff --git a/src/generators/jsx-ast/utils/buildSignature.mjs b/src/generators/jsx-ast/utils/buildSignature.mjs index 8e3caa26..55a9d3a0 100644 --- a/src/generators/jsx-ast/utils/buildSignature.mjs +++ b/src/generators/jsx-ast/utils/buildSignature.mjs @@ -1,7 +1,7 @@ import { highlightToHast } from '@node-core/rehype-shiki'; import { h as createElement } from 'hastscript'; -import createQueries from '../../../utils/queries/index.mjs'; +import { isTypedList } from '../../../utils/queries/unist.mjs'; import { parseListItem } from '../../legacy-json/utils/parseList.mjs'; import parseSignature from '../../legacy-json/utils/parseSignature.mjs'; @@ -89,7 +89,7 @@ export const getFullName = ({ name, text }, fallback = name) => { */ export default ({ children }, { data }, idx) => { // Try to locate the parameter list immediately following the heading - const listIdx = children.findIndex(createQueries.UNIST.isTypedList); + const listIdx = children.findIndex(isTypedList); // Parse parameters from the list, if found const params = diff --git a/src/generators/legacy-html/utils/buildContent.mjs b/src/generators/legacy-html/utils/buildContent.mjs index 220603ee..920ea933 100644 --- a/src/generators/legacy-html/utils/buildContent.mjs +++ b/src/generators/legacy-html/utils/buildContent.mjs @@ -6,7 +6,12 @@ import { SKIP, visit } from 'unist-util-visit'; import buildExtraContent from './buildExtraContent.mjs'; import { DOC_NODE_BLOB_BASE_URL } from '../../../constants.mjs'; -import createQueries from '../../../utils/queries/index.mjs'; +import { LINKS_WITH_TYPES } from '../../../utils/queries/regex.mjs'; +import { + isHeading, + isHtmlWithType, + isStabilityNode, +} from '../../../utils/queries/unist.mjs'; /** * Builds a Markdown heading for a given node @@ -71,7 +76,7 @@ const buildStability = ({ children, data }, index, parent) => { */ const buildHtmlTypeLink = node => { node.value = node.value.replace( - createQueries.QUERIES.linksWithTypes, + LINKS_WITH_TYPES, (_, type, link) => `<${type}>` ); }; @@ -223,16 +228,16 @@ export default (headNodes, metadataEntries, remark) => { const content = structuredClone(entry.content); // Parses the Heading nodes into Heading elements - visit(content, createQueries.UNIST.isHeading, buildHeading); + visit(content, isHeading, buildHeading); // Parses the Blockquotes into Stability elements // This is treated differently as we want to preserve the position of a Stability Index // within the content, so we can't just remove it and append it to the metadata - visit(content, createQueries.UNIST.isStabilityNode, buildStability); + visit(content, isStabilityNode, buildStability); // Parses the type references that got replaced into Markdown links (raw) // into actual HTML links, these then get parsed into HAST nodes on `runSync` - visit(content, createQueries.UNIST.isHtmlWithType, buildHtmlTypeLink); + visit(content, isHtmlWithType, buildHtmlTypeLink); // Splits the content into the Heading node and the rest of the content const [headingNode, ...restNodes] = content.children; diff --git a/src/generators/legacy-json/constants.mjs b/src/generators/legacy-json/constants.mjs index 3fc7a524..b4ecf48f 100644 --- a/src/generators/legacy-json/constants.mjs +++ b/src/generators/legacy-json/constants.mjs @@ -1,19 +1,3 @@ -// Grabs a method's name -export const NAME_EXPRESSION = /^['`"]?([^'`": {]+)['`"]?\s*:?\s*/; - -// Denotes a method's type -export const TYPE_EXPRESSION = /^\{([^}]+)\}\s*/; - -// Checks if there's a leading hyphen -export const LEADING_HYPHEN = /^-\s*/; - -// Grabs the default value if present -export const DEFAULT_EXPRESSION = /\s*\*\*Default:\*\*\s*([^]+)$/i; - -// Grabs the parameters from a method's signature -// ex/ 'new buffer.Blob([sources[, options]])'.match(PARAM_EXPRESSION) === ['([sources[, options]])', '[sources[, options]]'] -export const PARAM_EXPRESSION = /\((.+)\);?$/; - // The plurals associated with each section type. export const SECTION_TYPE_PLURALS = { module: 'modules', diff --git a/src/generators/legacy-json/utils/parseList.mjs b/src/generators/legacy-json/utils/parseList.mjs index 73c36237..91834552 100644 --- a/src/generators/legacy-json/utils/parseList.mjs +++ b/src/generators/legacy-json/utils/parseList.mjs @@ -1,11 +1,11 @@ +import parseSignature from './parseSignature.mjs'; import { + TYPED_LIST_STARTERS, DEFAULT_EXPRESSION, - LEADING_HYPHEN, NAME_EXPRESSION, TYPE_EXPRESSION, -} from '../constants.mjs'; -import parseSignature from './parseSignature.mjs'; -import createQueries from '../../../utils/queries/index.mjs'; +} from '../../../utils/queries/regex.mjs'; +import { isTypedList } from '../../../utils/queries/unist.mjs'; import { transformNodesToString } from '../../../utils/unist.mjs'; /** @@ -46,7 +46,7 @@ export const extractPattern = (text, pattern, key, current) => { export function parseListItem(child) { const current = {}; - const subList = child.children.find(createQueries.UNIST.isTypedList); + const subList = child.children.find(isTypedList); // Extract and clean raw text from the node, excluding nested lists current.textRaw = transformTypeReferences( @@ -58,7 +58,7 @@ export function parseListItem(child) { let text = current.textRaw; // Identify return items or extract key properties (name, type, default) from the text - const starter = text.match(createQueries.QUERIES.typedListStarters); + const starter = text.match(TYPED_LIST_STARTERS); if (starter) { current.name = starter[1] === 'Returns' ? 'return' : starter[1].toLowerCase(); @@ -71,7 +71,7 @@ export function parseListItem(child) { text = extractPattern(text, DEFAULT_EXPRESSION, 'default', current); // Set the remaining text as the description, removing any leading hyphen - current.desc = text.replace(LEADING_HYPHEN, '').trim() || undefined; + current.desc = text.replace(/^-\s*/, '').trim() || undefined; // Parse nested lists (options) recursively if present if (subList) { diff --git a/src/generators/legacy-json/utils/parseSignature.mjs b/src/generators/legacy-json/utils/parseSignature.mjs index a2abb056..14f652e3 100644 --- a/src/generators/legacy-json/utils/parseSignature.mjs +++ b/src/generators/legacy-json/utils/parseSignature.mjs @@ -1,6 +1,6 @@ 'use strict'; -import { PARAM_EXPRESSION } from '../constants.mjs'; +import { PARAM_EXPRESSION } from '../../../utils/queries/regex.mjs'; const OPTIONAL_LEVEL_CHANGES = { '[': 1, ']': -1 }; diff --git a/src/generators/metadata/utils/parse.mjs b/src/generators/metadata/utils/parse.mjs index 031e00fb..5197e73d 100644 --- a/src/generators/metadata/utils/parse.mjs +++ b/src/generators/metadata/utils/parse.mjs @@ -9,6 +9,15 @@ import { SKIP, visit } from 'unist-util-visit'; import createMetadata from '../../../metadata.mjs'; import createNodeSlugger from '../../../utils/parser/slugger.mjs'; import createQueries from '../../../utils/queries/index.mjs'; +import { + isLinkReference, + isMarkdownUrl, + isHeading, + isStabilityNode, + isYamlNode, + isTextWithType, + isTextWithUnixManual, +} from '../../../utils/queries/unist.mjs'; import { getRemark } from '../../../utils/remark.mjs'; /** @@ -54,7 +63,7 @@ export const parseApiDoc = ({ file, tree }) => { const headingNodes = selectAll('heading', tree); // Handles Markdown link references and updates them to be plain links - visit(tree, createQueries.UNIST.isLinkReference, node => + visit(tree, isLinkReference, node => updateLinkReference(node, markdownDefinitions) ); @@ -64,9 +73,7 @@ export const parseApiDoc = ({ file, tree }) => { // Handles the normalisation URLs that reference to API doc files with .md extension // to replace the .md into .html, since the API doc files get eventually compiled as HTML - visit(tree, createQueries.UNIST.isMarkdownUrl, node => - updateMarkdownLink(node) - ); + visit(tree, isMarkdownUrl, node => updateMarkdownLink(node)); // If the document has no headings but it has content, we add a fake heading to the top // so that our parsing logic can work correctly, and generate content for the whole file @@ -79,7 +86,7 @@ export const parseApiDoc = ({ file, tree }) => { // (so all elements after a Heading until the next Heading) // and then it creates and updates a Metadata entry for each API doc entry // and then generates the final content for each API doc entry and pushes it to the collection - visit(tree, createQueries.UNIST.isHeading, (headingNode, index) => { + visit(tree, isHeading, (headingNode, index) => { // Creates a new Metadata entry for the current API doc file const apiEntryMetadata = createMetadata(nodeSlugger); @@ -90,8 +97,7 @@ export const parseApiDoc = ({ file, tree }) => { // This is used for ensuring that we don't include items that would // belong only to the next heading to the current Heading metadata // Note that if there is no next heading, we use the current node as the next one - const nextHeadingNode = - findAfter(tree, index, createQueries.UNIST.isHeading) ?? headingNode; + const nextHeadingNode = findAfter(tree, index, isHeading) ?? headingNode; // This is the cutover index of the subtree that we should get // of all the Nodes within the AST tree that belong to this section @@ -109,14 +115,14 @@ export const parseApiDoc = ({ file, tree }) => { // Visits all Stability Index nodes from the current subtree if there's any // and then apply the Stability Index metadata to the current metadata entry - visit(subTree, createQueries.UNIST.isStabilityNode, node => + visit(subTree, isStabilityNode, node => addStabilityMetadata(node, apiEntryMetadata) ); // Visits all HTML nodes from the current subtree and if there's any that matches // our YAML metadata structure, it transforms into YAML metadata // and then apply the YAML Metadata to the current Metadata entry - visit(subTree, createQueries.UNIST.isYamlNode, node => { + visit(subTree, isYamlNode, node => { // TODO: Is there always only one YAML node? apiEntryMetadata.setYamlPosition(node.position); addYAMLMetadata(node, apiEntryMetadata); @@ -124,19 +130,17 @@ export const parseApiDoc = ({ file, tree }) => { // Visits all Text nodes from the current subtree and if there's any that matches // any API doc type reference and then updates the type reference to be a Markdown link - visit(subTree, createQueries.UNIST.isTextWithType, (node, _, parent) => + visit(subTree, isTextWithType, (node, _, parent) => updateTypeReference(node, parent) ); // Visits all Unix manual references, and replaces them with links - visit( - subTree, - createQueries.UNIST.isTextWithUnixManual, - (node, _, parent) => updateUnixManualReference(node, parent) + visit(subTree, isTextWithUnixManual, (node, _, parent) => + updateUnixManualReference(node, parent) ); // Removes already parsed items from the subtree so that they aren't included in the final content - remove(subTree, [createQueries.UNIST.isYamlNode]); + remove(subTree, [isYamlNode]); // Applies the AST transformations to the subtree based on the API doc entry Metadata // Note that running the transformation on the subtree isn't costly as it is a reduced tree diff --git a/src/linter/constants.mjs b/src/linter/constants.mjs index d32fbf8b..ce8860a8 100644 --- a/src/linter/constants.mjs +++ b/src/linter/constants.mjs @@ -1,9 +1,5 @@ 'use strict'; -export const INTRODUCED_IN_REGEX = //; - -export const LLM_DESCRIPTION_REGEX = //; - export const LINT_MESSAGES = { missingIntroducedIn: "Missing 'introduced_in' field in the API doc entry", invalidChangeProperty: 'Invalid change property type', diff --git a/src/linter/rules/duplicate-stability-nodes.mjs b/src/linter/rules/duplicate-stability-nodes.mjs index 8c32bbb1..d910942e 100644 --- a/src/linter/rules/duplicate-stability-nodes.mjs +++ b/src/linter/rules/duplicate-stability-nodes.mjs @@ -2,7 +2,7 @@ import { visit } from 'unist-util-visit'; -import createQueries from '../../utils/queries/index.mjs'; +import { STABILITY_INDEX } from '../../utils/queries/regex.mjs'; import { LINT_MESSAGES } from '../constants.mjs'; /** @@ -33,9 +33,7 @@ export const duplicateStabilityNodes = context => { ) { const text = paragraph.children[0]; if (text.type === 'text') { - const match = text.value.match( - createQueries.QUERIES.stabilityIndex - ); + const match = text.value.match(STABILITY_INDEX); if (match) { const stability = parseFloat(match[1]); diff --git a/src/linter/rules/invalid-change-version.mjs b/src/linter/rules/invalid-change-version.mjs index 47a54f5c..b8ee07b0 100644 --- a/src/linter/rules/invalid-change-version.mjs +++ b/src/linter/rules/invalid-change-version.mjs @@ -8,7 +8,7 @@ import { extractYamlContent, normalizeYamlSyntax, } from '../../utils/parser/index.mjs'; -import createQueries from '../../utils/queries/index.mjs'; +import { isYamlNode } from '../../utils/queries/unist.mjs'; import { LINT_MESSAGES } from '../constants.mjs'; import { createYamlIssueReporter, @@ -94,7 +94,7 @@ export const extractVersions = ({ context, node, createYamlIssue }) => { * @returns {void} */ export const invalidChangeVersion = context => { - visit(context.tree, createQueries.UNIST.isYamlNode, node => { + visit(context.tree, isYamlNode, node => { const yamlContent = extractYamlContent(node); const normalizedYaml = normalizeYamlSyntax(yamlContent); diff --git a/src/linter/rules/missing-metadata.mjs b/src/linter/rules/missing-metadata.mjs index f07e3366..c61bf276 100644 --- a/src/linter/rules/missing-metadata.mjs +++ b/src/linter/rules/missing-metadata.mjs @@ -3,11 +3,8 @@ import { find } from 'unist-util-find'; import { findBefore } from 'unist-util-find-before'; -import { - INTRODUCED_IN_REGEX, - LINT_MESSAGES, - LLM_DESCRIPTION_REGEX, -} from '../constants.mjs'; +import { INTRODUCED_IN, LLM_DESCRIPTION } from '../../utils/queries/regex.mjs'; +import { LINT_MESSAGES } from '../constants.mjs'; /** * Finds the first node that matches the condition before the first h2 heading, @@ -25,13 +22,13 @@ const findTopLevelEntry = (node, condition) => { const METADATA_CHECKS = Object.freeze([ { name: 'introducedIn', - regex: INTRODUCED_IN_REGEX, + regex: INTRODUCED_IN, level: 'info', message: LINT_MESSAGES.missingIntroducedIn, }, { name: 'llmDescription', - regex: LLM_DESCRIPTION_REGEX, + regex: LLM_DESCRIPTION, level: 'warn', message: LINT_MESSAGES.missingLlmDescription, }, diff --git a/src/parsers/__tests__/markdown.test.mjs b/src/parsers/__tests__/markdown.test.mjs index dda42123..bfc069d1 100644 --- a/src/parsers/__tests__/markdown.test.mjs +++ b/src/parsers/__tests__/markdown.test.mjs @@ -23,10 +23,10 @@ describe('parseChangelog', () => { const results = await parseChangelog('...'); - assert.partialDeepStrictEqual(results, [ - { version: { raw: '24.0.0' }, isLts: false }, - { version: { raw: '22.0.0' }, isLts: true }, - ]); + assert.equal(results[0].version.version, '24.0.0'); + assert.equal(results[0].isLts, false); + assert.equal(results[1].version.version, '22.0.0'); + assert.equal(results[1].isLts, true); }); }); diff --git a/src/parsers/markdown.mjs b/src/parsers/markdown.mjs index 608c783e..1beecc86 100644 --- a/src/parsers/markdown.mjs +++ b/src/parsers/markdown.mjs @@ -4,16 +4,10 @@ import { coerce } from 'semver'; import { loadFromURL } from '../utils/parser.mjs'; import createQueries from '../utils/queries/index.mjs'; +import { MD_LINKED_LIST_ITEM, NODE_VERSIONS } from '../utils/queries/regex.mjs'; import { getRemark } from '../utils/remark.mjs'; -// A ReGeX for retrieving Node.js version headers from the CHANGELOG.md -const NODE_VERSIONS_REGEX = /\* \[Node\.js ([0-9.]+)\]\S+ (.*)\r?\n/g; - -// A ReGeX for retrieving the list items in the index document -const LIST_ITEM_REGEX = /\* \[(.*?)\]\((.*?)\.md\)/g; - -// A ReGeX for checking if a Node.js version is an LTS release -const NODE_LTS_VERSION_REGEX = /Long Term Support/i; +const LTS = 'long term support'; /** * Creates an API doc parser for a given Markdown API doc file @@ -79,11 +73,11 @@ const createParser = linter => { export const parseChangelog = async path => { const changelog = await loadFromURL(path); - const nodeMajors = Array.from(changelog.matchAll(NODE_VERSIONS_REGEX)); + const nodeMajors = Array.from(changelog.matchAll(NODE_VERSIONS)); return nodeMajors.map(match => ({ version: coerce(match[1]), - isLts: NODE_LTS_VERSION_REGEX.test(match[2]), + isLts: match[2].toLowerCase().includes(LTS), })); }; @@ -96,7 +90,7 @@ export const parseChangelog = async path => { export const parseIndex = async path => { const index = await loadFromURL(path); - const items = Array.from(index.matchAll(LIST_ITEM_REGEX)); + const items = Array.from(index.matchAll(MD_LINKED_LIST_ITEM)); return items.map(([, section, api]) => ({ section, api })); }; diff --git a/src/utils/parser/constants.mjs b/src/utils/parser/constants.mjs index 46ad3628..4dc92af4 100644 --- a/src/utils/parser/constants.mjs +++ b/src/utils/parser/constants.mjs @@ -1,5 +1,14 @@ 'use strict'; +import { + CLASS_HEADING, + CLASS_METHOD_HEADING, + CTOR_HEADING, + EVENT_HEADING, + METHOD_HEADING, + PROPERTY_HEADING, +} from '../queries/regex.mjs'; + // These are string replacements specific to Node.js API docs for anchor IDs export const DOC_API_SLUGS_REPLACEMENTS = [ { from: /node.js/i, to: 'nodejs' }, // Replace Node.js @@ -28,45 +37,13 @@ export const DOC_MDN_BASE_URL_JS_GLOBALS = `${DOC_MDN_BASE_URL_JS}Reference/Glob // These are regular expressions used to determine if a given Markdown heading // is a specific type of API Doc entry (e.g., Event, Class, Method, etc) // and to extract the inner content of said Heading to be used as the API doc entry name -const CAMEL_CASE = '\\w+(?:\\.\\w+)*'; -const FUNCTION_CALL = '\\([^)]*\\)'; - -// Matches "bar": -// Group 1: foo[bar] -// Group 2: foo.bar -const PROPERTY = `${CAMEL_CASE}(?:(\\[${CAMEL_CASE}\\])|\\.(\\w+))`; - export const DOC_API_HEADING_TYPES = [ - { - type: 'method', - regex: new RegExp(`^\`?${PROPERTY}${FUNCTION_CALL}\`?$`, 'i'), - }, - { type: 'event', regex: /^Event: +`?['"]?([^'"]+)['"]?`?$/i }, - { - type: 'class', - regex: new RegExp( - `Class: +\`?(${CAMEL_CASE}(?: extends +${CAMEL_CASE})?)\`?$`, - 'i' - ), - }, - { - type: 'ctor', - regex: new RegExp( - `^(?:Constructor: +)?\`?new +(${CAMEL_CASE})${FUNCTION_CALL}\`?$`, - 'i' - ), - }, - { - type: 'classMethod', - regex: new RegExp( - `^Static method: +\`?${PROPERTY}${FUNCTION_CALL}\`?$`, - 'i' - ), - }, - { - type: 'property', - regex: new RegExp(`^(?:Class property: +)?\`?${PROPERTY}\`?$`, 'i'), - }, + { type: 'method', regex: METHOD_HEADING }, + { type: 'event', regex: EVENT_HEADING }, + { type: 'class', regex: CLASS_HEADING }, + { type: 'ctor', regex: CTOR_HEADING }, + { type: 'classMethod', regex: CLASS_METHOD_HEADING }, + { type: 'property', regex: PROPERTY_HEADING }, ]; // This is a mapping for types within the Markdown content and their respective diff --git a/src/utils/parser/index.mjs b/src/utils/parser/index.mjs index 8a91a071..921d3445 100644 --- a/src/utils/parser/index.mjs +++ b/src/utils/parser/index.mjs @@ -13,7 +13,7 @@ import { DOC_MAN_BASE_URL, } from './constants.mjs'; import { slug } from './slugger.mjs'; -import createQueries from '../queries/index.mjs'; +import { YAML_INNER_CONTENT } from '../queries/regex.mjs'; /** * Extracts raw YAML content from a node @@ -23,7 +23,7 @@ import createQueries from '../queries/index.mjs'; */ export const extractYamlContent = node => { return node.value.replace( - createQueries.QUERIES.yamlInnerContent, + YAML_INNER_CONTENT, // Either capture a YAML multinline block, or a simple single-line YAML block (_, simple, yaml) => simple || yaml ); diff --git a/src/utils/queries/__tests__/regex.test.mjs b/src/utils/queries/__tests__/regex.test.mjs new file mode 100644 index 00000000..2b395180 --- /dev/null +++ b/src/utils/queries/__tests__/regex.test.mjs @@ -0,0 +1,552 @@ +import * as regexps from '../regex.mjs'; +import testRegExp from './testRegExp.mjs'; + +/** + * Test case definitions for regular expressions. + */ +const testCases = { + CLASS_HEADING: [ + { + input: 'Class: Buffer', + captures: 'Buffer', + }, + { + input: 'Class: EventEmitter extends NodeEventTarget', + captures: 'EventEmitter extends NodeEventTarget', + }, + { + input: 'Class: `Stream`', + captures: 'Stream', + }, + { + input: 'class: Buffer', + captures: 'Buffer', + }, + { + input: 'Class Buffer', + matches: false, + }, + { + input: 'Class: a.b.c.D extends e.f.G', + captures: 'a.b.c.D extends e.f.G', + }, + { + input: 'Class: `ChildProcess extends EventEmitter`', + captures: 'ChildProcess extends EventEmitter', + }, + { + input: 'Class: Socket', + captures: 'Socket', + }, + ], + + CLASS_METHOD_HEADING: [ + { + input: 'Static method: Buffer.isBuffer()', + matches: true, + }, + { + input: 'Static method: `Object[util.inspect.custom]()`', + matches: true, + }, + { + input: 'static method: Buffer.isBuffer()', + matches: true, + }, + { + input: 'Static method Buffer.isBuffer()', + matches: false, + }, + { + input: 'Static method: a.b.c.method()', + matches: true, + }, + { + input: 'Static method: Class.staticMethod(arg1, arg2)', + matches: true, + }, + { + input: 'Static method: Module.method()', + matches: true, + }, + ], + + CTOR_HEADING: [ + { + input: 'new Buffer()', + captures: 'Buffer', + }, + { + input: 'Constructor: new Stream()', + captures: 'Stream', + }, + { + input: 'new namespace.Class()', + captures: 'namespace.Class', + }, + { + input: 'Constructor: Buffer()', + matches: false, + }, + { + input: 'new URL(input[, base])', + captures: 'URL', + }, + { + input: 'Constructor: new `EventEmitter()`', + captures: 'EventEmitter', + }, + ], + + PROPERTY_HEADING: [ + { + input: 'Class.property', + matches: true, + }, + { + input: 'Class property: Buffer.poolSize', + matches: true, + }, + { + input: '`process.env`', + matches: true, + }, + { + input: 'Class property Buffer.poolSize', + matches: false, + }, + { + input: 'fs[Symbol.asyncIterator]', + matches: true, + }, + { + input: 'process.env', + matches: true, + }, + ], + + METHOD_HEADING: [ + { + input: '`readFile()`', + matches: true, + }, + { + input: '`array[Symbol.iterator]()`', + matches: true, + }, + { + input: 'fs.readFile()', + matches: false, + }, + { + input: '`a.b.c.method(x, y, z = {})`', + matches: true, + }, + { + input: '`something()`', + matches: true, + }, + { + input: '`fs.readFile()`', + matches: true, + }, + { + input: '`http.request()`', + matches: true, + }, + { + input: '`array.forEach()`', + matches: true, + }, + { + input: '`emitter.on(eventName, listener)`', + matches: true, + }, + ], + + EVENT_HEADING: [ + { + input: "Event: 'close'", + captures: 'close', + }, + { + input: 'Event: "data"', + captures: 'data', + }, + { + input: 'Event: error', + captures: 'error', + }, + { + input: 'Event "close"', + matches: false, + }, + { + input: "Event: 'connection'", + captures: 'connection', + }, + ], + + LINKS_WITH_TYPES: [ + { + input: 'Returns [``](/api/buffer) or [``](/api/string)', + matches: ['[``](/api/buffer)', '[``](/api/string)'], + }, + { + input: '[link text](url)', + matches: false, + }, + ], + + TYPED_LIST_STARTERS: [ + { + input: 'Returns: {string}', + matches: 'Returns: ', + }, + { + input: 'Type: {Object}', + matches: 'Type: ', + }, + { + input: 'returns: {string}', + matches: false, + }, + { + input: 'Extends: {EventEmitter}', + matches: 'Extends: ', + }, + { + input: 'Returns:', + matches: 'Returns:', + }, + ], + + MARKDOWN_URL: [ + { + input: 'fs.md', + captures: ['fs', undefined], + }, + { + input: 'buffer.md#buffer_class_buffer', + captures: ['buffer', '#buffer_class_buffer'], + }, + { + input: 'http://example.com/file.md', + matches: false, + }, + { + input: 'stream.md#stream_readable_streams', + captures: ['stream', '#stream_readable_streams'], + }, + { + input: 'errors.md#errors_class_error', + captures: ['errors', '#errors_class_error'], + }, + ], + + UNIX_MANUAL_PAGE: [ + { + input: 'See ls(1) for more information', + matches: ['ls(1)'], + captures: ['ls', '1', ''], + }, + { + input: 'Check printf(3) and malloc(3c)', + matches: ['printf(3)', 'malloc(3c)'], + captures: [ + ['printf', '3', ''], + ['malloc', '3', 'c'], + ], + }, + { + input: 'function(args)', + matches: [], + }, + { + input: 'socket(7)', + matches: ['socket(7)'], + captures: ['socket', '7', ''], + }, + { + input: 'signal.h(0p)', + matches: ['signal.h(0p)'], + captures: ['signal.h', '0', 'p'], + }, + ], + + STABILITY_INDEX_PREFIX: [ + { + input: 'Stability: 0', + captures: '0', + }, + { + input: 'Stability: 3.1', + captures: '3.1', + }, + { + input: 'Stability: 6', + matches: false, + }, + { + input: 'Stability: 2', + captures: '2', + }, + { + input: 'Stability: 5', + captures: '5', + }, + ], + + STABILITY_INDEX: [ + { + input: 'Stability: 0 - Deprecated', + captures: ['0', 'Deprecated'], + }, + { + input: 'Stability: 2-Stable', + captures: ['2', 'Stable'], + }, + { + input: 'Stability: 3', + captures: ['3', ''], + }, + { + input: 'Stability: 1 - Experimental', + captures: ['1', 'Experimental'], + }, + { + input: 'Stability: 3.1 - Legacy', + captures: ['3.1', 'Legacy'], + }, + ], + + YAML_INNER_CONTENT: [ + { + input: '', + captures: [ + '\ntitle: Example\ndescription: This is an example\n', + undefined, + undefined, + ], + }, + { + input: '', + captures: [' foo bar', undefined, undefined], + }, + { + input: '', + captures: [undefined, 'description', undefined], + }, + ], + + EXTRACT_CODE_FILENAME_COMMENT: [ + { + input: '// example.js\nconsole.log("hello");', + captures: 'example.js', + }, + { + input: '// lib/module.cc\n#include ', + captures: 'lib/module.cc', + }, + { + input: '// src/header.h\n#ifndef HEADER_H', + captures: 'src/header.h', + }, + { + input: '// not-a-valid-extension.txt\n', + matches: false, + }, + { + input: 'console.log("no comment");', + matches: false, + }, + ], + + NAME_EXPRESSION: [ + { + input: 'propertyName:', + captures: 'propertyName', + }, + { + input: '"quoted-name":', + captures: 'quoted-name', + }, + { + input: '`backtick-name`', + captures: 'backtick-name', + }, + { + input: "'single-quoted':", + captures: 'single-quoted', + }, + { + input: 'noColon', + captures: 'noColon', + }, + { + input: 'spaced-out :', + captures: 'spaced-out', + }, + ], + + TYPE_EXPRESSION: [ + { + input: '{string}', + captures: 'string', + }, + { + input: '{Buffer|null}', + captures: 'Buffer|null', + }, + { + input: '{Object}', + captures: 'Object', + }, + { + input: '{Function} callback', + captures: 'Function', + }, + { + input: 'no braces here', + matches: false, + }, + ], + + DEFAULT_EXPRESSION: [ + { + input: '**Default:** `true`', + captures: '`true`', + }, + { + input: '**Default:** 0', + captures: '0', + }, + { + input: 'Some text **Default:** false', + captures: 'false', + }, + { + input: '**default:** lowercase', + captures: 'lowercase', + }, + { + input: 'No default here', + matches: false, + }, + ], + + PARAM_EXPRESSION: [ + { + input: '(required)', + captures: 'required', + }, + { + input: '(optional);', + captures: 'optional', + }, + { + input: '(callback)', + captures: 'callback', + }, + { + input: '(arg1, arg2)', + captures: 'arg1, arg2', + }, + { + input: 'no parentheses', + matches: false, + }, + ], + + NODE_VERSIONS: [ + { + input: '* [Node.js 18.0.0]https://example.com Some description\n', + matches: ['* [Node.js 18.0.0]https://example.com Some description\n'], + captures: [['18.0.0', 'Some description']], + }, + { + input: + '* [Node.js 16.14.2]link Bug fixes\r\n* [Node.js 14.21.3]link More fixes\n', + matches: [ + '* [Node.js 16.14.2]link Bug fixes\r\n', + '* [Node.js 14.21.3]link More fixes\n', + ], + captures: [ + ['16.14.2', 'Bug fixes'], + ['14.21.3', 'More fixes'], + ], + }, + { + input: '- Not a Node.js version\n', + matches: [], + }, + ], + + MD_LINKED_LIST_ITEM: [ + { + input: '* [Buffer](buffer.md)', + matches: ['* [Buffer](buffer.md)'], + captures: [['Buffer', 'buffer']], + }, + { + input: '* [File System](fs.md) and * [HTTP](http.md)', + matches: ['* [File System](fs.md)', '* [HTTP](http.md)'], + captures: [ + ['File System', 'fs'], + ['HTTP', 'http'], + ], + }, + { + input: '- [Not a match](file.txt)', + matches: [], + }, + ], + + INTRODUCED_IN: [ + { + input: '', + matches: '', + }, + { + input: '', + matches: '', + }, + { + input: '', + matches: '', + }, + { + input: '', + matches: false, + }, + ], + + LLM_DESCRIPTION: [ + { + input: '', + matches: '', + }, + { + input: '', + matches: '', + }, + { + input: + '', + matches: + '', + }, + { + input: '', + matches: false, + }, + ], +}; + +for (const [regex, cases] of Object.entries(testCases)) { + await testRegExp(regexps[regex], cases); +} diff --git a/src/utils/queries/__tests__/testRegExp.mjs b/src/utils/queries/__tests__/testRegExp.mjs new file mode 100644 index 00000000..0aadd85a --- /dev/null +++ b/src/utils/queries/__tests__/testRegExp.mjs @@ -0,0 +1,85 @@ +import assert from 'node:assert/strict'; +import { describe, it } from 'node:test'; + +/** + * Runs tests for a regex pattern + * @param {RegExp} regex - The regex to test + * @param {Array} testCases - Test cases with input, matches, and/or captures + */ +export const runRegexTests = async (regex, testCases) => { + await describe(String(regex), async () => { + for (const testCase of testCases) { + await it(testCase.input, () => { + // Get all matches for this test case + const matches = getMatches(regex, testCase.input); + + // Check if matches meet expectations + if ('matches' in testCase) { + checkMatches(matches, testCase.matches); + } + + // Check if capture groups meet expectations + if ('captures' in testCase) { + checkCaptures(matches, testCase.captures); + } + }); + } + }); +}; + +// Get all matches from input (handles global vs non-global regex) +const getMatches = (regex, input) => { + if (regex.global) { + return Array.from(input.matchAll(regex)); + } + const match = input.match(regex); + return match ? [match] : []; +}; + +// Validate that matches are what we expected +const checkMatches = (matches, expected) => { + // Handle boolean expectation (just checking if it matches or not) + if (typeof expected === 'boolean') { + assert.equal( + matches.length > 0, + expected, + `Expected ${expected ? 'at least one match' : 'no matches'}` + ); + return; + } + + // Handle array/string expectations (checking actual match values) + const expectedArray = Array.isArray(expected) ? expected : [expected]; + + assert.equal(matches.length, expectedArray.length, 'Wrong number of matches'); + + expectedArray.forEach((exp, i) => { + assert.equal(matches[i][0], exp, `Match ${i} incorrect`); + }); +}; + +// Validate that capture groups contain expected values +const checkCaptures = (matches, expected) => { + // Normalize expected captures to nested array format + const expectedArray = + typeof expected === 'string' + ? [[expected]] + : Array.isArray(expected[0]) + ? expected + : [expected]; + + expectedArray.forEach((captureGroup, matchIndex) => { + if (matchIndex < matches.length) { + captureGroup.forEach((exp, captureIndex) => { + // captureIndex + 1 because [0] is the full match, [1] is first capture group + assert.equal( + matches[matchIndex][captureIndex + 1], + exp, + `Capture ${captureIndex + 1} in match ${matchIndex} incorrect` + ); + }); + } + }); +}; + +export default runRegexTests; diff --git a/src/utils/queries/index.mjs b/src/utils/queries/index.mjs index 76810097..9f97a0c9 100644 --- a/src/utils/queries/index.mjs +++ b/src/utils/queries/index.mjs @@ -13,6 +13,13 @@ import { } from '../parser/index.mjs'; import { getRemark } from '../remark.mjs'; import { transformNodesToString } from '../unist.mjs'; +import { + MARKDOWN_URL, + STABILITY_INDEX, + STABILITY_INDEX_PREFIX, + TYPE_EXPRESSION, + UNIX_MANUAL_PAGE, +} from './regex.mjs'; /** * Creates an instance of the Query Manager, which allows to do multiple sort @@ -57,7 +64,7 @@ const createQueries = () => { */ const updateMarkdownLink = node => { node.url = node.url.replace( - createQueries.QUERIES.markdownUrl, + MARKDOWN_URL, (_, filename, hash = '') => `${filename}.html${hash}` ); @@ -126,7 +133,7 @@ const createQueries = () => { const stabilityPrefix = transformNodesToString(node.children[0].children); // Attempts to grab the Stability index and description from the prefix - const matches = createQueries.QUERIES.stabilityIndex.exec(stabilityPrefix); + const matches = STABILITY_INDEX.exec(stabilityPrefix); // Ensures that the matches are valid and that we have at least 3 entries if (matches && matches.length === 3) { @@ -163,7 +170,7 @@ const createQueries = () => { const updateStabilityPrefixToLink = vfile => { // The `vfile` value is a String (check `loaders.mjs`) vfile.value = String(vfile.value).replace( - createQueries.QUERIES.stabilityIndexPrefix, + STABILITY_INDEX_PREFIX, match => `[${match}](${DOC_API_STABILITY_SECTION_REF_URL})` ); }; @@ -174,140 +181,14 @@ const createQueries = () => { updateMarkdownLink, /** @param {Array} args */ updateTypeReference: (...args) => - updateReferences( - createQueries.QUERIES.normalizeTypes, - transformTypeToReferenceLink, - ...args - ), + updateReferences(TYPE_EXPRESSION, transformTypeToReferenceLink, ...args), /** @param {Array} args */ updateUnixManualReference: (...args) => - updateReferences( - createQueries.QUERIES.unixManualPage, - transformUnixManualToLink, - ...args - ), + updateReferences(UNIX_MANUAL_PAGE, transformUnixManualToLink, ...args), updateLinkReference, addStabilityMetadata, updateStabilityPrefixToLink, }; }; -// This defines the actual REGEX Queries -createQueries.QUERIES = { - // Fixes the references to Markdown pages into the API documentation - markdownUrl: /^(?![+a-zA-Z]+:)([^#?]+)\.md(#.+)?$/, - // ReGeX to match the {Type} (API type references) - normalizeTypes: /(\{|<)(?! )[^<({})>]+(?! )(\}|>)/g, - // ReGex to match the type API type references that got already parsed - // so that they can be transformed into HTML links - linksWithTypes: /\[`<[^<({})>]+>`\]\((\S+)\)/g, - // ReGeX for handling Stability Indexes Metadata - stabilityIndex: /^Stability: ([0-5](?:\.[0-3])?)(?:\s*-\s*)?(.*)$/s, - // ReGeX for handling the Stability Index Prefix - stabilityIndexPrefix: /Stability: ([0-5](?:\.[0-3])?)/, - // ReGeX for retrieving the inner content from a YAML block - yamlInnerContent: /^/, - // ReGeX for finding references to Unix manuals - unixManualPage: /\b([a-z.]+)\((\d)([a-z]?)\)/g, - // ReGeX for determing a typed list's non-property names - typedListStarters: /^(Returns|Extends|Type):?\s*/, -}; - -createQueries.UNIST = { - /** - * @param {import('@types/mdast').Blockquote} blockquote - * @returns {boolean} - */ - isStabilityNode: ({ type, children }) => - type === 'blockquote' && - createQueries.QUERIES.stabilityIndex.test(transformNodesToString(children)), - /** - * @param {import('@types/mdast').Html} html - * @returns {boolean} - */ - isYamlNode: ({ type, value }) => - type === 'html' && createQueries.QUERIES.yamlInnerContent.test(value), - /** - * @param {import('@types/mdast').Text} text - * @returns {boolean} - */ - isTextWithType: ({ type, value }) => - type === 'text' && createQueries.QUERIES.normalizeTypes.test(value), - /** - * @param {import('@types/mdast').Text} text - * @returns {boolean} - */ - isTextWithUnixManual: ({ type, value }) => - type === 'text' && createQueries.QUERIES.unixManualPage.test(value), - /** - * @param {import('@types/mdast').Html} html - * @returns {boolean} - */ - isHtmlWithType: ({ type, value }) => - type === 'html' && createQueries.QUERIES.linksWithTypes.test(value), - /** - * @param {import('@types/mdast').Link} link - * @returns {boolean} - */ - isMarkdownUrl: ({ type, url }) => - type === 'link' && createQueries.QUERIES.markdownUrl.test(url), - /** - * @param {import('@types/mdast').Heading} heading - * @returns {boolean} - */ - isHeading: ({ type, depth }) => - type === 'heading' && depth >= 1 && depth <= 5, - /** - * @param {import('@types/mdast').LinkReference} linkReference - * @returns {boolean} - */ - isLinkReference: ({ type, identifier }) => - type === 'linkReference' && !!identifier, - - /** - * @param {import('@types/mdast').List} list - * @returns {boolean} - */ - isTypedList: list => { - // Exit early if not a list node - if (list.type !== 'list') { - return false; - } - - // Get the content nodes of the first list item's paragraph - const [node, ...contentNodes] = - list?.children?.[0]?.children?.[0]?.children ?? []; - - // Exit if no content nodes - if (!node) { - return false; - } - - // Check for other starters - if ( - node.value?.trimStart().match(createQueries.QUERIES.typedListStarters) - ) { - return true; - } - - // Check for direct type link pattern (starts with '<') - if (node.type === 'link' && node.children?.[0]?.value?.[0] === '<') { - return true; - } - - // Check for inline code + space + type link pattern - if ( - node.type === 'inlineCode' && - contentNodes[0]?.value.trim() === '' && - contentNodes[1]?.type === 'link' && - contentNodes[1]?.children?.[0]?.value?.[0] === '<' - ) { - return true; - } - - // Not a typed list - return false; - }, -}; - export default createQueries; diff --git a/src/utils/queries/regex.mjs b/src/utils/queries/regex.mjs new file mode 100644 index 00000000..a5e93993 --- /dev/null +++ b/src/utils/queries/regex.mjs @@ -0,0 +1,296 @@ +// ============================================================================ +// CAMEL CASE AND IDENTIFIER PATTERNS +// ============================================================================ + +/** + * Matches camel case identifiers, optionally with dot notation. + * Examples: + * - 'someVar' + * - 'SomeClass' + * - 'module.exports' + * - 'path.to.nestedProperty' + */ +const CAMEL_CASE = '\\w+(?:\\.\\w+)*'; + +/** + * Matches object properties in dot notation or bracket notation. + * Examples: + * - 'object.property' + * - 'object[property]' + * - 'nested.object.property' + * - 'object[nested.property]' + */ +const PROPERTY = `${CAMEL_CASE}(?:(\\[${CAMEL_CASE}\\])|\\.(\\w+))`; + +/** + * Matches function call parentheses including their parameters. + * Examples: + * - '()' + * - '(arg)' + * - '(arg1, arg2)' + * - '(options = {})' + */ +const FUNCTION_CALL = '\\([^)]*\\)'; + +// ============================================================================ +// DOCUMENTATION HEADING PATTERNS +// ============================================================================ + +/** + * Matches class headings in documentation. + * Examples: + * - 'Class: Buffer' + * - 'Class: Socket' + * - 'Class: EventEmitter extends NodeEventTarget' + * - 'Class: `Stream`' + * - 'Class: `ChildProcess extends EventEmitter`' + */ +export const CLASS_HEADING = new RegExp( + `Class: +\`?(${CAMEL_CASE}(?: extends +${CAMEL_CASE})?)\`?$`, + 'i' +); + +/** + * Matches constructor headings in documentation. + * Can be in the format 'new ClassName()' or 'Constructor: new ClassName()'. + * Examples: + * - 'new Buffer()' + * - 'new URL(input[, base])' + * - 'Constructor: new Stream()' + * - 'Constructor: new `EventEmitter()`' + */ +export const CTOR_HEADING = new RegExp( + `^(?:Constructor: +)?\`?new +(${CAMEL_CASE})${FUNCTION_CALL}\`?$`, + 'i' +); + +/** + * Matches static method headings in documentation. + * Examples: + * - 'Static method: Buffer.isBuffer()' + * - 'Static method: Class.staticMethod(arg1, arg2)' + * - 'Static method: Module.method()' + * - 'Static method: `Object[util.inspect.custom]()`' + */ +export const CLASS_METHOD_HEADING = new RegExp( + `^Static method: +\`?${PROPERTY}${FUNCTION_CALL}\`?$`, + 'i' +); + +/** + * Matches API method headings in documentation. + * Examples: + * - '`something()` + * - '`fs.readFile()`' + * - '`http.request()`' + * - '`array.forEach()`' + * - '`emitter.on(eventName, listener)`' + */ +export const METHOD_HEADING = new RegExp( + `^\`(?:${PROPERTY}|(${CAMEL_CASE}))${FUNCTION_CALL}\`?$`, + 'i' +); + +/** + * Matches class property headings in documentation. + * Can be in the format 'Class.property', 'Class[property]', or 'Class property: Class.property'. + * Examples: + * - 'Class.property' + * - 'Class property: Buffer.poolSize' + * - 'process.env' + * - 'fs[Symbol.asyncIterator]' + */ +export const PROPERTY_HEADING = new RegExp( + `^(?:Class property: +)?\`?${PROPERTY}\`?$`, + 'i' +); + +/** + * Matches event headings in documentation. + * Examples: + * - 'Event: \'close\'' + * - 'Event: "data"' + * - 'Event: `error`' + * - 'Event: \'connection\'' + */ +export const EVENT_HEADING = /^Event: +`?['"]?([^'"]+)['"]?`?$/i; + +// ============================================================================ +// TYPE AND REFERENCE PATTERNS +// ============================================================================ + +const TYPE_SAFE = '[^<({})>]+'; + +/** + * Matches API type references enclosed in curly braces or angle brackets. + * Used to normalize type references across documentation. + * Examples: + * - '{string}' + * - '{Buffer|string}' + * - '' + */ +export const TYPE_EXPRESSION = new RegExp( + `(?:\\{|<)(${TYPE_SAFE})(?:\\}|>)`, + 'g' +); + +/** + * Matches already-parsed API type references in Markdown link format. + * Used to detect and handle linked type references. + * Examples: + * - '[``](/api/string)' + * - '[``](/api/buffer)' + */ +export const LINKS_WITH_TYPES = new RegExp( + `\\[\`<${TYPE_SAFE}>\`\\]\\((\\S+)\\)`, + 'g' +); + +/** + * Matches headings that start typed lists in documentation. + * These are used to introduce return values, type descriptions, and inheritance information. + * Examples: + * - 'Returns: {string}' + * - 'Extends: {EventEmitter}' + * - 'Type: {Object}' + * - 'Returns:' + */ +export const TYPED_LIST_STARTERS = /^(Returns|Extends|Type):?\s*/; + +// ============================================================================ +// PARAMETER AND EXPRESSION PATTERNS +// ============================================================================ + +/** + * Matches name expressions with optional quotes and colons. + * Examples: + * - 'propertyName:' + * - '"quoted-name":' + * - '`backtick-name`' + */ +export const NAME_EXPRESSION = /^['`"]?([^'`": {]+)['`"]?\s*:?\s*/; + +/** + * Matches parameter expressions in parentheses. + * Examples: + * - '(required)' + * - '(optional);' + * - '(callback)' + */ +export const PARAM_EXPRESSION = /\((.+)\);?$/; + +/** + * Matches default value expressions. + * Examples: + * - '**Default:** `true`' + * - '**Default:** 0' + */ +export const DEFAULT_EXPRESSION = /\s*\*\*Default:\*\*\s*([^]+)$/i; + +// ============================================================================ +// URL AND REFERENCE PATTERNS +// ============================================================================ + +/** + * Matches references to Markdown pages in the API documentation. + * Captures the file path and optional anchor. + * Examples: + * - 'fs.md' + * - 'buffer.md#buffer_class_buffer' + * - 'stream.md#stream_readable_streams' + * - 'errors.md#errors_class_error' + */ +export const MARKDOWN_URL = /^(?![+a-zA-Z]+:)([^#?]+)\.md(#.+)?$/; + +/** + * Matches Unix manual references like those in command-line documentation. + * Format: command(section[subsection]) + * Examples: + * - 'ls(1)' + * - 'printf(3)' + * - 'socket(7)' + * - 'malloc(3c)' + * - 'signal.h(0p)' + */ +export const UNIX_MANUAL_PAGE = /\b([a-z.]+)\((\d)([a-z]?)\)/g; + +// ============================================================================ +// METADATA AND COMMENT PATTERNS +// ============================================================================ + +/** + * Matches just the Stability Index prefix value from documentation. + * The stability index indicates how stable an API is, from 0 (deprecated) to 5 (locked). + * Examples: + * - 'Stability: 0' + * - 'Stability: 1' + * - 'Stability: 2' + * - 'Stability: 3.1' + */ +export const STABILITY_INDEX_PREFIX = /Stability: ([0-5](?:\.[0-3])?)/; + +/** + * Matches complete Stability Index metadata lines including descriptions. + * These lines indicate the stability level of APIs in the documentation. + * Examples: + * - 'Stability: 0 - Deprecated' + * - 'Stability: 1 - Experimental' + * - 'Stability: 2 - Stable' + * - 'Stability: 3.1 - Legacy' + */ +export const STABILITY_INDEX = new RegExp( + `^${STABILITY_INDEX_PREFIX.source}(?:\\s*-\\s*)?(.*)$`, + 's' +); + +/** + * Extracts inner content from HTML comment-enclosed YAML blocks in documentation. + * These blocks contain metadata about the documentation page. + * Examples: + * - '' + * - '' + */ +export const YAML_INNER_CONTENT = /^/; + +/** + * Matches code filename comments at the beginning of files. + * Examples: + * - '// example.js' + * - '// lib/module.cc' + * - '// src/header.h' + */ +export const EXTRACT_CODE_FILENAME_COMMENT = /^\/\/\s+(.*\.(?:cc|h|js))[\r\n]/; + +/** + * Matches HTML comments with introduced_in metadata. + * Examples: + * - '' + */ +export const INTRODUCED_IN = //; + +/** + * Matches HTML comments with LLM description metadata. + * Examples: + * - '' + */ +export const LLM_DESCRIPTION = //; + +// ============================================================================ +// NODE.JS VERSION AND LIST PATTERNS +// ============================================================================ + +/** + * Matches Node.js version entries in changelog or version lists. + * Examples: + * - '* [Node.js 18.0.0] Some description' + * - '* [Node.js 16.14.2] Bug fixes' + */ +export const NODE_VERSIONS = /\* \[Node\.js ([0-9.]+)\]\S+ (.*)\r?\n/g; + +/** + * Matches list items with Markdown links. + * Examples: + * - '* [Buffer](buffer.md)' + * - '* [File System](fs.md)' + */ +export const MD_LINKED_LIST_ITEM = /\* \[(.*?)\]\((.*?)\.md\)/g; diff --git a/src/utils/queries/unist.mjs b/src/utils/queries/unist.mjs new file mode 100644 index 00000000..7e677520 --- /dev/null +++ b/src/utils/queries/unist.mjs @@ -0,0 +1,103 @@ +import { + LINKS_WITH_TYPES, + MARKDOWN_URL, + STABILITY_INDEX, + TYPE_EXPRESSION, + TYPED_LIST_STARTERS, + UNIX_MANUAL_PAGE, + YAML_INNER_CONTENT, +} from './regex.mjs'; +import { transformNodesToString } from '../unist.mjs'; + +/** + * @param {import('@types/mdast').Blockquote} blockquote + * @returns {boolean} + */ +export const isStabilityNode = ({ type, children }) => + type === 'blockquote' && + STABILITY_INDEX.test(transformNodesToString(children)); + +/** + * @param {import('@types/mdast').Html} html + * @returns {boolean} + */ +export const isYamlNode = ({ type, value }) => + type === 'html' && YAML_INNER_CONTENT.test(value); + +/** + * @param {import('@types/mdast').Text} text + * @returns {boolean} + */ +export const isTextWithType = ({ type, value }) => + type === 'text' && TYPE_EXPRESSION.test(value); + +/** + * @param {import('@types/mdast').Text} text + * @returns {boolean} + */ +export const isTextWithUnixManual = ({ type, value }) => + type === 'text' && UNIX_MANUAL_PAGE.test(value); + +/** + * @param {import('@types/mdast').Html} html + * @returns {boolean} + */ +export const isHtmlWithType = ({ type, value }) => + type === 'html' && LINKS_WITH_TYPES.test(value); + +/** + * @param {import('@types/mdast').Link} link + * @returns {boolean} + */ +export const isMarkdownUrl = ({ type, url }) => + type === 'link' && MARKDOWN_URL.test(url); + +/** + * @param {import('@types/mdast').Heading} heading + * @returns {boolean} + */ +export const isHeading = ({ type, depth }) => + type === 'heading' && depth >= 1 && depth <= 5; + +/** + * @param {import('@types/mdast').LinkReference} linkReference + * @returns {boolean} + */ +export const isLinkReference = ({ type, identifier }) => + type === 'linkReference' && !!identifier; + +/** + * @param {import('@types/mdast').List} list + * @returns {boolean} + */ +export const isTypedList = list => { + if (list.type !== 'list') { + return false; + } + + const [node, ...contentNodes] = + list?.children?.[0]?.children?.[0]?.children ?? []; + + if (!node) { + return false; + } + + if (node.value?.trimStart().match(TYPED_LIST_STARTERS)) { + return true; + } + + if (node.type === 'link' && node.children?.[0]?.value?.[0] === '<') { + return true; + } + + if ( + node.type === 'inlineCode' && + contentNodes[0]?.value.trim() === '' && + contentNodes[1]?.type === 'link' && + contentNodes[1]?.children?.[0]?.value?.[0] === '<' + ) { + return true; + } + + return false; +};