Skip to content

Commit 04b49a7

Browse files
authored
Meta: Optimize PR warning insertion (#24)
* Use more specific error types.
* For compatibility, switch from `import(jsonFileName)` to `JSON.parse(readFile(jsonFileName))`.
* For performance, rather than parsing HTML into a complete AST, use a streaming rewriter (and switch to passthrough after prepending to `<body>`).
1 parent 6d372ad commit 04b49a7

File tree

2 files changed

+88
-44
lines changed

2 files changed

+88
-44
lines changed

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
"devDependencies": {
1919
"@tc39/ecma262-biblio": "^2.1.2775",
2020
"ecmarkup": "^20.0.0",
21-
"jsdom": "^25.0.1"
21+
"jsdom": "^25.0.1",
22+
"parse5-html-rewriting-stream": "^7.0.0",
23+
"tmp": "^0.2.3"
2224
},
2325
"engines": {
2426
"node": ">= 12"

scripts/insert_warning.mjs

Lines changed: 85 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
import fs from 'node:fs';
22
import pathlib from 'node:path';
3+
import { pipeline } from 'node:stream/promises';
34
import { parseArgs } from 'node:util';
4-
import { JSDOM, VirtualConsole } from 'jsdom';
5+
import { JSDOM } from 'jsdom';
6+
import { RewritingStream } from 'parse5-html-rewriting-stream';
7+
import tmp from 'tmp';
58

69
const { positionals: cliArgs } = parseArgs({
710
allowPositionals: true,
@@ -12,7 +15,7 @@ if (cliArgs.length < 3) {
1215
console.error(`Usage: node ${self} <template.html> <data.json> <file.html>...
1316
1417
{{identifier}} substrings in template.html are replaced from data.json, then
15-
the result is inserted at the start of the body element in each file.html.`);
18+
the result is inserted into each file.html.`);
1619
process.exit(64);
1720
}
1821

@@ -21,58 +24,97 @@ const main = async args => {
2124

2225
// Substitute data into the template.
2326
const template = fs.readFileSync(templateFile, 'utf8');
24-
const { default: data } =
25-
await import(pathlib.resolve(dataFile), { with: { type: 'json' } });
27+
const data = JSON.parse(fs.readFileSync(dataFile, 'utf8'));
2628
const formatErrors = [];
27-
const placeholderPatt = /[{][{](?:([\p{ID_Start}$_][\p{ID_Continue}$]*)[}][}]|.*?(?:[}][}]|(?=[{][{])|$))/gsu;
29+
const placeholderPatt =
30+
/[{][{](?:([\p{ID_Start}$_][\p{ID_Continue}$]*)[}][}]|.*?(?:[}][}]|(?=[{][{])|$))/gsu;
2831
const resolved = template.replaceAll(placeholderPatt, (m, name, i) => {
2932
if (!name) {
3033
// Shorten the bad placeholder for the error message: keep at most 29
// characters, or only the text before the first newline if that comes sooner,
// and append an ellipsis. The lower bound in `{0,29}` must be explicit:
// JavaScript treats `{,29}` as literal text, not as a quantifier.
const trunc = m.replace(/([^\n]{29}(?!$)|[^\n]{0,29}(?=\n)).*/s, '$1…');
31-
formatErrors.push(Error(`bad placeholder at index ${i}: ${trunc}`));
34+
formatErrors.push(SyntaxError(`bad placeholder at index ${i}: ${trunc}`));
3235
} else if (!Object.hasOwn(data, name)) {
33-
formatErrors.push(Error(`no data for ${m}`));
36+
formatErrors.push(ReferenceError(`no data for ${m}`));
3437
}
3538
return data[name];
3639
});
3740
if (formatErrors.length > 0) throw AggregateError(formatErrors);
3841

39-
// Parse the template into DOM nodes for appending to page <head>s (metadata
40-
// such as <style> elements) or prepending to page <body>s (everything else).
41-
// https://html.spec.whatwg.org/multipage/dom.html#metadata-content-2
42-
// https://html.spec.whatwg.org/multipage/semantics.html#allowed-in-the-body
43-
// https://html.spec.whatwg.org/multipage/links.html#body-ok
44-
const bodyOkRelPatt =
45-
/^(?:dns-prefetch|modulepreload|pingback|preconnect|prefetch|preload|stylesheet)$/i;
46-
const forceHead = node =>
47-
node.matches?.('base, style, title, meta:not([itemprop])') ||
48-
(node.matches?.('link:not([itemprop])') &&
49-
[...node.relList].some(rel => !rel.match(bodyOkRelPatt)));
50-
const insertDom = JSDOM.fragment(resolved);
51-
// Node.js v22+:
52-
// const { headInserts, bodyInserts } = Object.groupBy(
53-
// insertDom.childNodes,
54-
// node => (forceHead(node) ? 'headInserts' : 'bodyInserts'),
55-
// );
56-
const headInserts = [], bodyInserts = [];
57-
for (const node of insertDom.childNodes) {
58-
if (forceHead(node)) headInserts.push(node);
59-
else bodyInserts.push(node);
60-
}
42+
// Parse the template as an HTML document and serialize its head (metadata such
43+
// as <style> elements) and its body (everything else) for insertion into pages.
44+
const jsdomOpts = { contentType: 'text/html; charset=utf-8' };
45+
const { document } = new JSDOM(resolved, jsdomOpts).window;
46+
const headHTML = document.head.innerHTML;
47+
const bodyHTML = document.body.innerHTML;
6148

62-
// Perform the insertions, suppressing JSDOM warnings from e.g. unsupported
63-
// CSS features.
64-
const virtualConsole = new VirtualConsole();
65-
virtualConsole.on('error', () => {});
66-
const jsdomOpts = { contentType: 'text/html; charset=utf-8', virtualConsole };
67-
const getInserts =
68-
files.length > 1 ? nodes => nodes.map(n => n.cloneNode(true)) : x => x;
69-
const results = await Promise.allSettled(files.map(async file => {
70-
let dom = await JSDOM.fromFile(file, jsdomOpts);
71-
const { head, body } = dom.window.document;
72-
if (headInserts.length > 0) head.append(...getInserts(headInserts));
73-
if (bodyInserts.length > 0) body.prepend(...getInserts(bodyInserts));
74-
fs.writeFileSync(file, dom.serialize(), 'utf8');
75-
}));
49+
// Perform the insertions.
50+
const work = files.map(async file => {
51+
await null;
52+
const { name: tmpName, fd, removeCallback } = tmp.fileSync({
53+
tmpdir: pathlib.dirname(file),
54+
prefix: pathlib.basename(file),
55+
postfix: '.tmp',
56+
detachDescriptor: true,
57+
});
58+
try {
59+
// Make a pipeline: fileReader -> inserter -> finisher -> fileWriter
60+
const fileReader = fs.createReadStream(file, 'utf8');
61+
const fileWriter = fs.createWriteStream('', { fd, flush: true });
62+
63+
// Insert headHTML at the end of a possibly implied head, and bodyHTML at
64+
// the beginning of a possibly implied body.
65+
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml
66+
let mode = 'before html'; // | 'before head' | 'in head' | 'after head' | '$DONE'
67+
const stayInHead = new Set([
68+
...['base', 'basefont', 'bgsound', 'link', 'meta', 'title'],
69+
...['noscript', 'noframes', 'style', 'script', 'template'],
70+
'head',
71+
]);
72+
const inserter = new RewritingStream();
73+
const onEndTag = function (tag) {
74+
if (tag.tagName === 'head') {
75+
this.emitRaw(headHTML);
76+
mode = 'after head';
77+
}
78+
this.emitEndTag(tag);
79+
};
80+
const onStartTag = function (tag) {
81+
const preserve = () => this.emitStartTag(tag);
82+
if (mode === 'before html' && tag.tagName === 'html') {
83+
mode = 'before head';
84+
} else if (mode !== 'after head' && stayInHead.has(tag.tagName)) {
85+
mode = 'in head';
86+
} else {
87+
if (mode !== 'after head') this.emitRaw(headHTML);
88+
// Emit either `${bodyTag}${bodyHTML}` or `${bodyHTML}${otherTag}`.
89+
const emits = [preserve, () => this.emitRaw(bodyHTML)];
90+
if (tag.tagName !== 'body') emits.reverse();
91+
for (const emit of emits) emit();
92+
mode = '$DONE';
93+
this.removeListener('endTag', onEndTag);
94+
this.removeListener('startTag', onStartTag);
95+
return;
96+
}
97+
preserve();
98+
};
99+
inserter.on('endTag', onEndTag).on('startTag', onStartTag);
100+
101+
// Ensure headHTML/bodyHTML insertion before EOF.
102+
const finisher = async function* (source) {
103+
for await (const chunk of source) yield chunk;
104+
if (mode === '$DONE') return;
105+
if (mode !== 'after head') yield headHTML;
106+
yield bodyHTML;
107+
};
108+
109+
await pipeline(fileReader, inserter, finisher, fileWriter);
110+
111+
// Now that the temp file is complete, overwrite the source file.
112+
fs.renameSync(tmpName, file);
113+
} finally {
114+
removeCallback();
115+
}
116+
});
117+
const results = await Promise.allSettled(work);
76118

77119
const failures = results.filter(result => result.status !== 'fulfilled');
78120
if (failures.length > 0) throw AggregateError(failures.map(r => r.reason));

0 commit comments

Comments
 (0)