Skip to content

Commit 09a76b4

Browse files
committed
Update to @cto.af/ucd generator. Upgraded to Unicode 16.0.0. BREAKING: now requires node 20+.
1 parent fbf09c0 commit 09a76b4

File tree

9 files changed

+21150
-498
lines changed

9 files changed

+21150
-498
lines changed

.github/workflows/node.js.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ jobs:
1111

1212
strategy:
1313
matrix:
14-
node-version: [18, 20, 22, 23]
14+
node-version: [20, 22, 24]
1515
os: [ubuntu-latest]
1616
runs-on: ${{ matrix.os }}
1717
steps:

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,4 @@ coverage/
33
docs/
44
lib/widths.js
55
node_modules/
6-
tools/*.txt
76
types/

package.json

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -27,30 +27,24 @@
2727
],
2828
"author": "Joe Hildebrand <[email protected]>",
2929
"license": "MIT",
30-
"devDependencies": {
31-
"@cto.af/eslint-config": "5.1.12",
32-
"@types/node": "22.13.5",
33-
"c8": "10.1.3",
34-
"eslint": "9.21.0",
35-
"eslint-plugin-mocha": "10.5.0",
36-
"mocha": "11.1.0",
37-
"typedoc": "0.27.9",
38-
"typescript": "5.7.3"
39-
},
4030
"dependencies": {
41-
"@cto.af/unicode-trie": "2.0.1",
31+
"@cto.af/unicode-trie-runtime": "3.2.2",
4232
"ansi-regex": "6.1.0",
4333
"emoji-regex": "10.4.0"
4434
},
45-
"pnpm": {
46-
"overrides": {
47-
"chokidar": "4.0.3",
48-
"fast-glob": "3.3.3",
49-
"foreground-child": "3.3.1"
50-
}
35+
"devDependencies": {
36+
"@cto.af/eslint-config": "6.0.4",
37+
"@cto.af/unicode-trie": "3.2.2",
38+
"@types/node": "22.15.24",
39+
"c8": "10.1.3",
40+
"eslint": "9.27.0",
41+
"eslint-plugin-mocha": "11.1.0",
42+
"mocha": "11.5.0",
43+
"typedoc": "0.28.5",
44+
"typescript": "5.8.3"
5145
},
52-
"packageManager": "pnpm@10.5.2",
46+
"packageManager": "pnpm@10.11.0",
5347
"engines": {
54-
"node": ">= 18"
48+
"node": ">= 20"
5549
}
5650
}

pnpm-lock.yaml

Lines changed: 564 additions & 398 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tools/DerivedCoreProperties.txt

Lines changed: 13362 additions & 0 deletions
Large diffs are not rendered by default.

tools/EastAsianWidth.txt

Lines changed: 2686 additions & 0 deletions
Large diffs are not rendered by default.

tools/Scripts.txt

Lines changed: 3128 additions & 0 deletions
Large diffs are not rendered by default.

tools/emoji/emoji-data.txt

Lines changed: 1340 additions & 0 deletions
Large diffs are not rendered by default.

tools/genWidthTrie.js

Lines changed: 56 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,87 +1,64 @@
11
import {AMBIGUOUS, POTENTIAL_EMOJI} from '../lib/constants.js';
2-
import {UnicodeTrieBuilder} from '@cto.af/unicode-trie/builder.js';
32
import {fileURLToPath} from 'node:url';
4-
import fs from 'node:fs/promises';
53
import path from 'node:path';
4+
import {writeFile} from '@cto.af/unicode-trie/file';
65

76
const __filename = fileURLToPath(import.meta.url);
87
const __dirname = path.dirname(__filename);
98

10-
async function processFile(name, trie, transform) {
11-
const INPUT = path.join(__dirname, `${name.replaceAll('/', '_')}.txt`);
12-
13-
// Cache data in local file. Requires Node v18+.
14-
let txt = null;
15-
try {
16-
txt = await fs.readFile(INPUT, 'utf8');
17-
} catch (_ignored) {
18-
const url = `https://www.unicode.org/Public/UCD/latest/ucd/${name}.txt`;
19-
const res = await fetch(url);
20-
txt = await res.text();
21-
await fs.writeFile(INPUT, txt, 'utf8');
22-
}
23-
// # LineBreak-15.0.0.txt
24-
// # Date: 2022-07-28, 09:20:42 GMT [KW, LI]
25-
26-
const version = txt.match(/^#\s*\S+-(?<version>\d+\.\d+\.\d+).txt/)?.groups?.version;
27-
const date = txt.match(/^#\s*Date: (?<date>[\d,: GMT-]+)/m)?.groups?.date;
28-
29-
const matches = txt.matchAll(
30-
/^(?<start>\p{Hex}{4,6})(?:\.\.(?<end>\p{Hex}{4,6}))?\s*;\s*(?<val>\S+)/gmu
31-
);
32-
for (const match of matches) {
33-
const val = transform(match.groups.val);
34-
if (val == null) {
35-
continue;
36-
}
37-
const start = parseInt(match.groups.start, 16);
38-
if (match.groups.end) {
39-
const end = parseInt(match.groups.end, 16);
40-
trie.setRange(start, end, val);
41-
} else {
42-
trie.set(start, val);
43-
}
44-
}
45-
return {version, date};
46-
}
47-
48-
const trie = new UnicodeTrieBuilder(1, NaN);
49-
50-
const {version, date} = await processFile('DerivedCoreProperties', trie, x => {
51-
if (x === 'Default_Ignorable_Code_Point') {
52-
return 0;
53-
}
54-
return null;
55-
});
56-
57-
await processFile('Scripts', trie, x => {
58-
if (x === 'Hangul') {
59-
return 2;
60-
}
61-
return null;
62-
});
63-
64-
await processFile('EastAsianWidth', trie, x => {
65-
switch (x) {
66-
case 'F':
67-
return 2;
68-
case 'W':
69-
return 2;
70-
case 'A':
71-
return AMBIGUOUS;
72-
}
73-
return null;
9+
await writeFile([
10+
{
11+
name: 'DerivedCoreProperties.txt',
12+
transform(x) {
13+
if (x === 'Default_Ignorable_Code_Point') {
14+
return 0;
15+
}
16+
return null;
17+
},
18+
},
19+
{
20+
name: 'Scripts.txt',
21+
transform(x) {
22+
return (x === 'Hangul') ? 2 : null;
23+
},
24+
},
25+
{
26+
name: 'EastAsianWidth.txt',
27+
transform(x) {
28+
switch (x) {
29+
case 'F':
30+
return 2;
31+
case 'W':
32+
return 2;
33+
case 'A':
34+
return AMBIGUOUS;
35+
}
36+
return null;
37+
},
38+
},
39+
{
40+
name: 'emoji/emoji-data.txt',
41+
transform(x) {
42+
return (x === 'Emoji') ? POTENTIAL_EMOJI : null;
43+
},
44+
},
45+
{
46+
transform(trie) {
47+
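trie.setRange(0, 20, 0); // C0 Controls. NOTE(review): 20 is decimal (U+0014), but C0 runs through U+001F (0x1f) -- confirm the intended range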
48+
trie.set(0x8, -1); // Backspace
49+
trie.set(0x7f, 0); // DEL
50+
trie.set(0x2E3A, 3); // TWO-EM DASH, in some contexts and fonts
51+
trie.set(0x2E3B, 4); // THREE-EM DASH, in some contexts and fonts
52+
},
53+
},
54+
], {
55+
cacheDir: __dirname,
56+
initialValue: 1,
57+
errorValue: NaN,
58+
className: 'Width',
59+
out: path.resolve(__dirname, '..', 'lib', 'widths.js'),
60+
quot: "'",
61+
semi: ';',
62+
verbose: true,
63+
frequency: 0,
7464
});
75-
76-
await processFile('emoji/emoji-data', trie, x => ((x === 'Emoji') ? POTENTIAL_EMOJI : null));
77-
78-
trie.setRange(0, 20, 0); // C0 Controls
79-
trie.set(0x8, -1); // Backspace
80-
trie.set(0x7f, 0); // ESC
81-
trie.set(0x2E3A, 3); // TWO-EM DASH, in some contexts and fonts
82-
trie.set(0x2E3B, 4); // THREE-EM DASH, in some contexts and fonts
83-
84-
const OUTPUT = path.resolve(__dirname, '..', 'lib', 'widths.js');
85-
await fs.writeFile(OUTPUT, trie.toModule({
86-
version, date, name: 'Width', quot: "'", semi: ';',
87-
}));

0 commit comments

Comments
 (0)