Skip to content
Open
Show file tree
Hide file tree
Changes from 44 commits
Commits
Show all changes
64 commits
Select commit Hold shift + click to select a range
fc428f3
What have I done
eggrobin May 22, 2025
91c41cf
meow
eggrobin May 23, 2025
3eb0363
count leaves
eggrobin May 24, 2025
575e2ed
meow
eggrobin May 24, 2025
29a2fa9
kwoc comparator, subentries
eggrobin May 24, 2025
dad5eb1
lemmatization tweaks
eggrobin May 24, 2025
4a1a15f
JS
eggrobin May 24, 2025
c653d2f
html, no kwoc
eggrobin May 25, 2025
1acc3f2
spotless
eggrobin May 25, 2025
c7add69
dead code
eggrobin May 25, 2025
dbc8ae7
meow
eggrobin May 25, 2025
81abfc5
meow
eggrobin May 25, 2025
bf50f64
fffe
eggrobin May 26, 2025
9568d54
autocomplete
eggrobin May 26, 2025
ee0b722
spotless
eggrobin May 26, 2025
8a4fd22
CSS
eggrobin May 27, 2025
a00a2d8
Merge remote-tracking branch 'la-vache/main' into indexer
eggrobin Feb 11, 2026
0d25640
Show latest & α Δ charts as appropriate; use the right kind of word s…
eggrobin Feb 12, 2026
002b363
Rename
eggrobin Feb 12, 2026
d662381
suggestions
eggrobin Feb 12, 2026
d81c7a3
fix chart links for ranges too, this needs to be factored
eggrobin Feb 12, 2026
64e1a96
showDevProperties on new characters
eggrobin Feb 12, 2026
b3660cd
title
eggrobin Feb 19, 2026
b0836fd
comments
eggrobin Feb 19, 2026
beea7bd
Search by code point
eggrobin Feb 19, 2026
bd57592
BOOP
eggrobin Feb 19, 2026
1444fd7
Pretty block
eggrobin Feb 20, 2026
37925d5
ungleichmäßige unzugewiesene
eggrobin Feb 20, 2026
3570a36
Lemmatize the corpus but not the query as suggested by Markus
eggrobin Feb 20, 2026
60798ae
Merge branch 'walking-down-the-plane' into indexer
eggrobin Feb 20, 2026
8c85c8b
Pretty block
eggrobin Feb 20, 2026
550046c
Merge remote-tracking branch 'la-vache/main' into indexer
eggrobin Feb 23, 2026
b4a5b43
terminology
eggrobin Feb 25, 2026
9cf091d
Drop the CLI search
eggrobin Feb 25, 2026
63dfd03
meow
eggrobin Feb 25, 2026
0fb75ed
Sentence segmentation
eggrobin Feb 25, 2026
3b52f93
Strip sentences, search by literal, name ranges
eggrobin Feb 26, 2026
9f1089f
nfkc
eggrobin Feb 26, 2026
d5627e5
No words for code point search
eggrobin Feb 26, 2026
cd2b92c
split informal aliases
eggrobin Feb 26, 2026
0cb5028
Some work towards radicals
eggrobin Feb 27, 2026
20c0185
Limit subentries not entries
eggrobin Feb 27, 2026
5cc5caf
Seems usable
eggrobin Feb 27, 2026
90210de
Don’t include . in most words
eggrobin Feb 27, 2026
01ee291
More selectively override segmentation
eggrobin Feb 27, 2026
fd7797b
Fix BOOP, show radical/stroke entry for code point search
eggrobin Feb 27, 2026
fa6bc8d
less hacky seal
eggrobin Feb 28, 2026
29d4008
Prettier presentation and usable rsindex
eggrobin Mar 2, 2026
ac948ac
rename counter
eggrobin Mar 2, 2026
00764b1
deduplicate locations
eggrobin Mar 2, 2026
b7f3af9
format
eggrobin Mar 2, 2026
c10792e
In the night, no control
eggrobin Mar 2, 2026
c0763bc
ic
eggrobin Mar 3, 2026
162f374
CSS shenanigans
eggrobin Mar 3, 2026
21183cd
g
eggrobin Mar 3, 2026
cb9c040
M
eggrobin Mar 3, 2026
c63ceaa
q
eggrobin Mar 3, 2026
cba8f50
looong
eggrobin Mar 3, 2026
d7a9a99
template
eggrobin Mar 4, 2026
b603535
bad link
eggrobin Mar 4, 2026
d16c5a2
eggsamples
eggrobin Mar 4, 2026
b3c88d2
Fix High Private Use Surrogates chart links, more specific noncharact…
eggrobin Mar 5, 2026
01651cd
Renamings
eggrobin Mar 18, 2026
37fb3bb
appease a linter
eggrobin Mar 19, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions charindex.css
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/* The search box fills the available width, capped at 40em. */
input {
width:100%;
max-width:40em;
}

/* Result list: same 40em cap as the search box, no bullets or default list
   padding; anything overflowing horizontally is clipped rather than scrolled. */
ul#results {
max-width: 40em;
list-style: none;
padding: 0;
overflow-x: hidden;
}

/* .tail wraps the part of a result snippet starting at the matched word
   (see index_search.js).  The 2em left padding combined with the -2em
   text-indent produces a hanging indent: the first line starts at the left
   edge and wrapped lines are indented by 2em. */
.tail {
display: inline-block;
padding-left: 2em;
text-indent: -2em;
box-sizing: border-box;
}
/* .head wraps the part of the snippet that precedes the matched word.  It is
   indented by 1em and sized to its content, but never wider than its
   container. */
.head {
display: inline-block;
padding-left: 1em;
width: max-content;
max-width: 100%;
box-sizing: border-box;
}
/* Elements of class "ranges" are floated to the right edge of the entry. */
span.ranges {
float: right;
}
210 changes: 210 additions & 0 deletions index_search.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
// Lemma to snippet to position of the word in the snippet.
// The position is used as a "pivot": results are displayed and sorted starting
// from the matched word, with the text before it moved to the end.
// NOTE(review): the "GENERATED LINE" marker below is presumably replaced with
// the actual initializer by the index generator; keep it textually intact.
/**@type {Map<string, Map<string, number>>}*/
let wordIndex/*= GENERATED LINE*/;
// Property name to snippet to index entry.
// `html` is a template containing the placeholder "[RESULT TEXT]";
// `characters` is a list of inclusive [first, last] code point ranges.
/**@type {Map<string, Map<string, {html: string, characters: [number, number][]}>>}*/
let indexEntries/*= GENERATED LINE*/;

// Code point to name, for entries whose first range is a single code point.
/**@type {Map<number, string>}*/
let characterNames = new Map();
// Inclusive [first, last] code point range to name, for names that apply to
// ranges.  The keys are arrays, so this map is only ever iterated, never
// looked up by key.
/**@type {Map<[number, number], string>}*/
let characterNameRanges = new Map();

// The search stops collecting entries once the number of code point ranges
// covered by the results reaches this limit.
let maxResults = 100;

// Populate the code point → name lookups from the "Name" index: an entry whose
// first range is a single code point names that code point; any other entry
// names each of its ranges.
indexEntries.get("Name").forEach((entry, name) => {
  const [firstRange] = entry.characters;
  if (firstRange[0] == firstRange[1]) {
    characterNames.set(firstRange[0], name);
    return;
  }
  for (const range of entry.characters) {
    characterNameRanges.set(range, name);
  }
});
// Fall back to a name alias for code points that did not get a name above.
indexEntries.get("Name_Alias").forEach((entry, name) => {
  const codePoint = entry.characters[0][0];
  if (characterNames.has(codePoint)) {
    return;
  }
  characterNames.set(codePoint, name);
});

/**
 * Input event handler for the search box: runs the search on the current value
 * of the event target and renders the result count into #info and the result
 * entries into #results.
 *
 * @param {Event} event - an event whose `target.value` is the query string.
 */
function updateResults(event) {
  /**@type {string}*/
  const query = event.target.value;
  const { entries, rangeCount } = search(query);
  // The search stops as soon as it reaches maxResults, so reaching the limit
  // means that the list is (possibly) truncated.
  document.getElementById("info").innerHTML =
    rangeCount >= maxResults
      ? `Showing first ${maxResults} results`
      : rangeCount + " results";
  // Wrap each entry in its own row.  (Joining on a separator previously
  // emitted "</tr></tr>" instead of "</td></tr>" between entries, and produced
  // an empty row when there were no entries.)
  document.getElementById("results").innerHTML = entries
    .map((entry) => "<tr><td>" + entry + "</td></tr>")
    .join("");
}

/**
 * Searches the index for entries matching `query`.
 *
 * The query is segmented into words, which are folded and looked up in
 * `wordIndex`.  A snippet matches if it contains all of the query words; if
 * the last word has no exact match, it is treated as a prefix.  Matching
 * snippets are sorted as a keyword-in-context list (text from the first
 * matched word onwards, then the text before it), and rendered through the
 * HTML templates in `indexEntries`.  Entries whose characters are all already
 * covered by earlier results are skipped.  Collection stops once the number of
 * covered ranges reaches `maxResults`.
 *
 * Queries of at most one word are additionally interpreted as a hexadecimal
 * code point and/or as a literal character.
 *
 * @param {string} query - the raw text typed by the user.
 * @returns {{entries: string[], rangeCount: number}} the HTML of each result
 *     entry, and the number of code point ranges covered by the results.
 */
function search(/**@type {string}*/ query) {
  const wordBreak = new Intl.Segmenter("en", { granularity: "word" });
  // Mask ".-", "'" and "." with letters of the same length so that the
  // segmenter does not break on them; the words are then cut out of the
  // original query using the segment offsets.  The regexes are global so that
  // every occurrence is masked, not just the first.
  const maskedQuery = query.replace(/\.-/g, "pm").replace(/['.]/g, "p");
  const queryWords = Array.from(wordBreak.segment(maskedQuery))
    .filter((s) => s.isWordLike)
    .map((s) => query.substring(s.index, s.index + s.segment.length));
  const foldedQuery = queryWords.map(fold);
  let rangeCount = 0;
  /**@type {[number, number][]}*/
  let covered = [];
  /**@type {string[]}*/
  const result = [];
  // A function replacer is used so that "$&", "$$", etc. occurring in the
  // inserted text are not interpreted as replacement patterns.
  const fillTemplate = (/**@type {string}*/ html, /**@type {string}*/ text) =>
    html.replace("[RESULT TEXT]", () => text);

  /**@type {Set<string>}*/
  let resultSnippets = new Set(wordIndex.get(foldedQuery[0])?.keys() ?? []);
  // The lemmata whose positions may serve as the pivot of a snippet.
  const firstLemmata = [foldedQuery[0]];
  if (resultSnippets.size === 0 && foldedQuery.length === 1) {
    // No exact match for a single-word query: complete it as a prefix.
    const prefix = foldedQuery[0];
    for (const [completion, leaves] of wordIndex) {
      if (completion.startsWith(prefix)) {
        firstLemmata.push(completion);
        resultSnippets = resultSnippets.union(leaves);
      }
    }
  }
  for (let i = 1; i < foldedQuery.length; ++i) {
    let rhs = new Set(wordIndex.get(foldedQuery[i])?.keys() ?? []);
    const intersection = resultSnippets.intersection(rhs);
    if (intersection.size === 0 && i === foldedQuery.length - 1) {
      // The last word may still be being typed: complete it as a prefix.
      const prefix = foldedQuery[i];
      for (const [completion, leaves] of wordIndex) {
        if (completion.startsWith(prefix)) {
          rhs = rhs.union(leaves);
        }
      }
      resultSnippets = resultSnippets.intersection(rhs);
    } else {
      resultSnippets = intersection;
    }
  }

  const pivots = firstLemmata.map((l) => wordIndex.get(l)).filter((x) => !!x);
  // Position in the snippet of the (first) word that matched the first lemma.
  const getPivot = (/**@type {string}*/ s) =>
    pivots.map((p) => p.get(s)).filter((x) => x !== undefined)[0];
  const collator = new Intl.Collator("en");
  // Sort key: the snippet rotated around its pivot; U+FFFE separates the two
  // parts.
  const sortKey = (/**@type {string}*/ s) => {
    const pivot = getPivot(s);
    return s.substring(pivot) + " \uFFFE " + s.substring(0, pivot);
  };
  const sortedSnippets = Array.from(resultSnippets).sort((left, right) =>
    collator.compare(sortKey(left), sortKey(right)),
  );

  for (const [, propertyIndex] of indexEntries) {
    for (const snippet of sortedSnippets) {
      const entry = propertyIndex.get(snippet);
      if (!entry) {
        continue;
      }
      /**@type {[number, number][]}*/
      const entrySet = entry.characters;
      // Skip entries that would not show any character not already shown.
      if (superset(covered, entrySet)) {
        continue;
      }
      rangeCount += entrySet.length;
      covered = covered.concat(entrySet);
      const pivot = getPivot(snippet);
      const tail = snippet.substring(pivot);
      result.push(
        fillTemplate(
          entry.html,
          "<span class=tail" +
            (snippet.includes(",") ? " style=width:100%" : "") +
            ">" +
            toHTML(tail) +
            (pivot > 0 && !tail.endsWith(".") ? "," : "") +
            "</span> " +
            (pivot > 0
              ? "<span class=head>" +
                toHTML(snippet.substring(0, pivot)) +
                "</span>"
              : ""),
        ),
      );
      if (rangeCount >= maxResults) {
        return { entries: result, rangeCount };
      }
    }
  }

  if (queryWords.length <= 1 && query.length > 0) {
    // Search by code point (hexadecimal) and by literal character.
    const codePoints = [];
    if (/^[0-9A-F]+$/iu.test(query)) {
      codePoints.push(parseInt(query, 16));
    }
    if (/^.$/iu.test(query)) {
      codePoints.push(query.codePointAt(0));
    }
    for (const cp of codePoints) {
      let name = characterNames.get(cp);
      if (!name) {
        for (const [[first, last], n] of characterNameRanges) {
          if (first <= cp && cp <= last) {
            name = n;
            break;
          }
        }
      }
      if (name) {
        rangeCount += 1;
        result.push(
          fillTemplate(
            (
              indexEntries.get("Name").get(name) ??
              indexEntries.get("Name_Alias").get(name)
            ).html,
            toHTML(name),
          ),
        );
      }
    }
  }
  if (queryWords.length === 1) {
    // Special single-word queries mapped to a snippet of the Block index.
    // These used to be `else if` branches of the code point search above,
    // which made them unreachable: a one-word query is never empty.
    const specialQueries = [
      [/^boop$/i, "Betty"],
      [/^dood$/i, "the"],
    ];
    for (const [pattern, snippet] of specialQueries) {
      if (!pattern.test(queryWords[0])) {
        continue;
      }
      const entry = indexEntries.get("Block")?.get(snippet);
      // Do not fail the whole search if the index lacks the entry.
      if (entry) {
        rangeCount += 1;
        result.push(fillTemplate(entry.html, toHTML(snippet)));
      }
    }
  }
  return { entries: result, rangeCount };
}

/**
 * Escapes the characters that are significant in HTML text content
 * (`&`, `<`, and `>`) as character references.
 *
 * @param {string} plain - plain text.
 * @returns {string} the text, safe for insertion as HTML element content.
 */
function toHTML(/**@type {string}*/ plain) {
  const references = { "&": "&amp;", "<": "&lt;", ">": "&gt;" };
  return plain.replace(/[&<>]/g, (character) => references[character]);
}

/**
 * Returns whether the set of code points described by `left` contains the one
 * described by `right`.
 *
 * @param {[number, number][]} left - inclusive ranges of the candidate superset.
 * @param {[number, number][]} right - inclusive ranges of the candidate subset.
 * @returns {boolean} true if every range of `right` is entirely covered by the
 *     ranges of `left` (in particular, if `right` is empty).
 */
function superset(/**@type {[number, number][]}*/left, /**@type {[number, number][]}*/right) {
  // Successively remove each range of `left` from what remains of `right`.
  let remaining = right.slice();
  // The loop variable must be declared: it was previously an implicit global,
  // which is a ReferenceError in strict mode and in modules.
  for (const containingRange of left) {
    remaining = remaining.flatMap((r) => rangeMinus(r, containingRange));
  }
  return remaining.length === 0;
}

/**
 * Computes the set difference of two inclusive ranges.
 *
 * @param {[number, number]} left - the inclusive range to subtract from.
 * @param {[number, number]} right - the inclusive range to remove.
 * @returns {[number, number][]} the parts of `left` that are not in `right`:
 *     an empty array if `right` covers `left`, `[left]` itself if the ranges
 *     are disjoint, and otherwise one or two inclusive ranges.
 */
function rangeMinus(/**@type {[number, number]}*/left, /**@type {[number, number]}*/right) {
  const intersection = rangeIntersection(left, right);
  if (intersection === null) {
    return [left];
  }
  /**@type {[number, number][]}*/
  const result = [];
  // What remains of `left` below the intersection.
  if (left[0] < intersection[0]) {
    result.push([left[0], intersection[0] - 1]);
  }
  // What remains of `left` above the intersection.  The range is inclusive,
  // so it extends all the way to left[1].
  if (left[1] > intersection[1]) {
    result.push([intersection[1] + 1, left[1]]);
  }
  return result;
}

/**
 * Intersects two inclusive ranges.
 *
 * @param {[number, number]} left - an inclusive [start, end] range.
 * @param {[number, number]} right - an inclusive [start, end] range.
 * @returns {[number, number] | null} a new array holding the overlap of the
 *     two ranges, or null if one range ends before the other starts.
 */
function rangeIntersection(/**@type {[number, number]}*/left, /**@type {[number, number]}*/right) {
  const disjoint = left[1] < right[0] || right[1] < left[0];
  return disjoint
    ? null
    : [Math.max(left[0], right[0]), Math.min(left[1], right[1])];
}

/**
 * Folds a word for lookup in the word index: applies NFKC normalization,
 * lowercases, and transliterates every "š" to "sh".
 *
 * @param {string} word - a word of the query.
 * @returns {string} the folded word.
 */
function fold(/**@type {string}*/ word) {
  const folding = word.normalize("NFKC").toLowerCase();
  // replaceAll, not replace: with a string pattern, replace only substitutes
  // the first occurrence, which would leave e.g. the second š of "šamaš".
  return folding.replaceAll("š", "sh");
}
Loading
Loading