Skip to content

Commit c052298

Browse files
authored
Regenerate DoNotEmit (#1246)
* argh * what a mess * Hopefully good enough * Typos from L2/24-144 * Regenerate UCD * After Markus’s review
1 parent bcd1dec commit c052298

File tree

10 files changed

+583
-14
lines changed

10 files changed

+583
-14
lines changed

UnicodeJsps/src/main/java/org/unicode/jsp/XPropertyFactory.java

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -423,6 +423,11 @@ protected String _getValue(int codepoint) {
423423
return Common.getXStringPropertyValue(fakeEnumValue, codepoint, NameChoice.LONG);
424424
}
425425

426+
/**
 * String-keyed lookup is not supported by this property.
 *
 * @param string the string whose value is requested (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
427+
protected String _getValue(String string) {
428+
throw new UnsupportedOperationException();
429+
}
430+
426431
@Override
427432
protected List _getValueAliases(String valueAlias, List result) {
428433
addUnique("<string>", result);
@@ -466,6 +471,11 @@ protected List _getValueAliases(String valueAlias, List result) {
466471
return result;
467472
}
468473

474+
/**
 * String-keyed lookup is not implemented for this property; every call fails.
 *
 * @param string the string whose value is requested (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
475+
protected String _getValue(String string) {
476+
throw new UnsupportedOperationException();
477+
}
478+
469479
@Override
470480
protected String _getVersion() {
471481
return VersionInfo.ICU_VERSION.toString();
@@ -642,6 +652,11 @@ protected String _getValue(int codepoint) {
642652
return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1];
643653
}
644654

655+
/**
 * Reports whether {@code string} is a member of {@code unicodeSet}.
 *
 * @param string the string to test for membership
 * @return {@code YESNO_ARRAY[0]} when {@code unicodeSet} contains {@code string}, otherwise
 *     {@code YESNO_ARRAY[1]}
 */
@Override
656+
protected String _getValue(String string) {
657+
// Same index convention as the code-point overload: 0 = contained, 1 = not contained.
return YESNO_ARRAY[unicodeSet.contains(string) ? 0 : 1];
658+
}
659+
645660
@Override
646661
protected List _getAvailableValues(List result) {
647662
return YESNO;

unicodetools/data/ucd/dev/DoNotEmit.txt

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1,11 +1,11 @@
1-
# DoNotEmit-17.0.0.txt
2-
# Date: 2025-08-14
1+
# DoNotEmit-18.0.0.txt
2+
# Date: 2025-11-25, 16:30:13 GMT
33
# © 2025 Unicode®, Inc.
44
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
55
# For terms of use and license, see https://www.unicode.org/terms_of_use.html
66
#
7-
# For documentation, see UAX #44: Unicode Character Database,
8-
# at https://www.unicode.org/reports/tr44/
7+
# Unicode Character Database
8+
# For documentation, see https://www.unicode.org/reports/tr44/
99
#
1010
# Do_Not_Emit
1111
#
@@ -343,9 +343,9 @@
343343
# ================================================
344344

345345
# Latin, from text in the "Latin" section of the core specification, the NamesList, and the uppercase mapping
346+
0131 0307; 0069 0307; Dotless_Form # LATIN SMALL LETTER DOTLESS I, COMBINING DOT ABOVE; LATIN SMALL LETTER I, COMBINING DOT ABOVE
346347
0140; 006C 00B7; Preferred_Spelling # LATIN SMALL LETTER L WITH MIDDLE DOT; LATIN SMALL LETTER L, MIDDLE DOT
347348
0149; 2019 006E; Deprecated # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE; RIGHT SINGLE QUOTATION MARK, LATIN SMALL LETTER N
348-
0131 0307; 0069 0307; Dotless_Form # LATIN SMALL LETTER DOTLESS I, COMBINING DOT ABOVE; LATIN SMALL LETTER I, COMBINING DOT ABOVE
349349
0237 0307; 006A 0307; Dotless_Form # LATIN SMALL LETTER DOTLESS J, COMBINING DOT ABOVE; LATIN SMALL LETTER J, COMBINING DOT ABOVE
350350
# Characters with overstruck tilde for which a precomposed form exists,
351351
# but the sequences are not canonically equivalent
@@ -402,22 +402,22 @@
402402
0281 0321; 1DF36; Precomposed_Form # LATIN LETTER SMALL CAPITAL INVERTED R, COMBINING PALATALIZED HOOK BELOW; LATIN LETTER SMALL CAPITAL INVERTED R WITH PALATAL HOOK
403403
0283 0321; 1D8B; Precomposed_Form # LATIN SMALL LETTER ESH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER ESH WITH PALATAL HOOK
404404
028B 0321; 1DF39; Precomposed_Form # LATIN SMALL LETTER V WITH HOOK, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER V WITH HOOK AND PALATAL HOOK
405-
0295 0321; 1DF3A; Precomposed_Form # LATIN LETTER PHARYNGEAL VOICED FRICATIVE, COMBINING PALATALIZED HOOK BELOW; LATIN LETTER PHARYNGEAL VOICED FRICATIVE WITH PALATAL HOOK
406405
0292 0321; 1DF18; Precomposed_Form # LATIN SMALL LETTER EZH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER EZH WITH PALATAL HOOK
406+
0295 0321; 1DF3A; Precomposed_Form # LATIN LETTER PHARYNGEAL VOICED FRICATIVE, COMBINING PALATALIZED HOOK BELOW; LATIN LETTER PHARYNGEAL VOICED FRICATIVE WITH PALATAL HOOK
407407
02A3 0321; 1DF2E; Precomposed_Form # LATIN SMALL LETTER DZ DIGRAPH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER DZ DIGRAPH WITH PALATAL HOOK
408408
02A4 0321; 1DF12; Precomposed_Form # LATIN SMALL LETTER DEZH DIGRAPH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER DEZH DIGRAPH WITH PALATAL HOOK
409409
02A6 0321; 1DF38; Precomposed_Form # LATIN SMALL LETTER TS DIGRAPH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER TS DIGRAPH WITH PALATAL HOOK
410410
02A7 0321; 1DF17; Precomposed_Form # LATIN SMALL LETTER TESH DIGRAPH, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER TESH DIGRAPH WITH PALATAL HOOK
411-
02B0 0321; 1DF26; Precomposed_Form # MODIFIER LETTER SMALL H, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL H WITH PALATAL HOOK
411+
02B0 0321; 1DFF6; Precomposed_Form # MODIFIER LETTER SMALL H, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL H WITH PALATAL HOOK
412412
02E1 0321; 1DAA; Precomposed_Form # MODIFIER LETTER SMALL L, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL L WITH PALATAL HOOK
413-
02E2 0321; 1DF28; Precomposed_Form # MODIFIER LETTER SMALL S, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL S WITH PALATAL HOOK
414-
03B2 0321; 1DF3B; Precomposed_Form # GREEK SMALL LETTER BETA, COMBINING PALATALIZED HOOK BELOW; GREEK SMALL LETTER BETA WITH PALATAL HOOK
415-
03B8 0321; 1DF3C; Precomposed_Form # GREEK SMALL LETTER THETA, COMBINING PALATALIZED HOOK BELOW; GREEK SMALL LETTER THETA WITH PALATAL HOOK
416-
03C7 0321; 1DF3D; Precomposed_Form # GREEK SMALL LETTER CHI, COMBINING PALATALIZED HOOK BELOW; GREEK SMALL LETTER CHI WITH PALATAL HOOK
417-
1D48 0321; 1DF25; Precomposed_Form # MODIFIER LETTER SMALL D, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL D WITH PALATAL HOOK
413+
02E2 0321; 1DFF8; Precomposed_Form # MODIFIER LETTER SMALL S, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL S WITH PALATAL HOOK
414+
03B2 0321; 1DF3B; Precomposed_Form # GREEK SMALL LETTER BETA, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER BETA WITH PALATAL HOOK
415+
03B8 0321; 1DF3C; Precomposed_Form # GREEK SMALL LETTER THETA, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER THETA WITH PALATAL HOOK
416+
03C7 0321; 1DF3D; Precomposed_Form # GREEK SMALL LETTER CHI, COMBINING PALATALIZED HOOK BELOW; LATIN SMALL LETTER CHI WITH PALATAL HOOK
417+
1D48 0321; 1DFF5; Precomposed_Form # MODIFIER LETTER SMALL D, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL D WITH PALATAL HOOK
418418
1D57 0321; 1DB5; Precomposed_Form # MODIFIER LETTER SMALL T, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL T WITH PALATAL HOOK
419-
1DBB 0321; 1DF29; Precomposed_Form # MODIFIER LETTER SMALL Z, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL Z WITH PALATAL HOOK
420-
207F 0321; 1DF27; Precomposed_Form # SUPERSCRIPT LATIN SMALL LETTER N, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL N WITH PALATAL HOOK
419+
1DBB 0321; 1DFF9; Precomposed_Form # MODIFIER LETTER SMALL Z, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL Z WITH PALATAL HOOK
420+
207F 0321; 1DFF7; Precomposed_Form # SUPERSCRIPT LATIN SMALL LETTER N, COMBINING PALATALIZED HOOK BELOW; MODIFIER LETTER SMALL N WITH PALATAL HOOK
421421
# Characters with retroflex hook for which a precomposed form exists,
422422
# but the sequences are not canonically equivalent
423423
0052 0322; 2C64; Precomposed_Form # LATIN CAPITAL LETTER R, COMBINING RETROFLEX HOOK BELOW; LATIN CAPITAL LETTER R WITH TAIL

unicodetools/src/main/java/org/unicode/jsp/ICUPropertyFactory.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,6 +158,11 @@ public String _getValue(int codePoint) {
158158
return null;
159159
}
160160

161+
/**
 * String-keyed lookup is not supported by this property.
 *
 * @param string the string whose value is requested (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
162+
protected String _getValue(String string) {
163+
throw new UnsupportedOperationException();
164+
}
165+
161166
private String getAge(int codePoint) {
162167
String temp = UCharacter.getAge(codePoint).toString();
163168
if (temp.equals("0.0.0.0")) return "unassigned";

unicodetools/src/main/java/org/unicode/props/IndexUnicodeProperties.java

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -812,6 +812,11 @@ protected String _getValue(int codepoint) {
812812
return resolveValue(result, codepoint);
813813
}
814814

815+
/**
 * Returns the entry stored for {@code string} in the map obtained from
 * {@code _getRawUnicodeMap()}.
 *
 * @param string the string to look up
 * @return whatever the raw map's {@code get} yields for {@code string}
 */
@Override
816+
protected String _getValue(String string) {
817+
// NOTE(review): the code-point overload passes its result through resolveValue(...);
// this overload returns the raw map entry as-is. Confirm that is intended.
return _getRawUnicodeMap().get(string);
818+
}
819+
815820
@Override
816821
public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
817822
if (baseVersionProperties == null) {

unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java

Lines changed: 42 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -288,6 +288,26 @@ public String getValue(int codepoint) {
288288
return _getValue(codepoint);
289289
}
290290

291+
/**
 * Returns the value(s) of this property for {@code string}.
 *
 * <p>The raw value comes from {@code _getValue(String)}. When the property is multivalued and
 * the raw value is non-null, it is split with {@code delimiterSplitter}; otherwise the raw
 * value is returned as the sole element.
 *
 * @param string the string to look up
 * @return the split values, or a singleton holding the raw value (the element is null when
 *     {@code _getValue} returned null)
 */
public Iterable<String> getValues(String string) {
292+
String value = _getValue(string);
293+
// A null raw value is never split; it is wrapped in the singleton below.
return isMultivalued && value != null
294+
? delimiterSplitter.split(value)
295+
: Collections.singleton(value);
296+
}
297+
298+
/**
 * Returns the single value of this property for {@code string}.
 *
 * <p>Takes the first element produced by {@code getValues(string)} and rejects the call if a
 * second element exists.
 *
 * @param string the string to look up
 * @return the only element produced by {@code getValues(string)}
 * @throws IllegalArgumentException if {@code getValues(string)} produces more than one element
 */
public String getValue(String string) {
299+
final var it = getValues(string).iterator();
300+
// NOTE(review): next() throws NoSuchElementException on an empty iterable; this assumes
// getValues never produces one. Confirm for delimiterSplitter.
final var result = it.next();
301+
// A second element means the property has several values for this string.
if (it.hasNext()) {
302+
throw new IllegalArgumentException(
303+
name
304+
+ ": getValue("
305+
+ string
306+
+ ") but the property is multivalued for that string");
307+
}
308+
return result;
309+
}
310+
291311
// public String getValue(int codepoint, boolean isShort) {
292312
// return getValue(codepoint);
293313
// }
@@ -331,6 +351,8 @@ public List<String> getAvailableValues(List<String> result) {
331351

332352
protected abstract String _getValue(int codepoint);
333353

354+
/**
 * Subclass hook that supplies the raw value of this property for a string; called by
 * {@code getValues(String)}.
 *
 * @param string the string to look up
 * @return the raw value for {@code string}
 */
protected abstract String _getValue(String string);
355+
334356
protected abstract List<String> _getNameAliases(List<String> result);
335357

336358
protected abstract List<String> _getValueAliases(String valueAlias, List<String> result);
@@ -1205,6 +1227,11 @@ public String _getValue(int codepoint) {
12051227
return filter.remap(property.getValue(codepoint));
12061228
}
12071229

1230+
/**
 * Looks up {@code string} on the wrapped {@code property} and passes the result through
 * {@code filter.remap}, mirroring the code-point overload.
 *
 * @param string the string to look up
 * @return the remapped value
 */
@Override
1231+
public String _getValue(String string) {
1232+
return filter.remap(property.getValue(string));
1233+
}
1234+
12081235
@Override
12091236
public List<String> _getValueAliases(String valueAlias, List<String> result) {
12101237
if (backmap == null) {
@@ -1551,6 +1578,11 @@ private void _addToValues(String item, String alias) {
15511578
_ensureValueInAliases(item);
15521579
addValueAlias(item, alias, AliasAddAction.REQUIRE_MAIN_ALIAS);
15531580
}
1581+
1582+
/**
 * String-keyed lookup is not supported by this property.
 *
 * @param string the string whose value is requested (unused)
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
1583+
protected String _getValue(String string) {
1584+
throw new UnsupportedOperationException();
1585+
}
15541586
/* public String _getVersion() {
15551587
return version;
15561588
}
@@ -1602,6 +1634,11 @@ protected String _getValue(int codepoint) {
16021634
return unicodeMap.getValue(codepoint);
16031635
}
16041636

1637+
/**
 * Returns the value that {@code unicodeMap} holds for {@code string}, mirroring the
 * code-point overload.
 *
 * @param string the string to look up
 * @return the result of {@code unicodeMap.getValue(string)}
 */
@Override
1638+
public String _getValue(String string) {
1639+
return unicodeMap.getValue(string);
1640+
}
1641+
16051642
/* protected List _getValueAliases(String valueAlias, List result) {
16061643
if (!unicodeMap.getAvailableValues().contains(valueAlias)) return result;
16071644
result.add(valueAlias);
@@ -1758,6 +1795,11 @@ protected String _getValue(int codepoint) {
17581795
return YESNO_ARRAY[unicodeSet.contains(codepoint) ? 0 : 1];
17591796
}
17601797

1798+
/**
 * Reports whether {@code string} is a member of {@code unicodeSet}.
 *
 * @param string the string to test for membership
 * @return {@code YESNO_ARRAY[0]} when {@code unicodeSet} contains {@code string}, otherwise
 *     {@code YESNO_ARRAY[1]}
 */
@Override
1799+
public String _getValue(String string) {
1800+
// Same index convention as the code-point overload: 0 = contained, 1 = not contained.
return YESNO_ARRAY[unicodeSet.contains(string) ? 0 : 1];
1801+
}
1802+
17611803
@Override
17621804
protected List<String> _getAvailableValues(List<String> result) {
17631805
return YESNO;

0 commit comments

Comments
 (0)