Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unicode/EBCDIC decode fixes and validator functions #4874

Closed
wants to merge 9 commits into from
Closed
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Update Unicode tables.
Rot127 committed Feb 1, 2025
commit 52a612e43fb0733dee77ddf9d8acddf7768476cf
542 changes: 354 additions & 188 deletions librz/util/utf8.c
Original file line number Diff line number Diff line change
@@ -1,204 +1,313 @@
// SPDX-FileCopyrightText: 2014-2018 LemonBoy <thatlemon@gmail.com>
// SPDX-FileCopyrightText: 2014-2018 kazarmy <kazarmy@gmail.com>
// SPDX-FileCopyrightText: 2014-2018 pancake <pancake@nopcode.org>
// SPDX-FileCopyrightText: 2025 Rot127 <unisono@quyllur.org>
// SPDX-License-Identifier: LGPL-3.0-only

#include <rz_types.h>
#include <rz_util.h>
#include <rz_windows.h>

#define UTF_LAST_BLOCK (281)
#define UTF_BLOCKS_COUNT RZ_ARRAY_SIZE(utf_blocks)
#define UTF_NONPRINTABLE_RANGES_COUNT RZ_ARRAY_SIZE(nonprintable_ranges)

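/**
* \brief Inclusive code point ranges of control, private-use or similar
* (non-printable) character categories.
* The named entries are all ranges/characters in UnicodeData.txt whose
* General_Category (field index 2) starts with 'C'.
* The entries marked "Undefined" were generated with a script.
*
* NOTE(review): several "Undefined" entries look like the interior of
* "<..., First>"/"<..., Last>" range pairs in UnicodeData.txt, e.g.
* { 0x3401, 0x4dbe }, { 0x4e01, 0x9ffe }, { 0xac01, 0xd7a2 }, { 0x17001, 0x187f6 }
* and { 0x20001, 0x2a6de }. If so, assigned CJK ideographs, Hangul syllables and
* Tangut characters are listed here as non-printable - confirm against the
* generator script.
* NOTE(review): the entries are not in ascending order (named ranges first,
* then the "Undefined" ones) - verify that no lookup relies on a sorted table.
*
* References:
* - https://www.unicode.org/Public/UNIDATA/UnicodeData.txt
* - https://www.unicode.org/reports/tr44/#General_Category_Values
*/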
const struct {
ut32 from, to;
} nonprintable_ranges[] = {
{ 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F },
{ 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B },
{ 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 },
{ 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 },
{ 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF },
{ 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D },
{ 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C },
{ 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F },
{ 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F },
{ 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF },
{ 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 },
{ 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 },
{ 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB },
{ 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 },
{ 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 },
{ 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E },
{ 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 },
{ 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B },
{ 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A },
{ 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D },
{ 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 },
{ 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 },
{ 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB },
{ 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF },
{ 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 0x0B00 },
{ 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 },
{ 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 },
{ 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A },
{ 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E },
{ 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 },
{ 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 },
{ 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 },
{ 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD },
{ 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF },
{ 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 },
{ 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 0x0C11 },
{ 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C },
{ 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 },
{ 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 },
{ 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 },
{ 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 },
{ 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 },
{ 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD },
{ 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 },
{ 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D },
{ 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 },
{ 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F },
{ 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 },
{ 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 },
{ 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 },
{ 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 },
{ 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E },
{ 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 },
{ 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 },
{ 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 },
{ 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC },
{ 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 },
{ 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB },
{ 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 },
{ 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD },
{ 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC },
{ 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 },
{ 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 },
{ 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F },
{ 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF },
{ 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 },
{ 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C },
{ 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF },
{ 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D },
{ 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F },
{ 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F },
{ 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF },
{ 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F },
{ 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF },
{ 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F },
{ 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F },
{ 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF },
{ 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F },
{ 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F },
{ 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F },
{ 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C },
{ 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF },
{ 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F },
{ 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 },
{ 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E },
{ 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 },
{ 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 },
{ 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F },
{ 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 },
{ 0x208F, 0x208F }, { 0x209D, 0x209F }, { 0x20BB, 0x20CF },
{ 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF },
{ 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 },
{ 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F },
{ 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 },
{ 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E },
{ 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 },
{ 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF },
{ 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 },
{ 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A },
{ 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF },
{ 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 },
{ 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F },
{ 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F },
{ 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF },
{ 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F },
{ 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F },
{ 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F },
{ 0xA83A, 0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD },
{ 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E },
{ 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD },
{ 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F },
{ 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA },
{ 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 },
{ 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF },
{ 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF },
{ 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F },
{ 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C },
{ 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F },
{ 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 },
{ 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF },
{ 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F },
{ 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, { 0xFE6C, 0xFE6F },
{ 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 },
{ 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 },
{ 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF },
{ 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF },
{ 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B },
{ 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F },
{ 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 },
{ 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F },
{ 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F },
{ 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E },
{ 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F },
{ 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 },
{ 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E },
{ 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E },
{ 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD },
{ 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B },
{ 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 },
{ 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F },
{ 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 },
{ 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F },
{ 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F },
{ 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF },
{ 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F },
{ 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF },
{ 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F },
{ 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF },
{ 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF },
{ 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 },
{ 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF },
{ 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 },
{ 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 },
{ 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA },
{ 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 },
{ 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D },
{ 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 },
{ 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 },
{ 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 },
{ 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 },
{ 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 },
{ 0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 },
{ 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C },
{ 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 },
{ 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C },
{ 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 },
{ 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 },
{ 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F },
{ 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 },
{ 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF },
{ 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 },
{ 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF },
{ 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F },
{ 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F },
{ 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F },
{ 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F },
{ 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF },
{ 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 },
{ 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F },
{ 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F },
{ 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 0x2A6D7, 0x2A6FF },
{ 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 },
{ 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF }, { 0x110000, 0xFFFFFFFF }
{ 0x0000, 0x001F }, // <control>
{ 0x007F, 0x009F }, // <control>
{ 0x00AD, 0x00AD }, // SOFT HYPHEN
{ 0x0600, 0x0605 }, // ARABIC NUMBER MARK ABOVE
{ 0x061C, 0x061C }, // ARABIC LETTER MARK
{ 0x06DD, 0x06DD }, // ARABIC END OF AYAH
{ 0x070F, 0x070F }, // SYRIAC ABBREVIATION MARK
{ 0x0890, 0x0891 }, // ARABIC POUND MARK ABOVE
{ 0x08E2, 0x08E2 }, // ARABIC DISPUTED END OF AYAH
{ 0x180E, 0x180E }, // MONGOLIAN VOWEL SEPARATOR
{ 0x200B, 0x200F }, // ZERO WIDTH SPACE
{ 0x202A, 0x202E }, // LEFT-TO-RIGHT EMBEDDING
{ 0x2060, 0x2064 }, // WORD JOINER
{ 0x2066, 0x206F }, // LEFT-TO-RIGHT ISOLATE
{ 0xD800, 0xDB7F }, // Non Private Use High Surrogate
{ 0xDB80, 0xDBFF }, // Private Use High Surrogate
{ 0xDC00, 0xDFFF }, // Low Surrogate
{ 0xE000, 0xF8FF }, // Private Use
{ 0xFEFF, 0xFEFF }, // ZERO WIDTH NO-BREAK SPACE
{ 0xFFF9, 0xFFFB }, // INTERLINEAR ANNOTATION ANCHOR
{ 0x110BD, 0x110BD }, // KAITHI NUMBER SIGN
{ 0x110CD, 0x110CD }, // KAITHI NUMBER SIGN ABOVE
{ 0x13430, 0x1343F }, // EGYPTIAN HIEROGLYPH VERTICAL JOINER
{ 0x1BCA0, 0x1BCA3 }, // SHORTHAND FORMAT LETTER OVERLAP
{ 0x1D173, 0x1D17A }, // MUSICAL SYMBOL BEGIN BEAM
{ 0xE0001, 0xE0001 }, // LANGUAGE TAG
{ 0xE0020, 0xE007F }, // TAGS
{ 0xF0000, 0xFFFFD }, // Plane 15 Private Use
{ 0x100000, 0x10FFFD }, // Plane 16 Private Use
{ 0x378, 0x379 }, { 0x380, 0x383 }, { 0x38b, 0x38b }, // Undefined
{ 0x38d, 0x38d }, { 0x3a2, 0x3a2 }, { 0x530, 0x530 }, // Undefined
{ 0x557, 0x558 }, { 0x58b, 0x58c }, { 0x590, 0x590 }, // Undefined
{ 0x5c8, 0x5cf }, { 0x5eb, 0x5ee }, { 0x5f5, 0x5ff }, // Undefined
{ 0x70e, 0x70e }, { 0x74b, 0x74c }, { 0x7b2, 0x7bf }, // Undefined
{ 0x7fb, 0x7fc }, { 0x82e, 0x82f }, { 0x83f, 0x83f }, // Undefined
{ 0x85c, 0x85d }, { 0x85f, 0x85f }, { 0x86b, 0x86f }, // Undefined
{ 0x88f, 0x88f }, { 0x892, 0x896 }, { 0x984, 0x984 }, // Undefined
{ 0x98d, 0x98e }, { 0x991, 0x992 }, { 0x9a9, 0x9a9 }, // Undefined
{ 0x9b1, 0x9b1 }, { 0x9b3, 0x9b5 }, { 0x9ba, 0x9bb }, // Undefined
{ 0x9c5, 0x9c6 }, { 0x9c9, 0x9ca }, { 0x9cf, 0x9d6 }, // Undefined
{ 0x9d8, 0x9db }, { 0x9de, 0x9de }, { 0x9e4, 0x9e5 }, // Undefined
{ 0x9ff, 0xa00 }, { 0xa04, 0xa04 }, { 0xa0b, 0xa0e }, // Undefined
{ 0xa11, 0xa12 }, { 0xa29, 0xa29 }, { 0xa31, 0xa31 }, // Undefined
{ 0xa34, 0xa34 }, { 0xa37, 0xa37 }, { 0xa3a, 0xa3b }, // Undefined
{ 0xa3d, 0xa3d }, { 0xa43, 0xa46 }, { 0xa49, 0xa4a }, // Undefined
{ 0xa4e, 0xa50 }, { 0xa52, 0xa58 }, { 0xa5d, 0xa5d }, // Undefined
{ 0xa5f, 0xa65 }, { 0xa77, 0xa80 }, { 0xa84, 0xa84 }, // Undefined
{ 0xa8e, 0xa8e }, { 0xa92, 0xa92 }, { 0xaa9, 0xaa9 }, // Undefined
{ 0xab1, 0xab1 }, { 0xab4, 0xab4 }, { 0xaba, 0xabb }, // Undefined
{ 0xac6, 0xac6 }, { 0xaca, 0xaca }, { 0xace, 0xacf }, // Undefined
{ 0xad1, 0xadf }, { 0xae4, 0xae5 }, { 0xaf2, 0xaf8 }, // Undefined
{ 0xb00, 0xb00 }, { 0xb04, 0xb04 }, { 0xb0d, 0xb0e }, // Undefined
{ 0xb11, 0xb12 }, { 0xb29, 0xb29 }, { 0xb31, 0xb31 }, // Undefined
{ 0xb34, 0xb34 }, { 0xb3a, 0xb3b }, { 0xb45, 0xb46 }, // Undefined
{ 0xb49, 0xb4a }, { 0xb4e, 0xb54 }, { 0xb58, 0xb5b }, // Undefined
{ 0xb5e, 0xb5e }, { 0xb64, 0xb65 }, { 0xb78, 0xb81 }, // Undefined
{ 0xb84, 0xb84 }, { 0xb8b, 0xb8d }, { 0xb91, 0xb91 }, // Undefined
{ 0xb96, 0xb98 }, { 0xb9b, 0xb9b }, { 0xb9d, 0xb9d }, // Undefined
{ 0xba0, 0xba2 }, { 0xba5, 0xba7 }, { 0xbab, 0xbad }, // Undefined
{ 0xbba, 0xbbd }, { 0xbc3, 0xbc5 }, { 0xbc9, 0xbc9 }, // Undefined
{ 0xbce, 0xbcf }, { 0xbd1, 0xbd6 }, { 0xbd8, 0xbe5 }, // Undefined
{ 0xbfb, 0xbff }, { 0xc0d, 0xc0d }, { 0xc11, 0xc11 }, // Undefined
{ 0xc29, 0xc29 }, { 0xc3a, 0xc3b }, { 0xc45, 0xc45 }, // Undefined
{ 0xc49, 0xc49 }, { 0xc4e, 0xc54 }, { 0xc57, 0xc57 }, // Undefined
{ 0xc5b, 0xc5c }, { 0xc5e, 0xc5f }, { 0xc64, 0xc65 }, // Undefined
{ 0xc70, 0xc76 }, { 0xc8d, 0xc8d }, { 0xc91, 0xc91 }, // Undefined
{ 0xca9, 0xca9 }, { 0xcb4, 0xcb4 }, { 0xcba, 0xcbb }, // Undefined
{ 0xcc5, 0xcc5 }, { 0xcc9, 0xcc9 }, { 0xcce, 0xcd4 }, // Undefined
{ 0xcd7, 0xcdc }, { 0xcdf, 0xcdf }, { 0xce4, 0xce5 }, // Undefined
{ 0xcf0, 0xcf0 }, { 0xcf4, 0xcff }, { 0xd0d, 0xd0d }, // Undefined
{ 0xd11, 0xd11 }, { 0xd45, 0xd45 }, { 0xd49, 0xd49 }, // Undefined
{ 0xd50, 0xd53 }, { 0xd64, 0xd65 }, { 0xd80, 0xd80 }, // Undefined
{ 0xd84, 0xd84 }, { 0xd97, 0xd99 }, { 0xdb2, 0xdb2 }, // Undefined
{ 0xdbc, 0xdbc }, { 0xdbe, 0xdbf }, { 0xdc7, 0xdc9 }, // Undefined
{ 0xdcb, 0xdce }, { 0xdd5, 0xdd5 }, { 0xdd7, 0xdd7 }, // Undefined
{ 0xde0, 0xde5 }, { 0xdf0, 0xdf1 }, { 0xdf5, 0xe00 }, // Undefined
{ 0xe3b, 0xe3e }, { 0xe5c, 0xe80 }, { 0xe83, 0xe83 }, // Undefined
{ 0xe85, 0xe85 }, { 0xe8b, 0xe8b }, { 0xea4, 0xea4 }, // Undefined
{ 0xea6, 0xea6 }, { 0xebe, 0xebf }, { 0xec5, 0xec5 }, // Undefined
{ 0xec7, 0xec7 }, { 0xecf, 0xecf }, { 0xeda, 0xedb }, // Undefined
{ 0xee0, 0xeff }, { 0xf48, 0xf48 }, { 0xf6d, 0xf70 }, // Undefined
{ 0xf98, 0xf98 }, { 0xfbd, 0xfbd }, { 0xfcd, 0xfcd }, // Undefined
{ 0xfdb, 0xfff }, { 0x10c6, 0x10c6 }, { 0x10c8, 0x10cc }, // Undefined
{ 0x10ce, 0x10cf }, { 0x1249, 0x1249 }, { 0x124e, 0x124f }, // Undefined
{ 0x1257, 0x1257 }, { 0x1259, 0x1259 }, { 0x125e, 0x125f }, // Undefined
{ 0x1289, 0x1289 }, { 0x128e, 0x128f }, { 0x12b1, 0x12b1 }, // Undefined
{ 0x12b6, 0x12b7 }, { 0x12bf, 0x12bf }, { 0x12c1, 0x12c1 }, // Undefined
{ 0x12c6, 0x12c7 }, { 0x12d7, 0x12d7 }, { 0x1311, 0x1311 }, // Undefined
{ 0x1316, 0x1317 }, { 0x135b, 0x135c }, { 0x137d, 0x137f }, // Undefined
{ 0x139a, 0x139f }, { 0x13f6, 0x13f7 }, { 0x13fe, 0x13ff }, // Undefined
{ 0x169d, 0x169f }, { 0x16f9, 0x16ff }, { 0x1716, 0x171e }, // Undefined
{ 0x1737, 0x173f }, { 0x1754, 0x175f }, { 0x176d, 0x176d }, // Undefined
{ 0x1771, 0x1771 }, { 0x1774, 0x177f }, { 0x17de, 0x17df }, // Undefined
{ 0x17ea, 0x17ef }, { 0x17fa, 0x17ff }, { 0x181a, 0x181f }, // Undefined
{ 0x1879, 0x187f }, { 0x18ab, 0x18af }, { 0x18f6, 0x18ff }, // Undefined
{ 0x191f, 0x191f }, { 0x192c, 0x192f }, { 0x193c, 0x193f }, // Undefined
{ 0x1941, 0x1943 }, { 0x196e, 0x196f }, { 0x1975, 0x197f }, // Undefined
{ 0x19ac, 0x19af }, { 0x19ca, 0x19cf }, { 0x19db, 0x19dd }, // Undefined
{ 0x1a1c, 0x1a1d }, { 0x1a5f, 0x1a5f }, { 0x1a7d, 0x1a7e }, // Undefined
{ 0x1a8a, 0x1a8f }, { 0x1a9a, 0x1a9f }, { 0x1aae, 0x1aaf }, // Undefined
{ 0x1acf, 0x1aff }, { 0x1b4d, 0x1b4d }, { 0x1bf4, 0x1bfb }, // Undefined
{ 0x1c38, 0x1c3a }, { 0x1c4a, 0x1c4c }, { 0x1c8b, 0x1c8f }, // Undefined
{ 0x1cbb, 0x1cbc }, { 0x1cc8, 0x1ccf }, { 0x1cfb, 0x1cff }, // Undefined
{ 0x1f16, 0x1f17 }, { 0x1f1e, 0x1f1f }, { 0x1f46, 0x1f47 }, // Undefined
{ 0x1f4e, 0x1f4f }, { 0x1f58, 0x1f58 }, { 0x1f5a, 0x1f5a }, // Undefined
{ 0x1f5c, 0x1f5c }, { 0x1f5e, 0x1f5e }, { 0x1f7e, 0x1f7f }, // Undefined
{ 0x1fb5, 0x1fb5 }, { 0x1fc5, 0x1fc5 }, { 0x1fd4, 0x1fd5 }, // Undefined
{ 0x1fdc, 0x1fdc }, { 0x1ff0, 0x1ff1 }, { 0x1ff5, 0x1ff5 }, // Undefined
{ 0x1fff, 0x1fff }, { 0x2065, 0x2065 }, { 0x2072, 0x2073 }, // Undefined
{ 0x208f, 0x208f }, { 0x209d, 0x209f }, { 0x20c1, 0x20cf }, // Undefined
{ 0x20f1, 0x20ff }, { 0x218c, 0x218f }, { 0x242a, 0x243f }, // Undefined
{ 0x244b, 0x245f }, { 0x2b74, 0x2b75 }, { 0x2b96, 0x2b96 }, // Undefined
{ 0x2cf4, 0x2cf8 }, { 0x2d26, 0x2d26 }, { 0x2d28, 0x2d2c }, // Undefined
{ 0x2d2e, 0x2d2f }, { 0x2d68, 0x2d6e }, { 0x2d71, 0x2d7e }, // Undefined
{ 0x2d97, 0x2d9f }, { 0x2da7, 0x2da7 }, { 0x2daf, 0x2daf }, // Undefined
{ 0x2db7, 0x2db7 }, { 0x2dbf, 0x2dbf }, { 0x2dc7, 0x2dc7 }, // Undefined
{ 0x2dcf, 0x2dcf }, { 0x2dd7, 0x2dd7 }, { 0x2ddf, 0x2ddf }, // Undefined
{ 0x2e5e, 0x2e7f }, { 0x2e9a, 0x2e9a }, { 0x2ef4, 0x2eff }, // Undefined
{ 0x2fd6, 0x2fef }, { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, // Undefined
{ 0x3100, 0x3104 }, { 0x3130, 0x3130 }, { 0x318f, 0x318f }, // Undefined
{ 0x31e6, 0x31ee }, { 0x321f, 0x321f }, { 0x3401, 0x4dbe }, // Undefined
{ 0x4e01, 0x9ffe }, { 0xa48d, 0xa48f }, { 0xa4c7, 0xa4cf }, // Undefined
{ 0xa62c, 0xa63f }, { 0xa6f8, 0xa6ff }, { 0xa7ce, 0xa7cf }, // Undefined
{ 0xa7d2, 0xa7d2 }, { 0xa7d4, 0xa7d4 }, { 0xa7dd, 0xa7f1 }, // Undefined
{ 0xa82d, 0xa82f }, { 0xa83a, 0xa83f }, { 0xa878, 0xa87f }, // Undefined
{ 0xa8c6, 0xa8cd }, { 0xa8da, 0xa8df }, { 0xa954, 0xa95e }, // Undefined
{ 0xa97d, 0xa97f }, { 0xa9ce, 0xa9ce }, { 0xa9da, 0xa9dd }, // Undefined
{ 0xa9ff, 0xa9ff }, { 0xaa37, 0xaa3f }, { 0xaa4e, 0xaa4f }, // Undefined
{ 0xaa5a, 0xaa5b }, { 0xaac3, 0xaada }, { 0xaaf7, 0xab00 }, // Undefined
{ 0xab07, 0xab08 }, { 0xab0f, 0xab10 }, { 0xab17, 0xab1f }, // Undefined
{ 0xab27, 0xab27 }, { 0xab2f, 0xab2f }, { 0xab6c, 0xab6f }, // Undefined
{ 0xabee, 0xabef }, { 0xabfa, 0xabff }, { 0xac01, 0xd7a2 }, // Undefined
{ 0xd7a4, 0xd7af }, { 0xd7c7, 0xd7ca }, { 0xd7fc, 0xd7ff }, // Undefined
{ 0xd801, 0xdb7e }, { 0xdb81, 0xdbfe }, { 0xdc01, 0xdffe }, // Undefined
{ 0xe001, 0xf8fe }, { 0xfa6e, 0xfa6f }, { 0xfada, 0xfaff }, // Undefined
{ 0xfb07, 0xfb12 }, { 0xfb18, 0xfb1c }, { 0xfb37, 0xfb37 }, // Undefined
{ 0xfb3d, 0xfb3d }, { 0xfb3f, 0xfb3f }, { 0xfb42, 0xfb42 }, // Undefined
{ 0xfb45, 0xfb45 }, { 0xfbc3, 0xfbd2 }, { 0xfd90, 0xfd91 }, // Undefined
{ 0xfdc8, 0xfdce }, { 0xfdd0, 0xfdef }, { 0xfe1a, 0xfe1f }, // Undefined
{ 0xfe53, 0xfe53 }, { 0xfe67, 0xfe67 }, { 0xfe6c, 0xfe6f }, // Undefined
{ 0xfe75, 0xfe75 }, { 0xfefd, 0xfefe }, { 0xff00, 0xff00 }, // Undefined
{ 0xffbf, 0xffc1 }, { 0xffc8, 0xffc9 }, { 0xffd0, 0xffd1 }, // Undefined
{ 0xffd8, 0xffd9 }, { 0xffdd, 0xffdf }, { 0xffe7, 0xffe7 }, // Undefined
{ 0xffef, 0xfff8 }, { 0xfffe, 0xffff }, { 0x1000c, 0x1000c }, // Undefined
{ 0x10027, 0x10027 }, { 0x1003b, 0x1003b }, { 0x1003e, 0x1003e }, // Undefined
{ 0x1004e, 0x1004f }, { 0x1005e, 0x1007f }, { 0x100fb, 0x100ff }, // Undefined
{ 0x10103, 0x10106 }, { 0x10134, 0x10136 }, { 0x1018f, 0x1018f }, // Undefined
{ 0x1019d, 0x1019f }, { 0x101a1, 0x101cf }, { 0x101fe, 0x1027f }, // Undefined
{ 0x1029d, 0x1029f }, { 0x102d1, 0x102df }, { 0x102fc, 0x102ff }, // Undefined
{ 0x10324, 0x1032c }, { 0x1034b, 0x1034f }, { 0x1037b, 0x1037f }, // Undefined
{ 0x1039e, 0x1039e }, { 0x103c4, 0x103c7 }, { 0x103d6, 0x103ff }, // Undefined
{ 0x1049e, 0x1049f }, { 0x104aa, 0x104af }, { 0x104d4, 0x104d7 }, // Undefined
{ 0x104fc, 0x104ff }, { 0x10528, 0x1052f }, { 0x10564, 0x1056e }, // Undefined
{ 0x1057b, 0x1057b }, { 0x1058b, 0x1058b }, { 0x10593, 0x10593 }, // Undefined
{ 0x10596, 0x10596 }, { 0x105a2, 0x105a2 }, { 0x105b2, 0x105b2 }, // Undefined
{ 0x105ba, 0x105ba }, { 0x105bd, 0x105bf }, { 0x105f4, 0x105ff }, // Undefined
{ 0x10737, 0x1073f }, { 0x10756, 0x1075f }, { 0x10768, 0x1077f }, // Undefined
{ 0x10786, 0x10786 }, { 0x107b1, 0x107b1 }, { 0x107bb, 0x107ff }, // Undefined
{ 0x10806, 0x10807 }, { 0x10809, 0x10809 }, { 0x10836, 0x10836 }, // Undefined
{ 0x10839, 0x1083b }, { 0x1083d, 0x1083e }, { 0x10856, 0x10856 }, // Undefined
{ 0x1089f, 0x108a6 }, { 0x108b0, 0x108df }, { 0x108f3, 0x108f3 }, // Undefined
{ 0x108f6, 0x108fa }, { 0x1091c, 0x1091e }, { 0x1093a, 0x1093e }, // Undefined
{ 0x10940, 0x1097f }, { 0x109b8, 0x109bb }, { 0x109d0, 0x109d1 }, // Undefined
{ 0x10a04, 0x10a04 }, { 0x10a07, 0x10a0b }, { 0x10a14, 0x10a14 }, // Undefined
{ 0x10a18, 0x10a18 }, { 0x10a36, 0x10a37 }, { 0x10a3b, 0x10a3e }, // Undefined
{ 0x10a49, 0x10a4f }, { 0x10a59, 0x10a5f }, { 0x10aa0, 0x10abf }, // Undefined
{ 0x10ae7, 0x10aea }, { 0x10af7, 0x10aff }, { 0x10b36, 0x10b38 }, // Undefined
{ 0x10b56, 0x10b57 }, { 0x10b73, 0x10b77 }, { 0x10b92, 0x10b98 }, // Undefined
{ 0x10b9d, 0x10ba8 }, { 0x10bb0, 0x10bff }, { 0x10c49, 0x10c7f }, // Undefined
{ 0x10cb3, 0x10cbf }, { 0x10cf3, 0x10cf9 }, { 0x10d28, 0x10d2f }, // Undefined
{ 0x10d3a, 0x10d3f }, { 0x10d66, 0x10d68 }, { 0x10d86, 0x10d8d }, // Undefined
{ 0x10d90, 0x10e5f }, { 0x10e7f, 0x10e7f }, { 0x10eaa, 0x10eaa }, // Undefined
{ 0x10eae, 0x10eaf }, { 0x10eb2, 0x10ec1 }, { 0x10ec5, 0x10efb }, // Undefined
{ 0x10f28, 0x10f2f }, { 0x10f5a, 0x10f6f }, { 0x10f8a, 0x10faf }, // Undefined
{ 0x10fcc, 0x10fdf }, { 0x10ff7, 0x10fff }, { 0x1104e, 0x11051 }, // Undefined
{ 0x11076, 0x1107e }, { 0x110c3, 0x110cc }, { 0x110ce, 0x110cf }, // Undefined
{ 0x110e9, 0x110ef }, { 0x110fa, 0x110ff }, { 0x11135, 0x11135 }, // Undefined
{ 0x11148, 0x1114f }, { 0x11177, 0x1117f }, { 0x111e0, 0x111e0 }, // Undefined
{ 0x111f5, 0x111ff }, { 0x11212, 0x11212 }, { 0x11242, 0x1127f }, // Undefined
{ 0x11287, 0x11287 }, { 0x11289, 0x11289 }, { 0x1128e, 0x1128e }, // Undefined
{ 0x1129e, 0x1129e }, { 0x112aa, 0x112af }, { 0x112eb, 0x112ef }, // Undefined
{ 0x112fa, 0x112ff }, { 0x11304, 0x11304 }, { 0x1130d, 0x1130e }, // Undefined
{ 0x11311, 0x11312 }, { 0x11329, 0x11329 }, { 0x11331, 0x11331 }, // Undefined
{ 0x11334, 0x11334 }, { 0x1133a, 0x1133a }, { 0x11345, 0x11346 }, // Undefined
{ 0x11349, 0x1134a }, { 0x1134e, 0x1134f }, { 0x11351, 0x11356 }, // Undefined
{ 0x11358, 0x1135c }, { 0x11364, 0x11365 }, { 0x1136d, 0x1136f }, // Undefined
{ 0x11375, 0x1137f }, { 0x1138a, 0x1138a }, { 0x1138c, 0x1138d }, // Undefined
{ 0x1138f, 0x1138f }, { 0x113b6, 0x113b6 }, { 0x113c1, 0x113c1 }, // Undefined
{ 0x113c3, 0x113c4 }, { 0x113c6, 0x113c6 }, { 0x113cb, 0x113cb }, // Undefined
{ 0x113d6, 0x113d6 }, { 0x113d9, 0x113e0 }, { 0x113e3, 0x113ff }, // Undefined
{ 0x1145c, 0x1145c }, { 0x11462, 0x1147f }, { 0x114c8, 0x114cf }, // Undefined
{ 0x114da, 0x1157f }, { 0x115b6, 0x115b7 }, { 0x115de, 0x115ff }, // Undefined
{ 0x11645, 0x1164f }, { 0x1165a, 0x1165f }, { 0x1166d, 0x1167f }, // Undefined
{ 0x116ba, 0x116bf }, { 0x116ca, 0x116cf }, { 0x116e4, 0x116ff }, // Undefined
{ 0x1171b, 0x1171c }, { 0x1172c, 0x1172f }, { 0x11747, 0x117ff }, // Undefined
{ 0x1183c, 0x1189f }, { 0x118f3, 0x118fe }, { 0x11907, 0x11908 }, // Undefined
{ 0x1190a, 0x1190b }, { 0x11914, 0x11914 }, { 0x11917, 0x11917 }, // Undefined
{ 0x11936, 0x11936 }, { 0x11939, 0x1193a }, { 0x11947, 0x1194f }, // Undefined
{ 0x1195a, 0x1199f }, { 0x119a8, 0x119a9 }, { 0x119d8, 0x119d9 }, // Undefined
{ 0x119e5, 0x119ff }, { 0x11a48, 0x11a4f }, { 0x11aa3, 0x11aaf }, // Undefined
{ 0x11af9, 0x11aff }, { 0x11b0a, 0x11bbf }, { 0x11be2, 0x11bef }, // Undefined
{ 0x11bfa, 0x11bff }, { 0x11c09, 0x11c09 }, { 0x11c37, 0x11c37 }, // Undefined
{ 0x11c46, 0x11c4f }, { 0x11c6d, 0x11c6f }, { 0x11c90, 0x11c91 }, // Undefined
{ 0x11ca8, 0x11ca8 }, { 0x11cb7, 0x11cff }, { 0x11d07, 0x11d07 }, // Undefined
{ 0x11d0a, 0x11d0a }, { 0x11d37, 0x11d39 }, { 0x11d3b, 0x11d3b }, // Undefined
{ 0x11d3e, 0x11d3e }, { 0x11d48, 0x11d4f }, { 0x11d5a, 0x11d5f }, // Undefined
{ 0x11d66, 0x11d66 }, { 0x11d69, 0x11d69 }, { 0x11d8f, 0x11d8f }, // Undefined
{ 0x11d92, 0x11d92 }, { 0x11d99, 0x11d9f }, { 0x11daa, 0x11edf }, // Undefined
{ 0x11ef9, 0x11eff }, { 0x11f11, 0x11f11 }, { 0x11f3b, 0x11f3d }, // Undefined
{ 0x11f5b, 0x11faf }, { 0x11fb1, 0x11fbf }, { 0x11ff2, 0x11ffe }, // Undefined
{ 0x1239a, 0x123ff }, { 0x1246f, 0x1246f }, { 0x12475, 0x1247f }, // Undefined
{ 0x12544, 0x12f8f }, { 0x12ff3, 0x12fff }, { 0x13456, 0x1345f }, // Undefined
{ 0x143fb, 0x143ff }, { 0x14647, 0x160ff }, { 0x1613a, 0x167ff }, // Undefined
{ 0x16a39, 0x16a3f }, { 0x16a5f, 0x16a5f }, { 0x16a6a, 0x16a6d }, // Undefined
{ 0x16abf, 0x16abf }, { 0x16aca, 0x16acf }, { 0x16aee, 0x16aef }, // Undefined
{ 0x16af6, 0x16aff }, { 0x16b46, 0x16b4f }, { 0x16b5a, 0x16b5a }, // Undefined
{ 0x16b62, 0x16b62 }, { 0x16b78, 0x16b7c }, { 0x16b90, 0x16d3f }, // Undefined
{ 0x16d7a, 0x16e3f }, { 0x16e9b, 0x16eff }, { 0x16f4b, 0x16f4e }, // Undefined
{ 0x16f88, 0x16f8e }, { 0x16fa0, 0x16fdf }, { 0x16fe5, 0x16fef }, // Undefined
{ 0x16ff2, 0x16fff }, { 0x17001, 0x187f6 }, { 0x187f8, 0x187ff }, // Undefined
{ 0x18cd6, 0x18cfe }, { 0x18d01, 0x18d07 }, { 0x18d09, 0x1afef }, // Undefined
{ 0x1aff4, 0x1aff4 }, { 0x1affc, 0x1affc }, { 0x1afff, 0x1afff }, // Undefined
{ 0x1b123, 0x1b131 }, { 0x1b133, 0x1b14f }, { 0x1b153, 0x1b154 }, // Undefined
{ 0x1b156, 0x1b163 }, { 0x1b168, 0x1b16f }, { 0x1b2fc, 0x1bbff }, // Undefined
{ 0x1bc6b, 0x1bc6f }, { 0x1bc7d, 0x1bc7f }, { 0x1bc89, 0x1bc8f }, // Undefined
{ 0x1bc9a, 0x1bc9b }, { 0x1bca4, 0x1cbff }, { 0x1ccfa, 0x1ccff }, // Undefined
{ 0x1ceb4, 0x1ceff }, { 0x1cf2e, 0x1cf2f }, { 0x1cf47, 0x1cf4f }, // Undefined
{ 0x1cfc4, 0x1cfff }, { 0x1d0f6, 0x1d0ff }, { 0x1d127, 0x1d128 }, // Undefined
{ 0x1d1eb, 0x1d1ff }, { 0x1d246, 0x1d2bf }, { 0x1d2d4, 0x1d2df }, // Undefined
{ 0x1d2f4, 0x1d2ff }, { 0x1d357, 0x1d35f }, { 0x1d379, 0x1d3ff }, // Undefined
{ 0x1d455, 0x1d455 }, { 0x1d49d, 0x1d49d }, { 0x1d4a0, 0x1d4a1 }, // Undefined
{ 0x1d4a3, 0x1d4a4 }, { 0x1d4a7, 0x1d4a8 }, { 0x1d4ad, 0x1d4ad }, // Undefined
{ 0x1d4ba, 0x1d4ba }, { 0x1d4bc, 0x1d4bc }, { 0x1d4c4, 0x1d4c4 }, // Undefined
{ 0x1d506, 0x1d506 }, { 0x1d50b, 0x1d50c }, { 0x1d515, 0x1d515 }, // Undefined
{ 0x1d51d, 0x1d51d }, { 0x1d53a, 0x1d53a }, { 0x1d53f, 0x1d53f }, // Undefined
{ 0x1d545, 0x1d545 }, { 0x1d547, 0x1d549 }, { 0x1d551, 0x1d551 }, // Undefined
{ 0x1d6a6, 0x1d6a7 }, { 0x1d7cc, 0x1d7cd }, { 0x1da8c, 0x1da9a }, // Undefined
{ 0x1daa0, 0x1daa0 }, { 0x1dab0, 0x1deff }, { 0x1df1f, 0x1df24 }, // Undefined
{ 0x1df2b, 0x1dfff }, { 0x1e007, 0x1e007 }, { 0x1e019, 0x1e01a }, // Undefined
{ 0x1e022, 0x1e022 }, { 0x1e025, 0x1e025 }, { 0x1e02b, 0x1e02f }, // Undefined
{ 0x1e06e, 0x1e08e }, { 0x1e090, 0x1e0ff }, { 0x1e12d, 0x1e12f }, // Undefined
{ 0x1e13e, 0x1e13f }, { 0x1e14a, 0x1e14d }, { 0x1e150, 0x1e28f }, // Undefined
{ 0x1e2af, 0x1e2bf }, { 0x1e2fa, 0x1e2fe }, { 0x1e300, 0x1e4cf }, // Undefined
{ 0x1e4fa, 0x1e5cf }, { 0x1e5fb, 0x1e5fe }, { 0x1e600, 0x1e7df }, // Undefined
{ 0x1e7e7, 0x1e7e7 }, { 0x1e7ec, 0x1e7ec }, { 0x1e7ef, 0x1e7ef }, // Undefined
{ 0x1e7ff, 0x1e7ff }, { 0x1e8c5, 0x1e8c6 }, { 0x1e8d7, 0x1e8ff }, // Undefined
{ 0x1e94c, 0x1e94f }, { 0x1e95a, 0x1e95d }, { 0x1e960, 0x1ec70 }, // Undefined
{ 0x1ecb5, 0x1ed00 }, { 0x1ed3e, 0x1edff }, { 0x1ee04, 0x1ee04 }, // Undefined
{ 0x1ee20, 0x1ee20 }, { 0x1ee23, 0x1ee23 }, { 0x1ee25, 0x1ee26 }, // Undefined
{ 0x1ee28, 0x1ee28 }, { 0x1ee33, 0x1ee33 }, { 0x1ee38, 0x1ee38 }, // Undefined
{ 0x1ee3a, 0x1ee3a }, { 0x1ee3c, 0x1ee41 }, { 0x1ee43, 0x1ee46 }, // Undefined
{ 0x1ee48, 0x1ee48 }, { 0x1ee4a, 0x1ee4a }, { 0x1ee4c, 0x1ee4c }, // Undefined
{ 0x1ee50, 0x1ee50 }, { 0x1ee53, 0x1ee53 }, { 0x1ee55, 0x1ee56 }, // Undefined
{ 0x1ee58, 0x1ee58 }, { 0x1ee5a, 0x1ee5a }, { 0x1ee5c, 0x1ee5c }, // Undefined
{ 0x1ee5e, 0x1ee5e }, { 0x1ee60, 0x1ee60 }, { 0x1ee63, 0x1ee63 }, // Undefined
{ 0x1ee65, 0x1ee66 }, { 0x1ee6b, 0x1ee6b }, { 0x1ee73, 0x1ee73 }, // Undefined
{ 0x1ee78, 0x1ee78 }, { 0x1ee7d, 0x1ee7d }, { 0x1ee7f, 0x1ee7f }, // Undefined
{ 0x1ee8a, 0x1ee8a }, { 0x1ee9c, 0x1eea0 }, { 0x1eea4, 0x1eea4 }, // Undefined
{ 0x1eeaa, 0x1eeaa }, { 0x1eebc, 0x1eeef }, { 0x1eef2, 0x1efff }, // Undefined
{ 0x1f02c, 0x1f02f }, { 0x1f094, 0x1f09f }, { 0x1f0af, 0x1f0b0 }, // Undefined
{ 0x1f0c0, 0x1f0c0 }, { 0x1f0d0, 0x1f0d0 }, { 0x1f0f6, 0x1f0ff }, // Undefined
{ 0x1f1ae, 0x1f1e5 }, { 0x1f203, 0x1f20f }, { 0x1f23c, 0x1f23f }, // Undefined
{ 0x1f249, 0x1f24f }, { 0x1f252, 0x1f25f }, { 0x1f266, 0x1f2ff }, // Undefined
{ 0x1f6d8, 0x1f6db }, { 0x1f6ed, 0x1f6ef }, { 0x1f6fd, 0x1f6ff }, // Undefined
{ 0x1f777, 0x1f77a }, { 0x1f7da, 0x1f7df }, { 0x1f7ec, 0x1f7ef }, // Undefined
{ 0x1f7f1, 0x1f7ff }, { 0x1f80c, 0x1f80f }, { 0x1f848, 0x1f84f }, // Undefined
{ 0x1f85a, 0x1f85f }, { 0x1f888, 0x1f88f }, { 0x1f8ae, 0x1f8af }, // Undefined
{ 0x1f8bc, 0x1f8bf }, { 0x1f8c2, 0x1f8ff }, { 0x1fa54, 0x1fa5f }, // Undefined
{ 0x1fa6e, 0x1fa6f }, { 0x1fa7d, 0x1fa7f }, { 0x1fa8a, 0x1fa8e }, // Undefined
{ 0x1fac7, 0x1facd }, { 0x1fadd, 0x1fade }, { 0x1faea, 0x1faef }, // Undefined
{ 0x1faf9, 0x1faff }, { 0x1fb93, 0x1fb93 }, { 0x1fbfa, 0x1ffff }, // Undefined
{ 0x20001, 0x2a6de }, { 0x2a6e0, 0x2a6ff }, { 0x2a701, 0x2b738 }, // Undefined
{ 0x2b73a, 0x2b73f }, { 0x2b741, 0x2b81c }, { 0x2b81e, 0x2b81f }, // Undefined
{ 0x2b821, 0x2cea0 }, { 0x2cea2, 0x2ceaf }, { 0x2ceb1, 0x2ebdf }, // Undefined
{ 0x2ebe1, 0x2ebef }, { 0x2ebf1, 0x2ee5c }, { 0x2ee5e, 0x2f7ff }, // Undefined
{ 0x2fa1e, 0x2ffff }, { 0x30001, 0x31349 }, { 0x3134b, 0x3134f }, // Undefined
{ 0x31351, 0x323ae }, { 0x323b0, 0xe0000 }, { 0xe0002, 0xe001f }, // Undefined
{ 0xe0080, 0xe00ff }, { 0xe01f0, 0xeffff }, { 0xf0001, 0xffffc }, // Undefined
{ 0xffffe, 0xfffff }, { 0x100001, 0x10fffc }, { 0x110000, 0xFFFFFFFF } // Undefined
};

/**
* \brief From https://www.unicode.org/Public/UNIDATA/Blocks.txt
*/
const RUtfBlock utf_blocks[] = {
{ 0x0000, 0x007F, "Basic Latin" },
{ 0x0080, 0x00FF, "Latin-1 Supplement" },
@@ -220,6 +329,7 @@ const RUtfBlock utf_blocks[] = {
{ 0x0800, 0x083F, "Samaritan" },
{ 0x0840, 0x085F, "Mandaic" },
{ 0x0860, 0x086F, "Syriac Supplement" },
{ 0x0870, 0x089F, "Arabic Extended-B" },
{ 0x08A0, 0x08FF, "Arabic Extended-A" },
{ 0x0900, 0x097F, "Devanagari" },
{ 0x0980, 0x09FF, "Bengali" },
@@ -263,6 +373,7 @@ const RUtfBlock utf_blocks[] = {
{ 0x1C00, 0x1C4F, "Lepcha" },
{ 0x1C50, 0x1C7F, "Ol Chiki" },
{ 0x1C80, 0x1C8F, "Cyrillic Extended-C" },
{ 0x1C90, 0x1CBF, "Georgian Extended" },
{ 0x1CC0, 0x1CCF, "Sundanese Supplement" },
{ 0x1CD0, 0x1CFF, "Vedic Extensions" },
{ 0x1D00, 0x1D7F, "Phonetic Extensions" },
@@ -382,7 +493,10 @@ const RUtfBlock utf_blocks[] = {
{ 0x104B0, 0x104FF, "Osage" },
{ 0x10500, 0x1052F, "Elbasan" },
{ 0x10530, 0x1056F, "Caucasian Albanian" },
{ 0x10570, 0x105BF, "Vithkuqi" },
{ 0x105C0, 0x105FF, "Todhri" },
{ 0x10600, 0x1077F, "Linear A" },
{ 0x10780, 0x107BF, "Latin Extended-F" },
{ 0x10800, 0x1083F, "Cypriot Syllabary" },
{ 0x10840, 0x1085F, "Imperial Aramaic" },
{ 0x10860, 0x1087F, "Palmyrene" },
@@ -402,7 +516,16 @@ const RUtfBlock utf_blocks[] = {
{ 0x10B80, 0x10BAF, "Psalter Pahlavi" },
{ 0x10C00, 0x10C4F, "Old Turkic" },
{ 0x10C80, 0x10CFF, "Old Hungarian" },
{ 0x10D00, 0x10D3F, "Hanifi Rohingya" },
{ 0x10D40, 0x10D8F, "Garay" },
{ 0x10E60, 0x10E7F, "Rumi Numeral Symbols" },
{ 0x10E80, 0x10EBF, "Yezidi" },
{ 0x10EC0, 0x10EFF, "Arabic Extended-C" },
{ 0x10F00, 0x10F2F, "Old Sogdian" },
{ 0x10F30, 0x10F6F, "Sogdian" },
{ 0x10F70, 0x10FAF, "Old Uyghur" },
{ 0x10FB0, 0x10FDF, "Chorasmian" },
{ 0x10FE0, 0x10FFF, "Elymaic" },
{ 0x11000, 0x1107F, "Brahmi" },
{ 0x11080, 0x110CF, "Kaithi" },
{ 0x110D0, 0x110FF, "Sora Sompeng" },
@@ -414,48 +537,86 @@ const RUtfBlock utf_blocks[] = {
{ 0x11280, 0x112AF, "Multani" },
{ 0x112B0, 0x112FF, "Khudawadi" },
{ 0x11300, 0x1137F, "Grantha" },
{ 0x11380, 0x113FF, "Tulu-Tigalari" },
{ 0x11400, 0x1147F, "Newa" },
{ 0x11480, 0x114DF, "Tirhuta" },
{ 0x11580, 0x115FF, "Siddham" },
{ 0x11600, 0x1165F, "Modi" },
{ 0x11660, 0x1167F, "Mongolian Supplement" },
{ 0x11680, 0x116CF, "Takri" },
{ 0x11700, 0x1173F, "Ahom" },
{ 0x116D0, 0x116FF, "Myanmar Extended-C" },
{ 0x11700, 0x1174F, "Ahom" },
{ 0x11800, 0x1184F, "Dogra" },
{ 0x118A0, 0x118FF, "Warang Citi" },
{ 0x11900, 0x1195F, "Dives Akuru" },
{ 0x119A0, 0x119FF, "Nandinagari" },
{ 0x11A00, 0x11A4F, "Zanabazar Square" },
{ 0x11A50, 0x11AAF, "Soyombo" },
{ 0x11AB0, 0x11ABF, "Unified Canadian Aboriginal Syllabics Extended-A" },
{ 0x11AC0, 0x11AFF, "Pau Cin Hau" },
{ 0x11B00, 0x11B5F, "Devanagari Extended-A" },
{ 0x11BC0, 0x11BFF, "Sunuwar" },
{ 0x11C00, 0x11C6F, "Bhaiksuki" },
{ 0x11C70, 0x11CBF, "Marchen" },
{ 0x11D00, 0x11D5F, "Masaram Gondi" },
{ 0x11D60, 0x11DAF, "Gunjala Gondi" },
{ 0x11EE0, 0x11EFF, "Makasar" },
{ 0x11F00, 0x11F5F, "Kawi" },
{ 0x11FB0, 0x11FBF, "Lisu Supplement" },
{ 0x11FC0, 0x11FFF, "Tamil Supplement" },
{ 0x12000, 0x123FF, "Cuneiform" },
{ 0x12400, 0x1247F, "Cuneiform Numbers and Punctuation" },
{ 0x12480, 0x1254F, "Early Dynastic Cuneiform" },
{ 0x12F90, 0x12FFF, "Cypro-Minoan" },
{ 0x13000, 0x1342F, "Egyptian Hieroglyphs" },
{ 0x13430, 0x1345F, "Egyptian Hieroglyph Format Controls" },
{ 0x13460, 0x143FF, "Egyptian Hieroglyphs Extended-A" },
{ 0x14400, 0x1467F, "Anatolian Hieroglyphs" },
{ 0x16100, 0x1613F, "Gurung Khema" },
{ 0x16800, 0x16A3F, "Bamum Supplement" },
{ 0x16A40, 0x16A6F, "Mro" },
{ 0x16A70, 0x16ACF, "Tangsa" },
{ 0x16AD0, 0x16AFF, "Bassa Vah" },
{ 0x16B00, 0x16B8F, "Pahawh Hmong" },
{ 0x16D40, 0x16D7F, "Kirat Rai" },
{ 0x16E40, 0x16E9F, "Medefaidrin" },
{ 0x16F00, 0x16F9F, "Miao" },
{ 0x16FE0, 0x16FFF, "Ideographic Symbols and Punctuation" },
{ 0x17000, 0x187FF, "Tangut" },
{ 0x18800, 0x18AFF, "Tangut Components" },
{ 0x18B00, 0x18CFF, "Khitan Small Script" },
{ 0x18D00, 0x18D7F, "Tangut Supplement" },
{ 0x1AFF0, 0x1AFFF, "Kana Extended-B" },
{ 0x1B000, 0x1B0FF, "Kana Supplement" },
{ 0x1B100, 0x1B12F, "Kana Extended-A" },
{ 0x1B130, 0x1B16F, "Small Kana Extension" },
{ 0x1B170, 0x1B2FF, "Nushu" },
{ 0x1BC00, 0x1BC9F, "Duployan" },
{ 0x1BCA0, 0x1BCAF, "Shorthand Format Controls" },
{ 0x1CC00, 0x1CEBF, "Symbols for Legacy Computing Supplement" },
{ 0x1CF00, 0x1CFCF, "Znamenny Musical Notation" },
{ 0x1D000, 0x1D0FF, "Byzantine Musical Symbols" },
{ 0x1D100, 0x1D1FF, "Musical Symbols" },
{ 0x1D200, 0x1D24F, "Ancient Greek Musical Notation" },
{ 0x1D2C0, 0x1D2DF, "Kaktovik Numerals" },
{ 0x1D2E0, 0x1D2FF, "Mayan Numerals" },
{ 0x1D300, 0x1D35F, "Tai Xuan Jing Symbols" },
{ 0x1D360, 0x1D37F, "Counting Rod Numerals" },
{ 0x1D400, 0x1D7FF, "Mathematical Alphanumeric Symbols" },
{ 0x1D800, 0x1DAAF, "Sutton SignWriting" },
{ 0x1DF00, 0x1DFFF, "Latin Extended-G" },
{ 0x1E000, 0x1E02F, "Glagolitic Supplement" },
{ 0x1E030, 0x1E08F, "Cyrillic Extended-D" },
{ 0x1E100, 0x1E14F, "Nyiakeng Puachue Hmong" },
{ 0x1E290, 0x1E2BF, "Toto" },
{ 0x1E2C0, 0x1E2FF, "Wancho" },
{ 0x1E4D0, 0x1E4FF, "Nag Mundari" },
{ 0x1E5D0, 0x1E5FF, "Ol Onal" },
{ 0x1E7E0, 0x1E7FF, "Ethiopic Extended-B" },
{ 0x1E800, 0x1E8DF, "Mende Kikakui" },
{ 0x1E900, 0x1E95F, "Adlam" },
{ 0x1EC70, 0x1ECBF, "Indic Siyaq Numbers" },
{ 0x1ED00, 0x1ED4F, "Ottoman Siyaq Numbers" },
{ 0x1EE00, 0x1EEFF, "Arabic Mathematical Alphabetic Symbols" },
{ 0x1F000, 0x1F02F, "Mahjong Tiles" },
{ 0x1F030, 0x1F09F, "Domino Tiles" },
@@ -470,21 +631,26 @@ const RUtfBlock utf_blocks[] = {
{ 0x1F780, 0x1F7FF, "Geometric Shapes Extended" },
{ 0x1F800, 0x1F8FF, "Supplemental Arrows-C" },
{ 0x1F900, 0x1F9FF, "Supplemental Symbols and Pictographs" },
{ 0x1FA00, 0x1FA6F, "Chess Symbols" },
{ 0x1FA70, 0x1FAFF, "Symbols and Pictographs Extended-A" },
{ 0x1FB00, 0x1FBFF, "Symbols for Legacy Computing" },
{ 0x20000, 0x2A6DF, "CJK Unified Ideographs Extension B" },
{ 0x2A700, 0x2B73F, "CJK Unified Ideographs Extension C" },
{ 0x2B740, 0x2B81F, "CJK Unified Ideographs Extension D" },
{ 0x2B820, 0x2CEAF, "CJK Unified Ideographs Extension E" },
{ 0x2CEB0, 0x2EBEF, "CJK Unified Ideographs Extension F" },
{ 0x2EBF0, 0x2EE5F, "CJK Unified Ideographs Extension I" },
{ 0x2F800, 0x2FA1F, "CJK Compatibility Ideographs Supplement" },
{ 0x30000, 0x3134F, "CJK Unified Ideographs Extension G" },
{ 0x31350, 0x323AF, "CJK Unified Ideographs Extension H" },
{ 0xE0000, 0xE007F, "Tags" },
{ 0xE0100, 0xE01EF, "Variation Selectors Supplement" },
{ 0xF0000, 0xFFFFF, "Supplementary Private Use Area-A" },
{ 0x100000, 0x10FFFF, "Supplementary Private Use Area-B" },
{ 0x110000, 0xFFFFFFFF, "No_Block" }
{ 0x100000, 0x10FFFF, "Supplementary Private Use Area-B" }
};

RZ_API const char *rz_utf_block_name(int idx) {
if (idx < 0 || idx >= UTF_LAST_BLOCK) {
if (idx < 0 || idx >= RZ_ARRAY_SIZE(utf_blocks)) {
return NULL;
}
return utf_blocks[idx].name;
11 changes: 11 additions & 0 deletions test/db/cmd/cmd_ps
Original file line number Diff line number Diff line change
@@ -151,3 +151,14 @@ Just like pineapple on pizza, this task/thread port doesn't belong here. @%s:%d\
Just like pineapple on pizza, this task/thread port doesn't belong here. @%s:%d
EOF
RUN

NAME=Issue #4697 and Unicode update
FILE==
CMDS=<<EOF
wx f09f9faaf09f9faa
ps
EOF
EXPECT=<<EOF
🟪🟪
EOF
RUN