12
12
13
13
#![ allow( missing_docs, non_upper_case_globals, non_snake_case) ]
14
14
15
- /// Represents a Unicode Version.
16
- ///
17
- /// See also: <http://www.unicode.org/versions/>
18
- #[ derive( Clone , Copy , Debug , Eq , Ord , PartialEq , PartialOrd ) ]
19
- pub struct UnicodeVersion {
20
- /// Major version.
21
- pub major : u32 ,
22
-
23
- /// Minor version.
24
- pub minor : u32 ,
25
-
26
- /// Micro (or Update) version.
27
- pub micro : u32 ,
28
-
29
- // Private field to keep struct expandable.
30
- _priv : ( ) ,
31
- }
15
+ use version:: UnicodeVersion ;
16
+ use bool_trie:: { BoolTrie , SmallBoolTrie } ;
32
17
33
18
/// The version of [Unicode](http://www.unicode.org/) that the Unicode parts of
34
19
/// `CharExt` and `UnicodeStrPrelude` traits are based on.
@@ -38,76 +23,8 @@ pub const UNICODE_VERSION: UnicodeVersion = UnicodeVersion {
38
23
micro : 0 ,
39
24
_priv : ( ) ,
40
25
} ;
41
-
42
-
43
- // BoolTrie is a trie for representing a set of Unicode codepoints. It is
44
- // implemented with postfix compression (sharing of identical child nodes),
45
- // which gives both compact size and fast lookup.
46
- //
47
- // The space of Unicode codepoints is divided into 3 subareas, each
48
- // represented by a trie with different depth. In the first (0..0x800), there
49
- // is no trie structure at all; each u64 entry corresponds to a bitvector
50
- // effectively holding 64 bool values.
51
- //
52
- // In the second (0x800..0x10000), each child of the root node represents a
53
- // 64-wide subrange, but instead of storing the full 64-bit value of the leaf,
54
- // the trie stores an 8-bit index into a shared table of leaf values. This
55
- // exploits the fact that in reasonable sets, many such leaves can be shared.
56
- //
57
- // In the third (0x10000..0x110000), each child of the root node represents a
58
- // 4096-wide subrange, and the trie stores an 8-bit index into a 64-byte slice
59
- // of a child tree. Each of these 64 bytes represents an index into the table
60
- // of shared 64-bit leaf values. This exploits the sparse structure in the
61
- // non-BMP range of most Unicode sets.
62
- pub struct BoolTrie {
63
- // 0..0x800 (corresponding to 1 and 2 byte utf-8 sequences)
64
- r1 : [ u64 ; 32 ] , // leaves
65
-
66
- // 0x800..0x10000 (corresponding to 3 byte utf-8 sequences)
67
- r2 : [ u8 ; 992 ] , // first level
68
- r3 : & ' static [ u64 ] , // leaves
69
-
70
- // 0x10000..0x110000 (corresponding to 4 byte utf-8 sequences)
71
- r4 : [ u8 ; 256 ] , // first level
72
- r5 : & ' static [ u8 ] , // second level
73
- r6 : & ' static [ u64 ] , // leaves
74
- }
75
-
76
- fn trie_range_leaf ( c : usize , bitmap_chunk : u64 ) -> bool {
77
- ( ( bitmap_chunk >> ( c & 63 ) ) & 1 ) != 0
78
- }
79
-
80
- fn trie_lookup_range_table ( c : char , r : & ' static BoolTrie ) -> bool {
81
- let c = c as usize ;
82
- if c < 0x800 {
83
- trie_range_leaf ( c, r. r1 [ c >> 6 ] )
84
- } else if c < 0x10000 {
85
- let child = r. r2 [ ( c >> 6 ) - 0x20 ] ;
86
- trie_range_leaf ( c, r. r3 [ child as usize ] )
87
- } else {
88
- let child = r. r4 [ ( c >> 12 ) - 0x10 ] ;
89
- let leaf = r. r5 [ ( ( child as usize ) << 6 ) + ( ( c >> 6 ) & 0x3f ) ] ;
90
- trie_range_leaf ( c, r. r6 [ leaf as usize ] )
91
- }
92
- }
93
-
94
- pub struct SmallBoolTrie {
95
- r1 : & ' static [ u8 ] , // first level
96
- r2 : & ' static [ u64 ] , // leaves
97
- }
98
-
99
- impl SmallBoolTrie {
100
- fn lookup ( & self , c : char ) -> bool {
101
- let c = c as usize ;
102
- match self . r1 . get ( c >> 6 ) {
103
- Some ( & child) => trie_range_leaf ( c, self . r2 [ child as usize ] ) ,
104
- None => false ,
105
- }
106
- }
107
- }
108
-
109
26
pub mod general_category {
110
- pub const Cc_table : & ' static super :: SmallBoolTrie = & super :: SmallBoolTrie {
27
+ pub const Cc_table : & super :: SmallBoolTrie = & super :: SmallBoolTrie {
111
28
r1 : & [
112
29
0 , 1 , 0
113
30
] ,
@@ -120,7 +37,7 @@ pub mod general_category {
120
37
Cc_table . lookup ( c)
121
38
}
122
39
123
- pub const N_table : & ' static super :: BoolTrie = & super :: BoolTrie {
40
+ pub const N_table : & super :: BoolTrie = & super :: BoolTrie {
124
41
r1 : [
125
42
0x03ff000000000000 , 0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 ,
126
43
0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 ,
@@ -212,13 +129,13 @@ pub mod general_category {
212
129
} ;
213
130
214
131
pub fn N ( c : char ) -> bool {
215
- super :: trie_lookup_range_table ( c , N_table )
132
+ N_table . lookup ( c )
216
133
}
217
134
218
135
}
219
136
220
137
pub mod derived_property {
221
- pub const Alphabetic_table : & ' static super :: BoolTrie = & super :: BoolTrie {
138
+ pub const Alphabetic_table : & super :: BoolTrie = & super :: BoolTrie {
222
139
r1 : [
223
140
0x0000000000000000 , 0x07fffffe07fffffe , 0x0420040000000000 , 0xff7fffffff7fffff ,
224
141
0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff ,
@@ -397,10 +314,10 @@ pub mod derived_property {
397
314
} ;
398
315
399
316
pub fn Alphabetic ( c : char ) -> bool {
400
- super :: trie_lookup_range_table ( c , Alphabetic_table )
317
+ Alphabetic_table . lookup ( c )
401
318
}
402
319
403
- pub const Case_Ignorable_table : & ' static super :: BoolTrie = & super :: BoolTrie {
320
+ pub const Case_Ignorable_table : & super :: BoolTrie = & super :: BoolTrie {
404
321
r1 : [
405
322
0x0400408000000000 , 0x0000000140000000 , 0x0190a10000000000 , 0x0000000000000000 ,
406
323
0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 , 0x0000000000000000 ,
@@ -529,10 +446,10 @@ pub mod derived_property {
529
446
} ;
530
447
531
448
pub fn Case_Ignorable ( c : char ) -> bool {
532
- super :: trie_lookup_range_table ( c , Case_Ignorable_table )
449
+ Case_Ignorable_table . lookup ( c )
533
450
}
534
451
535
- pub const Cased_table : & ' static super :: BoolTrie = & super :: BoolTrie {
452
+ pub const Cased_table : & super :: BoolTrie = & super :: BoolTrie {
536
453
r1 : [
537
454
0x0000000000000000 , 0x07fffffe07fffffe , 0x0420040000000000 , 0xff7fffffff7fffff ,
538
455
0xffffffffffffffff , 0xffffffffffffffff , 0xf7ffffffffffffff , 0xfffffffffffffff0 ,
@@ -628,10 +545,10 @@ pub mod derived_property {
628
545
} ;
629
546
630
547
pub fn Cased ( c : char ) -> bool {
631
- super :: trie_lookup_range_table ( c , Cased_table )
548
+ Cased_table . lookup ( c )
632
549
}
633
550
634
- pub const Lowercase_table : & ' static super :: BoolTrie = & super :: BoolTrie {
551
+ pub const Lowercase_table : & super :: BoolTrie = & super :: BoolTrie {
635
552
r1 : [
636
553
0x0000000000000000 , 0x07fffffe00000000 , 0x0420040000000000 , 0xff7fffff80000000 ,
637
554
0x55aaaaaaaaaaaaaa , 0xd4aaaaaaaaaaab55 , 0xe6512d2a4e243129 , 0xaa29aaaab5555240 ,
@@ -725,10 +642,10 @@ pub mod derived_property {
725
642
} ;
726
643
727
644
pub fn Lowercase ( c : char ) -> bool {
728
- super :: trie_lookup_range_table ( c , Lowercase_table )
645
+ Lowercase_table . lookup ( c )
729
646
}
730
647
731
- pub const Uppercase_table : & ' static super :: BoolTrie = & super :: BoolTrie {
648
+ pub const Uppercase_table : & super :: BoolTrie = & super :: BoolTrie {
732
649
r1 : [
733
650
0x0000000000000000 , 0x0000000007fffffe , 0x0000000000000000 , 0x000000007f7fffff ,
734
651
0xaa55555555555555 , 0x2b555555555554aa , 0x11aed2d5b1dbced6 , 0x55d255554aaaa490 ,
@@ -823,10 +740,10 @@ pub mod derived_property {
823
740
} ;
824
741
825
742
pub fn Uppercase ( c : char ) -> bool {
826
- super :: trie_lookup_range_table ( c , Uppercase_table )
743
+ Uppercase_table . lookup ( c )
827
744
}
828
745
829
- pub const XID_Continue_table : & ' static super :: BoolTrie = & super :: BoolTrie {
746
+ pub const XID_Continue_table : & super :: BoolTrie = & super :: BoolTrie {
830
747
r1 : [
831
748
0x03ff000000000000 , 0x07fffffe87fffffe , 0x04a0040000000000 , 0xff7fffffff7fffff ,
832
749
0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff ,
@@ -998,10 +915,10 @@ pub mod derived_property {
998
915
} ;
999
916
1000
917
pub fn XID_Continue ( c : char ) -> bool {
1001
- super :: trie_lookup_range_table ( c , XID_Continue_table )
918
+ XID_Continue_table . lookup ( c )
1002
919
}
1003
920
1004
- pub const XID_Start_table : & ' static super :: BoolTrie = & super :: BoolTrie {
921
+ pub const XID_Start_table : & super :: BoolTrie = & super :: BoolTrie {
1005
922
r1 : [
1006
923
0x0000000000000000 , 0x07fffffe07fffffe , 0x0420040000000000 , 0xff7fffffff7fffff ,
1007
924
0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff , 0xffffffffffffffff ,
@@ -1175,13 +1092,13 @@ pub mod derived_property {
1175
1092
} ;
1176
1093
1177
1094
pub fn XID_Start ( c : char ) -> bool {
1178
- super :: trie_lookup_range_table ( c , XID_Start_table )
1095
+ XID_Start_table . lookup ( c )
1179
1096
}
1180
1097
1181
1098
}
1182
1099
1183
1100
pub mod property {
1184
- pub const Pattern_White_Space_table : & ' static super :: SmallBoolTrie = & super :: SmallBoolTrie {
1101
+ pub const Pattern_White_Space_table : & super :: SmallBoolTrie = & super :: SmallBoolTrie {
1185
1102
r1 : & [
1186
1103
0 , 1 , 2 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
1187
1104
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -1198,7 +1115,7 @@ pub mod property {
1198
1115
Pattern_White_Space_table . lookup ( c)
1199
1116
}
1200
1117
1201
- pub const White_Space_table : & ' static super :: SmallBoolTrie = & super :: SmallBoolTrie {
1118
+ pub const White_Space_table : & super :: SmallBoolTrie = & super :: SmallBoolTrie {
1202
1119
r1 : & [
1203
1120
0 , 1 , 2 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
1204
1121
1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 , 1 ,
@@ -1238,11 +1155,11 @@ pub mod conversions {
1238
1155
}
1239
1156
}
1240
1157
1241
- fn bsearch_case_table ( c : char , table : & ' static [ ( char , [ char ; 3 ] ) ] ) -> Option < usize > {
1158
+ fn bsearch_case_table ( c : char , table : & [ ( char , [ char ; 3 ] ) ] ) -> Option < usize > {
1242
1159
table. binary_search_by ( |& ( key, _) | key. cmp ( & c) ) . ok ( )
1243
1160
}
1244
1161
1245
- const to_lowercase_table: & ' static [ ( char , [ char ; 3 ] ) ] = & [
1162
+ const to_lowercase_table: & [ ( char , [ char ; 3 ] ) ] = & [
1246
1163
( '\u{41}' , [ '\u{61}' , '\0' , '\0' ] ) , ( '\u{42}' , [ '\u{62}' , '\0' , '\0' ] ) , ( '\u{43}' ,
1247
1164
[ '\u{63}' , '\0' , '\0' ] ) , ( '\u{44}' , [ '\u{64}' , '\0' , '\0' ] ) , ( '\u{45}' , [ '\u{65}' , '\0' ,
1248
1165
'\0' ] ) , ( '\u{46}' , [ '\u{66}' , '\0' , '\0' ] ) , ( '\u{47}' , [ '\u{67}' , '\0' , '\0' ] ) , ( '\u{48}' ,
@@ -1826,7 +1743,7 @@ pub mod conversions {
1826
1743
( '\u{1e920}' , [ '\u{1e942}' , '\0' , '\0' ] ) , ( '\u{1e921}' , [ '\u{1e943}' , '\0' , '\0' ] )
1827
1744
] ;
1828
1745
1829
- const to_uppercase_table: & ' static [ ( char , [ char ; 3 ] ) ] = & [
1746
+ const to_uppercase_table: & [ ( char , [ char ; 3 ] ) ] = & [
1830
1747
( '\u{61}' , [ '\u{41}' , '\0' , '\0' ] ) , ( '\u{62}' , [ '\u{42}' , '\0' , '\0' ] ) , ( '\u{63}' ,
1831
1748
[ '\u{43}' , '\0' , '\0' ] ) , ( '\u{64}' , [ '\u{44}' , '\0' , '\0' ] ) , ( '\u{65}' , [ '\u{45}' , '\0' ,
1832
1749
'\0' ] ) , ( '\u{66}' , [ '\u{46}' , '\0' , '\0' ] ) , ( '\u{67}' , [ '\u{47}' , '\0' , '\0' ] ) , ( '\u{68}' ,
0 commit comments