11use memx:: { memeq, memmem} ;
22
/// Lookup table that folds a byte to ASCII lowercase.
///
/// The table is indexed by the input byte. Entries `65..=90` (`b'A'..=b'Z'`)
/// hold `97..=122` (`b'a'..=b'z'`); every other entry holds its own index, so
/// non-letters and bytes above 127 pass through unchanged.
#[rustfmt::skip]
static ASCII_LOWERCASE: [u8; 256] = [
    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
    16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
    32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
    48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
    // 'A'..='O' fold to 'a'..='o'.
    64, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    // 'P'..='Z' fold to 'p'..='z'; the punctuation 91..=95 is unchanged.
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 91, 92, 93, 94, 95,
    96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111,
    112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127,
    128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143,
    144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
    160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175,
    176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191,
    192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207,
    208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223,
    224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
    240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255,
];
22+
323/// Performs a case-sensitive prefix match using the `memx` crate.
424///
525/// # Arguments
@@ -11,6 +31,10 @@ use memx::{memeq, memmem};
1131/// - `false` otherwise.
1232#[ inline( always) ]
1333pub fn eq_prefix_memx ( addr : & [ u8 ] , pat : & [ u8 ] ) -> bool {
34+ if addr. len ( ) < pat. len ( ) {
35+ return false ;
36+ }
37+
1438 memeq ( & addr[ ..pat. len ( ) ] , pat)
1539}
1640
@@ -25,6 +49,10 @@ pub fn eq_prefix_memx(addr: &[u8], pat: &[u8]) -> bool {
2549/// - `false` otherwise.
2650#[ inline( always) ]
2751pub fn eq_suffix_memx ( addr : & [ u8 ] , pat : & [ u8 ] ) -> bool {
52+ if addr. len ( ) < pat. len ( ) {
53+ return false ;
54+ }
55+
2856 let start = addr. len ( ) - pat. len ( ) ;
2957 memeq ( & addr[ start..] , pat)
3058}
@@ -43,117 +71,143 @@ pub fn contains_memx(addr: &[u8], pat: &[u8]) -> bool {
4371 memmem ( addr, pat) . is_some ( )
4472}
4573
46- /// Performs a case-insensitive prefix match.
74+ /// Simple, fast case-insensitive prefix match.
4775///
4876/// # Arguments
4977/// - `data`: The target byte slice to check.
50- /// - `pattern`: The prefix byte slice to match against.
78+ /// - `pattern`: The prefix byte slice to match against (should be lowercase) .
5179///
5280/// # Returns
5381/// - `true` if the beginning of `data` matches `pattern` (case-insensitively).
5482/// - `false` otherwise.
5583#[ inline( always) ]
5684pub fn eq_prefix_case_insensitive ( data : & [ u8 ] , pattern : & [ u8 ] ) -> bool {
57- if data. len ( ) < pattern. len ( ) {
85+ let pattern_len = pattern. len ( ) ;
86+ if data. len ( ) < pattern_len {
5887 return false ;
5988 }
60-
61- for i in 0 ..pattern. len ( ) {
62- let a = data[ i] ;
63- let b = pattern[ i] ;
64-
65- // Convert `a` to lowercase if it is an uppercase ASCII letter
66- let a = if a. is_ascii_uppercase ( ) {
67- a | 0b00100000
68- } else {
69- a
70- } ;
71-
72- if a != b {
89+
90+ if pattern_len == 0 {
91+ return true ;
92+ }
93+
94+ // Simple, efficient byte-by-byte comparison with lookup table
95+ for i in 0 ..pattern_len {
96+ if ASCII_LOWERCASE [ data[ i] as usize ] != pattern[ i] {
7397 return false ;
7498 }
7599 }
76-
77100 true
78101}
79102
80- /// Performs a case-insensitive suffix match.
103+ /// Simple, fast case-insensitive suffix match.
81104///
82105/// # Arguments
83106/// - `data`: The target byte slice to check.
84- /// - `pattern`: The suffix byte slice to match against.
107+ /// - `pattern`: The suffix byte slice to match against (should be lowercase) .
85108///
86109/// # Returns
87110/// - `true` if the end of `data` matches `pattern` (case-insensitively).
88111/// - `false` otherwise.
89112#[ inline( always) ]
90113pub fn eq_suffix_case_insensitive ( data : & [ u8 ] , pattern : & [ u8 ] ) -> bool {
91- if data. len ( ) < pattern. len ( ) {
114+ let pattern_len = pattern. len ( ) ;
115+ if data. len ( ) < pattern_len {
92116 return false ;
93117 }
118+
119+ if pattern_len == 0 {
120+ return true ;
121+ }
122+
123+ let start = data. len ( ) - pattern_len;
94124
95- let start = data. len ( ) - pattern. len ( ) ;
96- for i in 0 ..pattern. len ( ) {
97- let a = data[ start + i] ;
98- let b = pattern[ i] ;
99-
100- // Convert `a` to lowercase if it is an uppercase ASCII letter
101- let a = if a. is_ascii_uppercase ( ) {
102- a | 0b00100000
103- } else {
104- a
105- } ;
106-
107- if a != b {
125+ // Simple, efficient byte-by-byte comparison with lookup table
126+ for i in 0 ..pattern_len {
127+ if ASCII_LOWERCASE [ data[ start + i] as usize ] != pattern[ i] {
108128 return false ;
109129 }
110130 }
111-
112131 true
113132}
114133
115- /// Performs a case-insensitive substring match (anywhere match).
134+ /// High-performance case-insensitive substring search with adaptive algorithm selection.
135+ /// Uses different algorithms based on pattern length for optimal performance.
116136///
117137/// # Arguments
118138/// - `data`: The target byte slice to check.
119- /// - `pattern`: The byte slice to find within `data`.
139+ /// - `pattern`: The byte slice to find within `data` (should be lowercase) .
120140///
121141/// # Returns
122142/// - `true` if `pattern` is found anywhere within `data` (case-insensitively).
123143/// - `false` otherwise.
124144#[ inline( always) ]
125145pub fn contains_case_insensitive ( data : & [ u8 ] , pattern : & [ u8 ] ) -> bool {
126- if data. len ( ) < pattern. len ( ) {
146+ let data_len = data. len ( ) ;
147+ let pattern_len = pattern. len ( ) ;
148+
149+ if data_len < pattern_len {
127150 return false ;
128151 }
129152
130- let pattern_len = pattern. len ( ) ;
131- let data_len = data. len ( ) ;
153+ if pattern_len == 0 {
154+ return true ;
155+ }
132156
133- for start in 0 ..=( data_len - pattern_len) {
134- let mut found = true ;
157+ // Fast path for single character search - our biggest optimization win
158+ if pattern_len == 1 {
159+ let target = pattern[ 0 ] ;
160+ return data. iter ( ) . any ( |& byte| ASCII_LOWERCASE [ byte as usize ] == target) ;
161+ }
135162
136- for i in 0 ..pattern_len {
137- let a = data[ start + i] ;
138- let b = pattern[ i] ;
163+ // For medium patterns (5-16 bytes), use optimized Boyer-Moore
164+ if pattern_len <= 16 {
165+ // Create bad character table
166+ let mut bad_char = [ pattern_len; 256 ] ;
167+ for ( i, & byte) in pattern. iter ( ) . enumerate ( ) {
168+ bad_char[ byte as usize ] = pattern_len - 1 - i;
169+ }
170+
171+ let mut pos = 0 ;
172+ while pos <= data_len - pattern_len {
173+ let mut j = pattern_len;
139174
140- // Convert `a` to lowercase if it is an uppercase ASCII letter
141- let a = if a. is_ascii_uppercase ( ) {
142- a | 0b00100000
175+ // Check from the end of the pattern
176+ while j > 0 {
177+ j -= 1 ;
178+ if ASCII_LOWERCASE [ data[ pos + j] as usize ] != pattern[ j] {
179+ break ;
180+ }
181+ }
182+
183+ if j == 0 {
184+ return true ; // Match found
185+ }
186+
187+ // Use bad character heuristic to skip positions
188+ let bad_char_skip = if pos + pattern_len - 1 < data_len {
189+ bad_char[ ASCII_LOWERCASE [ data[ pos + pattern_len - 1 ] as usize ] as usize ]
143190 } else {
144- a
191+ 1
145192 } ;
193+ pos += bad_char_skip. max ( 1 ) ;
194+ }
195+
196+ return false ;
197+ }
146198
147- if a != b {
148- found = false ;
149- break ; // Early exit on mismatch
199+ // For very small (2-4 bytes) or very large (more than 16 bytes) patterns, use simple scan
200+ for start in 0 ..=( data_len - pattern_len) {
201+ let mut matches = true ;
202+ for i in 0 ..pattern_len {
203+ if ASCII_LOWERCASE [ data[ start + i] as usize ] != pattern[ i] {
204+ matches = false ;
205+ break ;
150206 }
151207 }
152-
153- if found {
154- return true ; // Return early if a match is found
208+ if matches {
209+ return true ;
155210 }
156211 }
157-
158212 false
159- }
213+ }
0 commit comments