|
1 | 1 | //! Utilities for validating string and char literals and turning them into
|
2 | 2 | //! values they represent.
|
3 | 3 |
|
| 4 | +use std::ffi::CStr; |
4 | 5 | use std::ops::Range;
|
5 | 6 | use std::str::Chars;
|
6 | 7 |
|
@@ -138,37 +139,82 @@ pub fn unescape_for_errors(
|
138 | 139 | /// and produces a sequence of characters or errors,
|
139 | 140 | /// which are returned by invoking `callback`.
|
140 | 141 | /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r').
|
141 |
| -pub fn check_raw_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) { |
142 |
| - check_raw_common(src, Mode::RawStr, &mut callback) |
| 142 | +pub fn check_raw_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) { |
| 143 | + str::check_raw(src, callback); |
143 | 144 | }
|
144 | 145 |
|
145 | 146 | /// Takes the contents of a raw byte string literal (without quotes)
|
146 | 147 | /// and produces a sequence of bytes or errors,
|
147 | 148 | /// which are returned by invoking `callback`.
|
148 | 149 | /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r') and for non-ASCII characters.
|
149 |
| -pub fn check_raw_byte_str( |
150 |
| - src: &str, |
151 |
| - mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>), |
152 |
| -) { |
153 |
| - check_raw_common(src, Mode::RawByteStr, &mut |r, res| { |
154 |
| - callback(r, res.map(byte_from_char)) |
155 |
| - }) |
| 150 | +pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range<usize>, Result<u8, EscapeError>)) { |
| 151 | + <[u8]>::check_raw(src, callback); |
156 | 152 | }
|
157 | 153 |
|
158 | 154 | /// Takes the contents of a raw C string literal (without quotes)
|
159 | 155 | /// and produces a sequence of characters or errors,
|
160 | 156 | /// which are returned by invoking `callback`.
|
161 | 157 | /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r') and for NUL ('\0').
|
162 |
| -pub fn check_raw_c_str( |
163 |
| - src: &str, |
164 |
| - mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>), |
165 |
| -) { |
166 |
| - check_raw_common(src, Mode::RawCStr, &mut |r, mut result| { |
167 |
| - if let Ok('\0') = result { |
168 |
| - result = Err(EscapeError::NulInCStr); |
| 158 | +pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) { |
| 159 | + CStr::check_raw(src, callback); |
| 160 | +} |
| 161 | + |
| 162 | +/// Trait for validating the contents of raw literals; implemented for `str`, `[u8]` and `CStr`. |
| 163 | +trait CheckRaw { |
| 164 | + /// Unit type of the implementing string type (`char` for string, `u8` for byte string) |
| 165 | + type RawUnit; |
| 166 | + |
| 167 | + /// Converts chars to the unit type of the literal type |
| 168 | + fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError>; |
| 169 | + |
| 170 | + /// Takes the contents of a raw literal (without quotes) |
| 171 | + /// and produces a sequence of `Result<Self::RawUnit, EscapeError>` |
| 172 | + /// which are returned via `callback`. |
| 173 | + /// |
| 174 | + /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). |
| 175 | + fn check_raw( |
| 176 | + src: &str, |
| 177 | + mut callback: impl FnMut(Range<usize>, Result<Self::RawUnit, EscapeError>), |
| 178 | + ) { |
| 179 | + src.char_indices().for_each(|(pos, c)| { |
| 180 | + callback( |
| 181 | + pos..pos + c.len_utf8(), |
| 182 | + if c == '\r' { |
| 183 | + Err(EscapeError::BareCarriageReturnInRawString) |
| 184 | + } else { |
| 185 | + Self::char2raw_unit(c) |
| 186 | + }, |
| 187 | + ); |
| 188 | + }); |
| 189 | + } |
| 190 | +} |
| 191 | + |
| 192 | +impl CheckRaw for str { |
| 193 | + type RawUnit = char; |
| 194 | + |
| 195 | + fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> { |
| 196 | + Ok(c) |
| 197 | + } |
| 198 | +} |
| 199 | + |
| 200 | +impl CheckRaw for [u8] { |
| 201 | + type RawUnit = u8; |
| 202 | + |
| 203 | + fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> { |
| 204 | + char2byte(c) |
| 205 | + } |
| 206 | +} |
| 207 | + |
| 208 | +impl CheckRaw for CStr { |
| 209 | + type RawUnit = char; |
| 210 | + |
| 211 | + fn char2raw_unit(c: char) -> Result<Self::RawUnit, EscapeError> { |
| 212 | + if c == '\0' { |
| 213 | + Err(EscapeError::NulInCStr) |
| 214 | + } else { |
| 215 | + Ok(c) |
169 | 216 | }
|
170 |
| - callback(r, result) |
171 |
| - }) |
| 217 | + } |
172 | 218 | }
|
173 | 219 |
|
174 | 220 | /// Takes the contents of a string literal (without quotes)
|
@@ -496,34 +542,18 @@ where
|
496 | 542 | *chars = tail.chars();
|
497 | 543 | }
|
498 | 544 |
|
499 |
| -/// Takes a contents of a string literal (without quotes) and produces a |
500 |
| -/// sequence of characters or errors. |
501 |
| -/// NOTE: Raw strings do not perform any explicit character escaping, here we |
502 |
| -/// only produce errors on bare CR. |
503 |
| -fn check_raw_common<F>(src: &str, mode: Mode, callback: &mut F) |
504 |
| -where |
505 |
| - F: FnMut(Range<usize>, Result<char, EscapeError>), |
506 |
| -{ |
507 |
| - let mut chars = src.chars(); |
508 |
| - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop |
509 |
| - |
510 |
| - // The `start` and `end` computation here matches the one in |
511 |
| - // `unescape_non_raw_common` for consistency, even though this function |
512 |
| - // doesn't have to worry about skipping any chars. |
513 |
| - while let Some(c) = chars.next() { |
514 |
| - let start = src.len() - chars.as_str().len() - c.len_utf8(); |
515 |
| - let res = match c { |
516 |
| - '\r' => Err(EscapeError::BareCarriageReturnInRawString), |
517 |
| - _ => ascii_check(c, allow_unicode_chars), |
518 |
| - }; |
519 |
| - let end = src.len() - chars.as_str().len(); |
520 |
| - callback(start..end, res); |
521 |
| - } |
522 |
| -} |
523 |
| - |
524 | 545 | #[inline]
|
525 | 546 | fn byte_from_char(c: char) -> u8 {
|
526 | 547 | let res = c as u32;
|
527 | 548 | debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr");
|
528 | 549 | res as u8
|
529 | 550 | }
|
| 551 | + |
| 552 | +fn char2byte(c: char) -> Result<u8, EscapeError> { |
| 553 | + // Do NOT use `c.try_into()` (`u8::try_from(char)`) here: it also succeeds for U+0080..=U+00FF, which are not ASCII. |
| 554 | + if c.is_ascii() { |
| 555 | + Ok(c as u8) |
| 556 | + } else { |
| 557 | + Err(EscapeError::NonAsciiCharInByte) |
| 558 | + } |
| 559 | +} |
0 commit comments