diff --git a/CHANGELOG.md b/CHANGELOG.md index 133c13c..40feaf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +# 0.0.3 + +- Add `check_raw_str`, `check_raw_byte_str`, `check_raw_c_str`, +- Add `unescape_str`, `unescape_byte_str`, `unescape_c_str`, +- Add `unescape_for_errors`, +- Remove: `unescape_unicode` and `unescape_mixed` + # 0.0.2 - Add new `rustc-dep-of-std` feature to allow building `libproc-macro` diff --git a/Cargo.lock b/Cargo.lock index b613e3c..3b2777a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = 4 [[package]] name = "rustc-literal-escaper" -version = "0.0.2" +version = "0.0.3" dependencies = [ "rustc-std-workspace-std", ] diff --git a/Cargo.toml b/Cargo.toml index ffb9a69..d0eb307 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "rustc-literal-escaper" -version = "0.0.2" +version = "0.0.3" edition = "2021" description = "Provides code to unescape string literals" license = "Apache-2.0 OR MIT" diff --git a/src/lib.rs b/src/lib.rs index cd6faff..1e0b4fd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,11 +1,10 @@ //! Utilities for validating string and char literals and turning them into //! values they represent. +use std::ffi::CStr; use std::ops::Range; use std::str::Chars; -use Mode::*; - #[cfg(test)] mod tests; @@ -80,33 +79,133 @@ impl EscapeError { } } -/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without -/// quotes) and produces a sequence of escaped characters or errors. -/// -/// Values are returned by invoking `callback`. For `Char` and `Byte` modes, -/// the callback will be called exactly once. -pub fn unescape_unicode(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - match mode { - Char | Byte => { - let mut chars = src.chars(); - let res = unescape_char_or_byte(&mut chars, mode); - callback(0..(src.len() - chars.as_str().len()), res); +/// Takes the contents of a raw string literal (without quotes) +/// and produces a sequence of characters or errors, +/// which are returned by invoking `callback`. +/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). +pub fn check_raw_str(src: &str, callback: impl FnMut(Range, Result)) { + str::check_raw(src, callback); +} + +/// Takes the contents of a raw byte string literal (without quotes) +/// and produces a sequence of bytes or errors, +/// which are returned by invoking `callback`. +/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). +pub fn check_raw_byte_str(src: &str, callback: impl FnMut(Range, Result)) { + <[u8]>::check_raw(src, callback); +} + +/// Takes the contents of a raw C string literal (without quotes) +/// and produces a sequence of characters or errors, +/// which are returned by invoking `callback`. +/// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). +pub fn check_raw_c_str(src: &str, callback: impl FnMut(Range, Result)) { + CStr::check_raw(src, callback); +} + +/// trait for checking raw strings +trait CheckRaw { + /// Unit type of the implementing string type (`char` for string, `u8` for byte string) + type RawUnit; + + /// Converts chars to the unit type of the literal type + fn char2raw_unit(c: char) -> Result; + + /// Takes the contents of a raw literal (without quotes) + /// and produces a sequence of `Result` + /// which are returned via `callback`. + /// + /// NOTE: Does no escaping, but produces errors for bare carriage return ('\r'). + fn check_raw( + src: &str, + mut callback: impl FnMut(Range, Result), + ) { + src.char_indices().for_each(|(pos, c)| { + callback( + pos..pos + c.len_utf8(), + if c == '\r' { + Err(EscapeError::BareCarriageReturnInRawString) + } else { + Self::char2raw_unit(c) + }, + ); + }); + } +} + +impl CheckRaw for str { + type RawUnit = char; + + fn char2raw_unit(c: char) -> Result { + Ok(c) + } +} + +impl CheckRaw for [u8] { + type RawUnit = u8; + + fn char2raw_unit(c: char) -> Result { + char2byte(c) + } +} + +fn char2byte(c: char) -> Result { + // do NOT do: c.try_into().ok_or(EscapeError::NonAsciiCharInByte) + if c.is_ascii() { + Ok(c as u8) + } else { + Err(EscapeError::NonAsciiCharInByte) + } +} + +impl CheckRaw for CStr { + type RawUnit = char; + + fn char2raw_unit(c: char) -> Result { + if c == '\0' { + Err(EscapeError::NulInCStr) + } else { + Ok(c) } - Str | ByteStr => unescape_non_raw_common(src, mode, callback), - RawStr | RawByteStr => check_raw_common(src, mode, callback), - RawCStr => check_raw_common(src, mode, &mut |r, mut result| { - if let Ok('\0') = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }), - CStr => unreachable!(), } } +/// Takes the contents of a char literal (without quotes), +/// and returns an unescaped char or an error. +pub fn unescape_char(src: &str) -> Result { + str::unescape_single(&mut src.chars()) +} + +/// Takes the contents of a byte literal (without quotes), +/// and returns an unescaped byte or an error. +pub fn unescape_byte(src: &str) -> Result { + <[u8]>::unescape_single(&mut src.chars()) +} + +/// Takes the contents of a string literal (without quotes) +/// and produces a sequence of escaped characters or errors, +/// which are returned by invoking `callback`. +pub fn unescape_str(src: &str, callback: impl FnMut(Range, Result)) { + str::unescape(src, callback) +} + +/// Takes the contents of a byte string literal (without quotes) +/// and produces a sequence of escaped bytes or errors, +/// which are returned by invoking `callback`. +pub fn unescape_byte_str(src: &str, callback: impl FnMut(Range, Result)) { + <[u8]>::unescape(src, callback) +} + +/// Takes the contents of a C string literal (without quotes) +/// and produces a sequence of escaped MixedUnits or errors, +/// which are returned by invoking `callback`. +pub fn unescape_c_str( + src: &str, + callback: impl FnMut(Range, Result), +) { + CStr::unescape(src, callback) +} + /// Used for mixed utf8 string literals, i.e. those that allow both unicode /// chars and high bytes. pub enum MixedUnit { @@ -142,145 +241,125 @@ impl From for MixedUnit { } } -/// Takes the contents of a mixed-utf8 literal (without quotes) and produces -/// a sequence of escaped characters or errors. -/// -/// Values are returned by invoking `callback`. -pub fn unescape_mixed(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - match mode { - CStr => unescape_non_raw_common(src, mode, &mut |r, mut result| { - if let Ok(MixedUnit::Char('\0')) = result { - result = Err(EscapeError::NulInCStr); - } - callback(r, result) - }), - Char | Byte | Str | RawStr | ByteStr | RawByteStr | RawCStr => unreachable!(), - } -} - -/// Takes a contents of a char literal (without quotes), and returns an -/// unescaped char or an error. -pub fn unescape_char(src: &str) -> Result { - unescape_char_or_byte(&mut src.chars(), Char) -} +/// trait for unescaping escape sequences in strings +trait Unescape { + /// Unit type of the implementing string type (`char` for string, `u8` for byte string) + type Unit: From; -/// Takes a contents of a byte literal (without quotes), and returns an -/// unescaped byte or an error. -pub fn unescape_byte(src: &str) -> Result { - unescape_char_or_byte(&mut src.chars(), Byte).map(byte_from_char) -} - -/// What kind of literal do we parse. -#[derive(Debug, Clone, Copy, PartialEq)] -pub enum Mode { - Char, + /// Result of unescaping the zero char ('\0') + const ZERO_RESULT: Result; - Byte, + /// Converts chars to the unit type + fn char2unit(c: char) -> Result; - Str, - RawStr, + /// Converts the byte of a hex escape to the unit type + fn hex2unit(b: u8) -> Result; - ByteStr, - RawByteStr, - - CStr, - RawCStr, -} - -impl Mode { - pub fn in_double_quotes(self) -> bool { - match self { - Str | RawStr | ByteStr | RawByteStr | CStr | RawCStr => true, - Char | Byte => false, - } - } - - /// Are `\x80`..`\xff` allowed? - fn allow_high_bytes(self) -> bool { - match self { - Char | Str => false, - Byte | ByteStr | CStr => true, - RawStr | RawByteStr | RawCStr => unreachable!(), - } - } + /// Converts the result of a unicode escape to the unit type + fn unicode2unit(r: Result) -> Result; - /// Are unicode (non-ASCII) chars allowed? - #[inline] - fn allow_unicode_chars(self) -> bool { - match self { - Byte | ByteStr | RawByteStr => false, - Char | Str | RawStr | CStr | RawCStr => true, + /// Unescape a single unit (single quote syntax) + fn unescape_single(chars: &mut Chars<'_>) -> Result { + let res = match chars.next().ok_or(EscapeError::ZeroChars)? { + '\\' => Self::unescape_1(chars), + '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), + '\r' => Err(EscapeError::BareCarriageReturn), + c => Self::char2unit(c), + }?; + if chars.next().is_some() { + return Err(EscapeError::MoreThanOneChar); } + Ok(res) } - /// Are unicode escapes (`\u`) allowed? - fn allow_unicode_escapes(self) -> bool { - match self { - Byte | ByteStr => false, - Char | Str | CStr => true, - RawByteStr | RawStr | RawCStr => unreachable!(), + /// Unescape the first unit of a string (double quoted syntax) + fn unescape_1(chars: &mut Chars<'_>) -> Result { + // Previous character was '\\', unescape what follows. + let c = chars.next().ok_or(EscapeError::LoneSlash)?; + if c == '0' { + Self::ZERO_RESULT + } else { + simple_escape(c).map(|b| b.into()).or_else(|c| match c { + 'x' => Self::hex2unit(hex_escape(chars)?), + 'u' => Self::unicode2unit({ + let value = unicode_escape(chars)?; + if value > char::MAX as u32 { + Err(EscapeError::OutOfRangeUnicodeEscape) + } else { + char::from_u32(value).ok_or(EscapeError::LoneSurrogateUnicodeEscape) + } + }), + _ => Err(EscapeError::InvalidEscape), + }) } } - pub fn prefix_noraw(self) -> &'static str { - match self { - Char | Str | RawStr => "", - Byte | ByteStr | RawByteStr => "b", - CStr | RawCStr => "c", + /// Takes the contents of a raw literal (without quotes) + /// and produces a sequence of `Result` + /// which are returned via `callback`. + fn unescape( + src: &str, + mut callback: impl FnMut(Range, Result), + ) { + let mut chars = src.chars(); + while let Some(c) = chars.next() { + let start = src.len() - chars.as_str().len() - c.len_utf8(); + let res = match c { + '\\' => { + if let Some(b'\n') = chars.as_str().as_bytes().first() { + let _ = chars.next(); + // skip whitespace for backslash newline, see [Rust language reference] + // (https://doc.rust-lang.org/reference/tokens.html#string-literals). + let mut callback_err = |range, err| callback(range, Err(err)); + skip_ascii_whitespace(&mut chars, start, &mut callback_err); + continue; + } else { + Self::unescape_1(&mut chars) + } + } + '"' => Err(EscapeError::EscapeOnlyChar), + '\r' => Err(EscapeError::BareCarriageReturn), + c => Self::char2unit(c), + }; + let end = src.len() - chars.as_str().len(); + callback(start..end, res); } } } -fn scan_escape + From>( - chars: &mut Chars<'_>, - mode: Mode, -) -> Result { +/// Parse the character of an ASCII escape (except nul) without the leading backslash. +fn simple_escape(c: char) -> Result { // Previous character was '\\', unescape what follows. - let res: char = match chars.next().ok_or(EscapeError::LoneSlash)? { - '"' => '"', - 'n' => '\n', - 'r' => '\r', - 't' => '\t', - '\\' => '\\', - '\'' => '\'', - '0' => '\0', - 'x' => { - // Parse hexadecimal character code. - - let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?; - let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; - - let value = (hi * 16 + lo) as u8; - - return if !mode.allow_high_bytes() && !value.is_ascii() { - Err(EscapeError::OutOfRangeHexEscape) - } else { - // This may be a high byte, but that will only happen if `T` is - // `MixedUnit`, because of the `allow_high_bytes` check above. - Ok(T::from(value)) - }; - } - 'u' => return scan_unicode(chars, mode.allow_unicode_escapes()).map(T::from), - _ => return Err(EscapeError::InvalidEscape), - }; - Ok(T::from(res)) + Ok(match c { + '"' => b'"', + 'n' => b'\n', + 'r' => b'\r', + 't' => b'\t', + '\\' => b'\\', + '\'' => b'\'', + _ => Err(c)?, + }) } -fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result { - // We've parsed '\u', now we have to parse '{..}'. +/// Parse the two hexadecimal characters of a hexadecimal escape without the leading r"\x". +fn hex_escape(chars: &mut impl Iterator) -> Result { + let hi = chars.next().ok_or(EscapeError::TooShortHexEscape)?; + let hi = hi.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; + + let lo = chars.next().ok_or(EscapeError::TooShortHexEscape)?; + let lo = lo.to_digit(16).ok_or(EscapeError::InvalidCharInHexEscape)?; + Ok((hi * 16 + lo) as u8) +} + +/// Parse the braces with hexadecimal characters (and underscores) part of a unicode escape. +/// This r"{...}" normally comes after r"\u" and cannot start with an underscore. +fn unicode_escape(chars: &mut impl Iterator) -> Result { if chars.next() != Some('{') { return Err(EscapeError::NoBraceInUnicodeEscape); } // First character must be a hexadecimal digit. - let mut n_digits = 1; let mut value: u32 = match chars.next().ok_or(EscapeError::UnclosedUnicodeEscape)? { '_' => return Err(EscapeError::LeadingUnderscoreUnicodeEscape), '}' => return Err(EscapeError::EmptyUnicodeEscape), @@ -291,28 +370,19 @@ fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result return Err(EscapeError::UnclosedUnicodeEscape), Some('_') => continue, Some('}') => { - if n_digits > 6 { - return Err(EscapeError::OverlongUnicodeEscape); - } - // Incorrect syntax has higher priority for error reporting // than unallowed value for a literal. - if !allow_unicode_escapes { - return Err(EscapeError::UnicodeEscapeInByte); - } - - break std::char::from_u32(value).ok_or({ - if value > 0x10FFFF { - EscapeError::OutOfRangeUnicodeEscape - } else { - EscapeError::LoneSurrogateUnicodeEscape - } - }); + return if n_digits > 6 { + Err(EscapeError::OverlongUnicodeEscape) + } else { + Ok(value) + }; } Some(c) => { let digit: u32 = c @@ -329,122 +399,196 @@ fn scan_unicode(chars: &mut Chars<'_>, allow_unicode_escapes: bool) -> Result Result { - if allow_unicode_chars || c.is_ascii() { - Ok(c) - } else { - Err(EscapeError::NonAsciiCharInByte) +/// Skip ASCII whitespace, except for the formfeed character +/// (see [this issue](https://github.com/rust-lang/rust/issues/136600)). +/// Warns on unescaped newline and following non-ASCII whitespace. +fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) +where + F: FnMut(Range, EscapeError), +{ + let rest = chars.as_str(); + let first_non_space = rest + .bytes() + .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') + .unwrap_or(rest.len()); + let (space, rest) = rest.split_at(first_non_space); + // backslash newline adds 2 bytes + let end = start + 2 + first_non_space; + if space.contains('\n') { + callback(start..end, EscapeError::MultipleSkippedLinesWarning); + } + *chars = rest.chars(); + if let Some(c) = chars.clone().next() { + if c.is_whitespace() { + // for error reporting, include the character that was not skipped in the span + callback( + start..end + c.len_utf8(), + EscapeError::UnskippedWhitespaceWarning, + ); + } } } -fn unescape_char_or_byte(chars: &mut Chars<'_>, mode: Mode) -> Result { - let c = chars.next().ok_or(EscapeError::ZeroChars)?; - let res = match c { - '\\' => scan_escape(chars, mode), - '\n' | '\t' | '\'' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, mode.allow_unicode_chars()), - }?; - if chars.next().is_some() { - return Err(EscapeError::MoreThanOneChar); +impl Unescape for str { + type Unit = char; + + const ZERO_RESULT: Result = Ok('\0'); + + fn char2unit(c: char) -> Result { + Ok(c) + } + + fn hex2unit(b: u8) -> Result { + if b.is_ascii() { + Ok(b as char) + } else { + Err(EscapeError::OutOfRangeHexEscape) + } + } + + /// Converts the result of a unicode escape to the unit type + fn unicode2unit(r: Result) -> Result { + r } - Ok(res) } -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of escaped characters or errors. -fn unescape_non_raw_common + From>(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - let mut chars = src.chars(); - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop - - // The `start` and `end` computation here is complicated because - // `skip_ascii_whitespace` makes us to skip over chars without counting - // them in the range computation. - while let Some(c) = chars.next() { - let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { - '\\' => { - match chars.clone().next() { - Some('\n') => { - // Rust language specification requires us to skip whitespaces - // if unescaped '\' character is followed by '\n'. - // For details see [Rust language reference] - // (https://doc.rust-lang.org/reference/tokens.html#string-literals). - skip_ascii_whitespace(&mut chars, start, &mut |range, err| { - callback(range, Err(err)) - }); - continue; - } - _ => scan_escape::(&mut chars, mode), - } - } - '"' => Err(EscapeError::EscapeOnlyChar), - '\r' => Err(EscapeError::BareCarriageReturn), - _ => ascii_check(c, allow_unicode_chars).map(T::from), - }; - let end = src.len() - chars.as_str().len(); - callback(start..end, res); +impl Unescape for [u8] { + type Unit = u8; + + const ZERO_RESULT: Result = Ok(b'\0'); + + fn char2unit(c: char) -> Result { + char2byte(c) + } + + fn hex2unit(b: u8) -> Result { + Ok(b) + } + + /// Converts the result of a unicode escape to the unit type + fn unicode2unit(_r: Result) -> Result { + Err(EscapeError::UnicodeEscapeInByte) } } -fn skip_ascii_whitespace(chars: &mut Chars<'_>, start: usize, callback: &mut F) -where - F: FnMut(Range, EscapeError), -{ - let tail = chars.as_str(); - let first_non_space = tail - .bytes() - .position(|b| b != b' ' && b != b'\t' && b != b'\n' && b != b'\r') - .unwrap_or(tail.len()); - if tail[1..first_non_space].contains('\n') { - // The +1 accounts for the escaping slash. - let end = start + first_non_space + 1; - callback(start..end, EscapeError::MultipleSkippedLinesWarning); +impl Unescape for CStr { + type Unit = MixedUnit; + + const ZERO_RESULT: Result = Err(EscapeError::NulInCStr); + + fn char2unit(c: char) -> Result { + if c == '\0' { + Err(EscapeError::NulInCStr) + } else { + Ok(MixedUnit::Char(c)) + } } - let tail = &tail[first_non_space..]; - if let Some(c) = tail.chars().next() { - if c.is_whitespace() { - // For error reporting, we would like the span to contain the character that was not - // skipped. The +1 is necessary to account for the leading \ that started the escape. - let end = start + first_non_space + c.len_utf8() + 1; - callback(start..end, EscapeError::UnskippedWhitespaceWarning); + + fn hex2unit(byte: u8) -> Result { + if byte == b'\0' { + Err(EscapeError::NulInCStr) + } else if byte.is_ascii() { + Ok(MixedUnit::Char(byte as char)) + } else { + Ok(MixedUnit::HighByte(byte)) } } - *chars = tail.chars(); + + /// Converts the result of a unicode escape to the unit type + fn unicode2unit(r: Result) -> Result { + Self::char2unit(r?) + } } -/// Takes a contents of a string literal (without quotes) and produces a -/// sequence of characters or errors. -/// NOTE: Raw strings do not perform any explicit character escaping, here we -/// only produce errors on bare CR. -fn check_raw_common(src: &str, mode: Mode, callback: &mut F) -where - F: FnMut(Range, Result), -{ - let mut chars = src.chars(); - let allow_unicode_chars = mode.allow_unicode_chars(); // get this outside the loop - - // The `start` and `end` computation here matches the one in - // `unescape_non_raw_common` for consistency, even though this function - // doesn't have to worry about skipping any chars. - while let Some(c) = chars.next() { - let start = src.len() - chars.as_str().len() - c.len_utf8(); - let res = match c { - '\r' => Err(EscapeError::BareCarriageReturnInRawString), - _ => ascii_check(c, allow_unicode_chars), - }; - let end = src.len() - chars.as_str().len(); - callback(start..end, res); +/// What kind of literal do we parse. +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Mode { + Char, + + Byte, + + Str, + RawStr, + + ByteStr, + RawByteStr, + + CStr, + RawCStr, +} + +impl Mode { + pub fn in_double_quotes(self) -> bool { + match self { + Mode::Str + | Mode::RawStr + | Mode::ByteStr + | Mode::RawByteStr + | Mode::CStr + | Mode::RawCStr => true, + Mode::Char | Mode::Byte => false, + } + } + + pub fn prefix_noraw(self) -> &'static str { + match self { + Mode::Char | Mode::Str | Mode::RawStr => "", + Mode::Byte | Mode::ByteStr | Mode::RawByteStr => "b", + Mode::CStr | Mode::RawCStr => "c", + } } } -#[inline] -pub fn byte_from_char(c: char) -> u8 { - let res = c as u32; - debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr"); - res as u8 +/// Takes the contents of a literal (without quotes) +/// and produces a sequence of errors, +/// which are returned by invoking `error_callback`. +pub fn unescape_for_errors( + src: &str, + mode: Mode, + mut error_callback: impl FnMut(Range, EscapeError), +) { + match mode { + Mode::Char => { + let mut chars = src.chars(); + if let Err(e) = str::unescape_single(&mut chars) { + error_callback(0..(src.len() - chars.as_str().len()), e); + } + } + Mode::Byte => { + let mut chars = src.chars(); + if let Err(e) = <[u8]>::unescape_single(&mut chars) { + error_callback(0..(src.len() - chars.as_str().len()), e); + } + } + Mode::Str => unescape_str(src, |range, res| { + if let Err(e) = res { + error_callback(range, e); + } + }), + Mode::ByteStr => unescape_byte_str(src, |range, res| { + if let Err(e) = res { + error_callback(range, e); + } + }), + Mode::CStr => unescape_c_str(src, |range, res| { + if let Err(e) = res { + error_callback(range, e); + } + }), + Mode::RawStr => check_raw_str(src, |range, res| { + if let Err(e) = res { + error_callback(range, e); + } + }), + Mode::RawByteStr => check_raw_byte_str(src, |range, res| { + if let Err(e) = res { + error_callback(range, e); + } + }), + Mode::RawCStr => check_raw_c_str(src, |range, res| { + if let Err(e) = res { + error_callback(range, e); + } + }), + } } diff --git a/src/tests.rs b/src/tests.rs index a4bbdc0..a13d8a5 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -100,9 +100,7 @@ fn test_unescape_char_good() { fn test_unescape_str_warn() { fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::Str, &mut |range, res| { - unescaped.push((range, res)) - }); + unescape_str(literal, |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } @@ -132,7 +130,7 @@ fn test_unescape_str_warn() { fn test_unescape_str_good() { fn check(literal_text: &str, expected: &str) { let mut buf = Ok(String::with_capacity(literal_text.len())); - unescape_unicode(literal_text, Mode::Str, &mut |range, c| { + unescape_str(literal_text, |range, c| { if let Ok(b) = &mut buf { match c { Ok(c) => b.push(c), @@ -248,16 +246,16 @@ fn test_unescape_byte_good() { #[test] fn test_unescape_byte_str_good() { fn check(literal_text: &str, expected: &[u8]) { - let mut buf = Ok(Vec::with_capacity(literal_text.len())); - unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| { - if let Ok(b) = &mut buf { - match c { - Ok(c) => b.push(byte_from_char(c)), - Err(e) => buf = Err((range, e)), + let mut result = Ok(Vec::with_capacity(literal_text.len())); + unescape_byte_str(literal_text, |range, res| { + if let Ok(buf) = &mut result { + match res { + Ok(b) => buf.push(b), + Err(e) => result = Err((range, e)), } } }); - assert_eq!(buf.as_deref(), Ok(expected)) + assert_eq!(result.as_deref(), Ok(expected)) } check("foo", b"foo"); @@ -272,9 +270,7 @@ fn test_unescape_byte_str_good() { fn test_unescape_raw_str() { fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::RawStr, &mut |range, res| { - unescaped.push((range, res)) - }); + check_raw_str(literal, |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } @@ -293,11 +289,9 @@ fn test_unescape_raw_str() { #[test] fn test_unescape_raw_byte_str() { - fn check(literal: &str, expected: &[(Range, Result)]) { + fn check(literal: &str, expected: &[(Range, Result)]) { let mut unescaped = Vec::with_capacity(literal.len()); - unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| { - unescaped.push((range, res)) - }); + check_raw_byte_str(literal, |range, res| unescaped.push((range, res))); assert_eq!(unescaped, expected); } @@ -310,7 +304,7 @@ fn test_unescape_raw_byte_str() { "🦀a", &[ (0..4, Err(EscapeError::NonAsciiCharInByte)), - (4..5, Ok('a')), + (4..5, Ok(b'a')), ], ); }