Skip to content

Commit b96dd92

Browse files
committed
New API which does not expose unreachable
The old API exposes `unreachable` in both `unescape_unicode` and `unescape_mixed`. These are conceptually one function, but because their return types are incompatible, they could not be unified. The new API takes this insight further and splits `unescape_unicode` into several functions, so that the byte functions can return bytes instead of chars.
1 parent 6c8dc46 commit b96dd92

File tree

4 files changed

+112
-24
lines changed

4 files changed

+112
-24
lines changed

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "rustc-literal-escaper"
3-
version = "0.0.2"
3+
version = "0.0.3"
44
edition = "2021"
55
description = "Provides code to unescape string literals"
66
license = "Apache-2.0 OR MIT"

src/lib.rs

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,106 @@ impl EscapeError {
8080
}
8181
}
8282

83+
/// Takes the contents of a literal (without quotes)
84+
/// and produces a sequence of errors,
85+
/// which are returned by invoking `error_callback`.
86+
pub fn unescape_for_errors(
87+
src: &str,
88+
mode: Mode,
89+
mut error_callback: impl FnMut(Range<usize>, EscapeError),
90+
) {
91+
match mode {
92+
Char => {
93+
let mut chars = src.chars();
94+
if let Err(e) = unescape_char_or_byte(&mut chars, Mode::Char) {
95+
error_callback(0..(src.len() - chars.as_str().len()), e);
96+
}
97+
}
98+
Byte => {
99+
let mut chars = src.chars();
100+
if let Err(e) = unescape_char_or_byte(&mut chars, Mode::Byte) {
101+
error_callback(0..(src.len() - chars.as_str().len()), e);
102+
}
103+
}
104+
Str => unescape_str(src, |range, res| {
105+
if let Err(e) = res {
106+
error_callback(range, e);
107+
}
108+
}),
109+
ByteStr => unescape_byte_str(src, |range, res| {
110+
if let Err(e) = res {
111+
error_callback(range, e);
112+
}
113+
}),
114+
CStr => unescape_c_str(src, |range, res| {
115+
if let Err(e) = res {
116+
error_callback(range, e);
117+
}
118+
}),
119+
RawStr => check_raw_str(src, |range, res| {
120+
if let Err(e) = res {
121+
error_callback(range, e);
122+
}
123+
}),
124+
RawByteStr => check_raw_byte_str(src, |range, res| {
125+
if let Err(e) = res {
126+
error_callback(range, e);
127+
}
128+
}),
129+
RawCStr => check_raw_c_str(src, |range, res| {
130+
if let Err(e) = res {
131+
error_callback(range, e);
132+
}
133+
}),
134+
}
135+
}
136+
137+
pub fn check_raw_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
138+
unescape_unicode(src, Mode::RawStr, &mut callback)
139+
}
140+
141+
pub fn check_raw_byte_str(
142+
src: &str,
143+
mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>),
144+
) {
145+
unescape_unicode(src, Mode::RawByteStr, &mut |r, res| {
146+
callback(r, res.map(byte_from_char))
147+
})
148+
}
149+
150+
pub fn check_raw_c_str(
151+
src: &str,
152+
mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>),
153+
) {
154+
unescape_unicode(src, Mode::RawCStr, &mut callback)
155+
}
156+
157+
pub fn unescape_str(src: &str, mut callback: impl FnMut(Range<usize>, Result<char, EscapeError>)) {
158+
unescape_unicode(src, Mode::Str, &mut callback)
159+
}
160+
161+
pub fn unescape_byte_str(
162+
src: &str,
163+
mut callback: impl FnMut(Range<usize>, Result<u8, EscapeError>),
164+
) {
165+
unescape_unicode(src, Mode::ByteStr, &mut |r, res| {
166+
callback(r, res.map(byte_from_char))
167+
})
168+
}
169+
170+
pub fn unescape_c_str(
171+
src: &str,
172+
mut callback: impl FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
173+
) {
174+
unescape_mixed(src, Mode::CStr, &mut callback)
175+
}
176+
83177
/// Takes the contents of a unicode-only (non-mixed-utf8) literal (without
84178
/// quotes) and produces a sequence of escaped characters or errors.
85179
///
86180
/// Values are returned by invoking `callback`. For `Char` and `Byte` modes,
87181
/// the callback will be called exactly once.
88-
pub fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
182+
fn unescape_unicode<F>(src: &str, mode: Mode, callback: &mut F)
89183
where
90184
F: FnMut(Range<usize>, Result<char, EscapeError>),
91185
{
@@ -146,7 +240,7 @@ impl From<u8> for MixedUnit {
146240
/// a sequence of escaped characters or errors.
147241
///
148242
/// Values are returned by invoking `callback`.
149-
pub fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
243+
fn unescape_mixed<F>(src: &str, mode: Mode, callback: &mut F)
150244
where
151245
F: FnMut(Range<usize>, Result<MixedUnit, EscapeError>),
152246
{
@@ -443,7 +537,7 @@ where
443537
}
444538

445539
#[inline]
446-
pub fn byte_from_char(c: char) -> u8 {
540+
fn byte_from_char(c: char) -> u8 {
447541
let res = c as u32;
448542
debug_assert!(res <= u8::MAX as u32, "guaranteed because of ByteStr");
449543
res as u8

src/tests.rs

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -100,9 +100,7 @@ fn test_unescape_char_good() {
100100
fn test_unescape_str_warn() {
101101
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
102102
let mut unescaped = Vec::with_capacity(literal.len());
103-
unescape_unicode(literal, Mode::Str, &mut |range, res| {
104-
unescaped.push((range, res))
105-
});
103+
unescape_str(literal, |range, res| unescaped.push((range, res)));
106104
assert_eq!(unescaped, expected);
107105
}
108106

@@ -132,7 +130,7 @@ fn test_unescape_str_warn() {
132130
fn test_unescape_str_good() {
133131
fn check(literal_text: &str, expected: &str) {
134132
let mut buf = Ok(String::with_capacity(literal_text.len()));
135-
unescape_unicode(literal_text, Mode::Str, &mut |range, c| {
133+
unescape_str(literal_text, |range, c| {
136134
if let Ok(b) = &mut buf {
137135
match c {
138136
Ok(c) => b.push(c),
@@ -248,16 +246,16 @@ fn test_unescape_byte_good() {
248246
#[test]
249247
fn test_unescape_byte_str_good() {
250248
fn check(literal_text: &str, expected: &[u8]) {
251-
let mut buf = Ok(Vec::with_capacity(literal_text.len()));
252-
unescape_unicode(literal_text, Mode::ByteStr, &mut |range, c| {
253-
if let Ok(b) = &mut buf {
254-
match c {
255-
Ok(c) => b.push(byte_from_char(c)),
256-
Err(e) => buf = Err((range, e)),
249+
let mut result = Ok(Vec::with_capacity(literal_text.len()));
250+
unescape_byte_str(literal_text, |range, res| {
251+
if let Ok(buf) = &mut result {
252+
match res {
253+
Ok(b) => buf.push(b),
254+
Err(e) => result = Err((range, e)),
257255
}
258256
}
259257
});
260-
assert_eq!(buf.as_deref(), Ok(expected))
258+
assert_eq!(result.as_deref(), Ok(expected))
261259
}
262260

263261
check("foo", b"foo");
@@ -272,9 +270,7 @@ fn test_unescape_byte_str_good() {
272270
fn test_unescape_raw_str() {
273271
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
274272
let mut unescaped = Vec::with_capacity(literal.len());
275-
unescape_unicode(literal, Mode::RawStr, &mut |range, res| {
276-
unescaped.push((range, res))
277-
});
273+
check_raw_str(literal, |range, res| unescaped.push((range, res)));
278274
assert_eq!(unescaped, expected);
279275
}
280276

@@ -293,11 +289,9 @@ fn test_unescape_raw_str() {
293289

294290
#[test]
295291
fn test_unescape_raw_byte_str() {
296-
fn check(literal: &str, expected: &[(Range<usize>, Result<char, EscapeError>)]) {
292+
fn check(literal: &str, expected: &[(Range<usize>, Result<u8, EscapeError>)]) {
297293
let mut unescaped = Vec::with_capacity(literal.len());
298-
unescape_unicode(literal, Mode::RawByteStr, &mut |range, res| {
299-
unescaped.push((range, res))
300-
});
294+
check_raw_byte_str(literal, |range, res| unescaped.push((range, res)));
301295
assert_eq!(unescaped, expected);
302296
}
303297

@@ -310,7 +304,7 @@ fn test_unescape_raw_byte_str() {
310304
"🦀a",
311305
&[
312306
(0..4, Err(EscapeError::NonAsciiCharInByte)),
313-
(4..5, Ok('a')),
307+
(4..5, Ok(b'a')),
314308
],
315309
);
316310
}

0 commit comments

Comments
 (0)