Skip to content

Commit 96c97e5

Browse files
committed
Implement c-str literal parsing
1 parent 5477bd2 commit 96c97e5

File tree

1 file changed

+108
-13
lines changed

1 file changed

+108
-13
lines changed

src/parse.rs

+108-13
Original file line numberDiff line numberDiff line change
@@ -277,9 +277,11 @@ fn leaf_token(input: Cursor) -> PResult<TokenTree> {
277277
}
278278

279279
fn ident(input: Cursor) -> PResult<crate::Ident> {
280-
if ["r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#"]
281-
.iter()
282-
.any(|prefix| input.starts_with(prefix))
280+
if [
281+
"r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
282+
]
283+
.iter()
284+
.any(|prefix| input.starts_with(prefix))
283285
{
284286
Err(Reject)
285287
} else {
@@ -337,6 +339,8 @@ fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
337339
Ok(ok)
338340
} else if let Ok(ok) = byte_string(input) {
339341
Ok(ok)
342+
} else if let Ok(ok) = c_string(input) {
343+
Ok(ok)
340344
} else if let Ok(ok) = byte(input) {
341345
Ok(ok)
342346
} else if let Ok(ok) = character(input) {
@@ -389,9 +393,7 @@ fn cooked_string(input: Cursor) -> Result<Cursor, Reject> {
389393
Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
390394
| Some((_, '\'')) | Some((_, '"')) | Some((_, '0')) => {}
391395
Some((_, 'u')) => {
392-
if !backslash_u(&mut chars) {
393-
break;
394-
}
396+
backslash_u(&mut chars)?;
395397
}
396398
Some((_, ch @ '\n')) | Some((_, ch @ '\r')) => {
397399
let mut last = ch;
@@ -539,6 +541,87 @@ fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
539541
Err(Reject)
540542
}
541543

544+
fn c_string(input: Cursor) -> Result<Cursor, Reject> {
545+
if let Ok(input) = input.parse("c\"") {
546+
cooked_c_string(input)
547+
} else if let Ok(input) = input.parse("cr") {
548+
raw_c_string(input)
549+
} else {
550+
Err(Reject)
551+
}
552+
}
553+
554+
fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
555+
let (input, delimiter) = delimiter_of_raw_string(input)?;
556+
let mut bytes = input.bytes().enumerate();
557+
while let Some((i, byte)) = bytes.next() {
558+
match byte {
559+
b'"' if input.rest[i + 1..].starts_with(delimiter) => {
560+
let rest = input.advance(i + 1 + delimiter.len());
561+
return Ok(literal_suffix(rest));
562+
}
563+
b'\r' => match bytes.next() {
564+
Some((_, b'\n')) => {}
565+
_ => break,
566+
},
567+
b'\0' => break,
568+
_ => {}
569+
}
570+
}
571+
Err(Reject)
572+
}
573+
574+
fn cooked_c_string(input: Cursor) -> Result<Cursor, Reject> {
575+
let mut chars = input.char_indices().peekable();
576+
577+
while let Some((i, ch)) = chars.next() {
578+
match ch {
579+
'"' => {
580+
let input = input.advance(i + 1);
581+
return Ok(literal_suffix(input));
582+
}
583+
'\r' => match chars.next() {
584+
Some((_, '\n')) => {}
585+
_ => break,
586+
},
587+
'\\' => match chars.next() {
588+
Some((_, 'x')) => {
589+
if !backslash_x_nonzero(&mut chars) {
590+
break;
591+
}
592+
}
593+
Some((_, 'n')) | Some((_, 'r')) | Some((_, 't')) | Some((_, '\\'))
594+
| Some((_, '\'')) | Some((_, '"')) => {}
595+
Some((_, 'u')) => {
596+
if backslash_u(&mut chars)? == '\0' {
597+
break;
598+
}
599+
}
600+
Some((_, ch @ '\n')) | Some((_, ch @ '\r')) => {
601+
let mut last = ch;
602+
loop {
603+
if last == '\r' && chars.next().map_or(true, |(_, ch)| ch != '\n') {
604+
return Err(Reject);
605+
}
606+
match chars.peek() {
607+
Some((_, ch @ ' ')) | Some((_, ch @ '\t')) | Some((_, ch @ '\n'))
608+
| Some((_, ch @ '\r')) => {
609+
last = *ch;
610+
chars.next();
611+
}
612+
_ => break,
613+
}
614+
}
615+
}
616+
_ => break,
617+
},
618+
'\0' => break,
619+
_ch => {}
620+
}
621+
}
622+
Err(Reject)
623+
}
624+
542625
fn byte(input: Cursor) -> Result<Cursor, Reject> {
543626
let input = input.parse("b'")?;
544627
let mut bytes = input.bytes().enumerate();
@@ -568,7 +651,7 @@ fn character(input: Cursor) -> Result<Cursor, Reject> {
568651
let ok = match chars.next().map(|(_, ch)| ch) {
569652
Some('\\') => match chars.next().map(|(_, ch)| ch) {
570653
Some('x') => backslash_x_char(&mut chars),
571-
Some('u') => backslash_u(&mut chars),
654+
Some('u') => backslash_u(&mut chars).is_ok(),
572655
Some('n') | Some('r') | Some('t') | Some('\\') | Some('0') | Some('\'') | Some('"') => {
573656
true
574657
}
@@ -614,11 +697,23 @@ where
614697
true
615698
}
616699

617-
fn backslash_u<I>(chars: &mut I) -> bool
700+
fn backslash_x_nonzero<I>(chars: &mut I) -> bool
701+
where
702+
I: Iterator<Item = (usize, char)>,
703+
{
704+
let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
705+
let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
706+
!(first == '0' && second == '0')
707+
}
708+
709+
fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
618710
where
619711
I: Iterator<Item = (usize, char)>,
620712
{
621-
next_ch!(chars @ '{');
713+
match chars.next() {
714+
Some((_, '{')) => {}
715+
_ => return Err(Reject),
716+
}
622717
let mut value = 0;
623718
let mut len = 0;
624719
for (_, ch) in chars {
@@ -627,17 +722,17 @@ where
627722
'a'..='f' => 10 + ch as u8 - b'a',
628723
'A'..='F' => 10 + ch as u8 - b'A',
629724
'_' if len > 0 => continue,
630-
'}' if len > 0 => return char::from_u32(value).is_some(),
631-
_ => return false,
725+
'}' if len > 0 => return char::from_u32(value).ok_or(Reject),
726+
_ => break,
632727
};
633728
if len == 6 {
634-
return false;
729+
break;
635730
}
636731
value *= 0x10;
637732
value += u32::from(digit);
638733
len += 1;
639734
}
640-
false
735+
Err(Reject)
641736
}
642737

643738
fn float(input: Cursor) -> Result<Cursor, Reject> {

0 commit comments

Comments
 (0)