diff --git a/src/modules/expression/interpolated_region.rs b/src/modules/expression/interpolated_region.rs index a5734412..ddc55998 100644 --- a/src/modules/expression/interpolated_region.rs +++ b/src/modules/expression/interpolated_region.rs @@ -38,44 +38,69 @@ fn is_escaped(word: &str, symbol: char) -> bool { } /// Parse Amber code's escaped strings and reterns it. -fn parse_escaped_string(string: String, region_type: &InterpolatedRegionType) -> String { - let mut chars = string.chars().peekable(); +fn parse_escaped_string(meta: &mut ParserMetadata, string: String, region_type: &InterpolatedRegionType) -> String { + let mut chars = string.chars().enumerate().peekable(); let mut result = String::new(); - while let Some(c) = chars.next() { - if c == '\\' { - match chars.peek() { - Some('\n') => {} - Some('\\') => result.push('\\'), - Some('n') => result.push('\n'), - Some('t') => result.push('\t'), - Some('r') => result.push('\r'), - Some('0') => result.push('\0'), - Some('{') => result.push('{'), - Some('"') => { - if *region_type == InterpolatedRegionType::Text { - result.push('"'); - } else { - result.push(c); - continue; - } - } - Some('$') => { - if *region_type == InterpolatedRegionType::Command { - result.push('$'); - } else { - result.push(c); - continue; + while let Some((_, c)) = chars.next() { + if let Some((i, peek)) = chars.peek() { + match (c, peek) { + ('\\', '\n') => {} + ('\\', '\\') => result.push('\\'), + ('\\', 'n') => result.push('\n'), + ('\\', 't') => result.push('\t'), + ('\\', 'r') => result.push('\r'), + ('\\', '0') => result.push('\0'), + ('\\', '{') => result.push('{'), + ('\\', '"') if *region_type == InterpolatedRegionType::Text => result.push('"'), + ('\\', '$') if *region_type == InterpolatedRegionType::Command => result.push('$'), + ('\\', peek) => { + let warning_msg = format!("Invalid escape sequence '\\{peek}'"); + let pos = PositionInfo::from_token(meta, meta.get_token_at(meta.get_index().saturating_sub(1))); + if let Position::Pos(start_row, start_col) = pos.position { + let mut current_row = start_row; + let mut current_col = start_col; + + // Calculate the absolute position of the 'peek' character, + // accounting for newlines within the token's word. + let mut byte_offset_in_string = 0; + for char_in_string in string.chars() { + if byte_offset_in_string == *i { + break; + } else if char_in_string == '\n' { + current_row += 1; + current_col = 0; + } else { + current_col += char_in_string.len_utf8(); + } + byte_offset_in_string += char_in_string.len_utf8(); + } + + let c_pos = PositionInfo::at_pos(pos.path, (current_row, current_col), 2); + let mut supported_escapes = String::from(r#"\n, \t, \r, \0, \{"#); + if *region_type == InterpolatedRegionType::Text { + supported_escapes.push_str(r#", \""#); + } else if *region_type == InterpolatedRegionType::Command { + supported_escapes.push_str(r#", \$"#); + } + + let message = Message::new_warn_at_position(meta, c_pos) + .message(warning_msg) + .comment(format!("Only these escape sequences are supported: {supported_escapes}")); + meta.add_message(message); } + result.push(c); + continue; } - _ => { + (_, _) => { result.push(c); continue; } } - chars.next(); } else { - result.push(c) + result.push(c); + continue; } + chars.next(); } result } @@ -92,14 +117,14 @@ pub fn parse_interpolated_region(meta: &mut ParserMetadata, interpolated_type: & && !is_escaped(word, letter) }) { let stripped = word.chars().take(word.chars().count() - 1).skip(1).collect::(); - strings.push(parse_escaped_string(stripped, interpolated_type)); + strings.push(parse_escaped_string(meta, stripped, interpolated_type)); Ok((strings, interps)) } else { let mut is_interp = false; // Initialize string let start = token_by(meta, |word| word.starts_with(letter))?; - strings.push(parse_escaped_string(start.chars().skip(1).collect::(), interpolated_type)); + strings.push(parse_escaped_string(meta, start.chars().skip(1).collect::(), interpolated_type)); // Factor rest of the interpolation while let Some(tok) = meta.get_current_token() { // Track interpolations @@ -114,12 +139,12 @@ pub fn parse_interpolated_region(meta: &mut ParserMetadata, interpolated_type: & meta.offset_index(-1); } else { - strings.push(parse_escaped_string(tok.word.clone(), interpolated_type)); + strings.push(parse_escaped_string(meta, tok.word.clone(), interpolated_type)); if tok.word.ends_with(letter) && !is_escaped(&tok.word, letter) { meta.increment_index(); // Right trim the symbol let trimmed = strings.last().unwrap() - .chars().take(parse_escaped_string(tok.word, interpolated_type).chars().count() - 1).collect::(); + .chars().take(parse_escaped_string(meta, tok.word, interpolated_type).chars().count() - 1).collect::(); // replace the last string *strings.last_mut().unwrap() = trimmed; return Ok((strings, interps)) @@ -138,41 +163,42 @@ mod tests { #[test] fn test_parse_escaped_string() { + let mut meta = ParserMetadata::new(Vec::new(), None, None); let text_type = InterpolatedRegionType::Text; let command_type = InterpolatedRegionType::Command; // Test text parsing - assert_eq!(parse_escaped_string("hello".to_string(), &text_type), "hello"); - assert_eq!(parse_escaped_string("\n".to_string(), &text_type), "\n"); - assert_eq!(parse_escaped_string("\t".to_string(), &text_type), "\t"); - assert_eq!(parse_escaped_string("\r".to_string(), &text_type), "\r"); - assert_eq!(parse_escaped_string("\0".to_string(), &text_type), "\0"); - assert_eq!(parse_escaped_string(r#"\\"#.to_string(), &text_type), r#"\"#); - assert_eq!(parse_escaped_string(r#"'"#.to_string(), &text_type), r#"'"#); - assert_eq!(parse_escaped_string(r#"\""#.to_string(), &text_type), r#"""#); - assert_eq!(parse_escaped_string(r#"$"#.to_string(), &text_type), r#"$"#); - assert_eq!(parse_escaped_string(r#"\\$"#.to_string(), &text_type), r#"\$"#); - assert_eq!(parse_escaped_string(r#"\{"#.to_string(), &text_type), r#"{"#); - assert_eq!(parse_escaped_string(r#"\\ "#.to_string(), &text_type), r#"\ "#); - assert_eq!(parse_escaped_string(r#"$\{var}"#.to_string(), &text_type), r#"${var}"#); - assert_eq!(parse_escaped_string(r#"\\$\{var}"#.to_string(), &text_type), r#"\${var}"#); + assert_eq!(parse_escaped_string(&mut meta, "hello".to_string(), &text_type), "hello"); + assert_eq!(parse_escaped_string(&mut meta, "\n".to_string(), &text_type), "\n"); + assert_eq!(parse_escaped_string(&mut meta, "\t".to_string(), &text_type), "\t"); + assert_eq!(parse_escaped_string(&mut meta, "\r".to_string(), &text_type), "\r"); + assert_eq!(parse_escaped_string(&mut meta, "\0".to_string(), &text_type), "\0"); + assert_eq!(parse_escaped_string(&mut meta, r#"\\"#.to_string(), &text_type), r#"\"#); + assert_eq!(parse_escaped_string(&mut meta, r#"'"#.to_string(), &text_type), r#"'"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\""#.to_string(), &text_type), r#"""#); + assert_eq!(parse_escaped_string(&mut meta, r#"$"#.to_string(), &text_type), r#"$"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\\$"#.to_string(), &text_type), r#"\$"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\{"#.to_string(), &text_type), r#"{"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\\ "#.to_string(), &text_type), r#"\ "#); + assert_eq!(parse_escaped_string(&mut meta, r#"$\{var}"#.to_string(), &text_type), r#"${var}"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\\$\{var}"#.to_string(), &text_type), r#"\${var}"#); // Test command parsing - assert_eq!(parse_escaped_string("hello".to_string(), &command_type), "hello"); - assert_eq!(parse_escaped_string("\n".to_string(), &command_type), "\n"); - assert_eq!(parse_escaped_string("\t".to_string(), &command_type), "\t"); - assert_eq!(parse_escaped_string("\r".to_string(), &command_type), "\r"); - assert_eq!(parse_escaped_string("\0".to_string(), &command_type), "\0"); - assert_eq!(parse_escaped_string(r#"\\"#.to_string(), &command_type), r#"\"#); - assert_eq!(parse_escaped_string(r#"""#.to_string(), &command_type), r#"""#); - assert_eq!(parse_escaped_string(r#"\""#.to_string(), &command_type), r#"\""#); - assert_eq!(parse_escaped_string(r#"'"#.to_string(), &command_type), r#"'"#); - assert_eq!(parse_escaped_string(r#"\'"#.to_string(), &command_type), r#"\'"#); - assert_eq!(parse_escaped_string(r#"\$"#.to_string(), &command_type), r#"$"#); - assert_eq!(parse_escaped_string(r#"\\\$"#.to_string(), &command_type), r#"\$"#); - assert_eq!(parse_escaped_string(r#"\{"#.to_string(), &command_type), r#"{"#); - assert_eq!(parse_escaped_string(r#"basename `pwd`"#.to_string(), &command_type), r#"basename `pwd`"#); - assert_eq!(parse_escaped_string(r#"\$\{var}"#.to_string(), &command_type), r#"${var}"#); - assert_eq!(parse_escaped_string(r#"\\\$\{var}"#.to_string(), &command_type), r#"\${var}"#); + assert_eq!(parse_escaped_string(&mut meta, "hello".to_string(), &command_type), "hello"); + assert_eq!(parse_escaped_string(&mut meta, "\n".to_string(), &command_type), "\n"); + assert_eq!(parse_escaped_string(&mut meta, "\t".to_string(), &command_type), "\t"); + assert_eq!(parse_escaped_string(&mut meta, "\r".to_string(), &command_type), "\r"); + assert_eq!(parse_escaped_string(&mut meta, "\0".to_string(), &command_type), "\0"); + assert_eq!(parse_escaped_string(&mut meta, r#"\\"#.to_string(), &command_type), r#"\"#); + assert_eq!(parse_escaped_string(&mut meta, r#"""#.to_string(), &command_type), r#"""#); + assert_eq!(parse_escaped_string(&mut meta, r#"\""#.to_string(), &command_type), r#"\""#); + assert_eq!(parse_escaped_string(&mut meta, r#"'"#.to_string(), &command_type), r#"'"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\'"#.to_string(), &command_type), r#"\'"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\$"#.to_string(), &command_type), r#"$"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\\\$"#.to_string(), &command_type), r#"\$"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\{"#.to_string(), &command_type), r#"{"#); + assert_eq!(parse_escaped_string(&mut meta, r#"basename `pwd`"#.to_string(), &command_type), r#"basename `pwd`"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\$\{var}"#.to_string(), &command_type), r#"${var}"#); + assert_eq!(parse_escaped_string(&mut meta, r#"\\\$\{var}"#.to_string(), &command_type), r#"\${var}"#); } } diff --git a/src/modules/expression/literal/mod.rs b/src/modules/expression/literal/mod.rs index ffa557a7..f82684d6 100644 --- a/src/modules/expression/literal/mod.rs +++ b/src/modules/expression/literal/mod.rs @@ -1,6 +1,3 @@ -use heraclitus_compiler::prelude::*; -use crate::utils::metadata::ParserMetadata; - pub mod bool; pub mod number; pub mod integer; @@ -8,29 +5,3 @@ pub mod text; pub mod null; pub mod array; pub mod status; - -fn validate_text_escape_sequences(meta: &mut ParserMetadata, string_content: &str, start_pos: usize, end_pos: usize) { - let mut chars = string_content.chars().peekable(); - while let Some(c) = chars.next() { - if c == '\\' { - if let Some(&next_char) = chars.peek() { - match next_char { - // Valid escape sequences - 'n' | 't' | 'r' | '0' | '{' | '$' | '\'' | '"' | '\\' => { - chars.next(); // consume the valid escape character - } - // Invalid escape sequences - _ => { - let warning_msg = format!("Invalid escape sequence '\\{next_char}'"); - let pos = PositionInfo::from_between_tokens(meta, meta.get_token_at(start_pos), meta.get_token_at(end_pos)); - let message = Message::new_warn_at_position(meta, pos) - .message(warning_msg) - .comment("Only these escape sequences are supported: \\n, \\t, \\r, \\0, \\{, \\$, \\', \\\", \\\\"); - meta.add_message(message); - chars.next(); // consume the invalid escape character - } - } - } - } - } -} diff --git a/src/modules/expression/literal/text.rs b/src/modules/expression/literal/text.rs index 43d16e25..df289efe 100644 --- a/src/modules/expression/literal/text.rs +++ b/src/modules/expression/literal/text.rs @@ -1,6 +1,5 @@ use heraclitus_compiler::prelude::*; use crate::docs::module::DocumentationModule; -use crate::modules::expression::literal::validate_text_escape_sequences; use crate::modules::prelude::*; use crate::modules::types::{Type, Typed}; use crate::translate::module::TranslateModule; @@ -30,11 +29,7 @@ impl SyntaxModule for Text { } fn parse(&mut self, meta: &mut ParserMetadata) -> SyntaxResult { - let start_pos = meta.get_index(); (self.strings, self.interps) = parse_interpolated_region(meta, &InterpolatedRegionType::Text)?; - for string in self.strings.iter() { - validate_text_escape_sequences(meta, string, start_pos, meta.get_index()); - } Ok(()) } }