Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
143 changes: 84 additions & 59 deletions src/modules/expression/interpolated_region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,36 +38,60 @@ fn is_escaped(word: &str, symbol: char) -> bool {
}

/// Parses an escaped string from Amber code and returns the result.
fn parse_escaped_string(string: String, region_type: &InterpolatedRegionType) -> String {
let mut chars = string.chars().peekable();
fn parse_escaped_string(meta: &mut ParserMetadata, string: String, region_type: &InterpolatedRegionType) -> String {
let mut chars = string.chars().enumerate().peekable();
let mut result = String::new();
while let Some(c) = chars.next() {
while let Some((_, c)) = chars.next() {
if c == '\\' {
match chars.peek() {
Some('\n') => {}
Some('\\') => result.push('\\'),
Some('n') => result.push('\n'),
Some('t') => result.push('\t'),
Some('r') => result.push('\r'),
Some('0') => result.push('\0'),
Some('{') => result.push('{'),
Some('"') => {
if *region_type == InterpolatedRegionType::Text {
result.push('"');
} else {
result.push(c);
continue;
}
}
Some('$') => {
if *region_type == InterpolatedRegionType::Command {
result.push('$');
} else {
result.push(c);
continue;
Some((_, '\n')) => {}
Some((_, '\\')) => result.push('\\'),
Some((_, 'n')) => result.push('\n'),
Some((_, 't')) => result.push('\t'),
Some((_, 'r')) => result.push('\r'),
Some((_, '0')) => result.push('\0'),
Some((_, '{')) => result.push('{'),
Some((_, '"')) if *region_type == InterpolatedRegionType::Text => result.push('"'),
Some((_, '$')) if *region_type == InterpolatedRegionType::Command => result.push('$'),
Some((i, peek)) => {
let warning_msg = format!("Invalid escape sequence '\\{peek}'");
let pos = PositionInfo::from_token(meta, meta.get_token_at(meta.get_index().saturating_sub(1)));
if let Position::Pos(start_row, start_col) = pos.position {
let mut current_row = start_row;
let mut current_col = start_col;

// Calculate the absolute position of the 'peek' character,
// accounting for newlines within the token's word.
let mut byte_offset_in_string = 0;
for char_in_string in string.chars() {
if byte_offset_in_string == *i {
break;
} else if char_in_string == '\n' {
current_row += 1;
current_col = 0;
} else {
current_col += char_in_string.len_utf8();
}
byte_offset_in_string += char_in_string.len_utf8();
}

let c_pos = PositionInfo::at_pos(pos.path, (current_row, current_col), 2);
let mut supported_escapes = String::from(r#"\n, \t, \r, \0, \{"#);
if *region_type == InterpolatedRegionType::Text {
supported_escapes.push_str(r#", \""#);
} else if *region_type == InterpolatedRegionType::Command {
supported_escapes.push_str(r#", \$"#);
}

let message = Message::new_warn_at_position(meta, c_pos)
.message(warning_msg)
.comment(format!("Only these escape sequences are supported: {supported_escapes}"));
meta.add_message(message);
}
result.push(c);
continue;
}
_ => {
None => {
result.push(c);
continue;
}
Expand All @@ -92,14 +116,14 @@ pub fn parse_interpolated_region(meta: &mut ParserMetadata, interpolated_type: &
&& !is_escaped(word, letter)
}) {
let stripped = word.chars().take(word.chars().count() - 1).skip(1).collect::<String>();
strings.push(parse_escaped_string(stripped, interpolated_type));
strings.push(parse_escaped_string(meta, stripped, interpolated_type));
Ok((strings, interps))
}
else {
let mut is_interp = false;
// Initialize string
let start = token_by(meta, |word| word.starts_with(letter))?;
strings.push(parse_escaped_string(start.chars().skip(1).collect::<String>(), interpolated_type));
strings.push(parse_escaped_string(meta, start.chars().skip(1).collect::<String>(), interpolated_type));
// Factor rest of the interpolation
while let Some(tok) = meta.get_current_token() {
// Track interpolations
Expand All @@ -114,12 +138,12 @@ pub fn parse_interpolated_region(meta: &mut ParserMetadata, interpolated_type: &
meta.offset_index(-1);
}
else {
strings.push(parse_escaped_string(tok.word.clone(), interpolated_type));
strings.push(parse_escaped_string(meta, tok.word.clone(), interpolated_type));
if tok.word.ends_with(letter) && !is_escaped(&tok.word, letter) {
meta.increment_index();
// Right trim the symbol
let trimmed = strings.last().unwrap()
.chars().take(parse_escaped_string(tok.word, interpolated_type).chars().count() - 1).collect::<String>();
.chars().take(parse_escaped_string(meta, tok.word, interpolated_type).chars().count() - 1).collect::<String>();
// replace the last string
*strings.last_mut().unwrap() = trimmed;
return Ok((strings, interps))
Expand All @@ -138,41 +162,42 @@ mod tests {

#[test]
fn test_parse_escaped_string() {
let mut meta = ParserMetadata::new(Vec::new(), None, None);
let text_type = InterpolatedRegionType::Text;
let command_type = InterpolatedRegionType::Command;

// Test text parsing
assert_eq!(parse_escaped_string("hello".to_string(), &text_type), "hello");
assert_eq!(parse_escaped_string("\n".to_string(), &text_type), "\n");
assert_eq!(parse_escaped_string("\t".to_string(), &text_type), "\t");
assert_eq!(parse_escaped_string("\r".to_string(), &text_type), "\r");
assert_eq!(parse_escaped_string("\0".to_string(), &text_type), "\0");
assert_eq!(parse_escaped_string(r#"\\"#.to_string(), &text_type), r#"\"#);
assert_eq!(parse_escaped_string(r#"'"#.to_string(), &text_type), r#"'"#);
assert_eq!(parse_escaped_string(r#"\""#.to_string(), &text_type), r#"""#);
assert_eq!(parse_escaped_string(r#"$"#.to_string(), &text_type), r#"$"#);
assert_eq!(parse_escaped_string(r#"\\$"#.to_string(), &text_type), r#"\$"#);
assert_eq!(parse_escaped_string(r#"\{"#.to_string(), &text_type), r#"{"#);
assert_eq!(parse_escaped_string(r#"\\ "#.to_string(), &text_type), r#"\ "#);
assert_eq!(parse_escaped_string(r#"$\{var}"#.to_string(), &text_type), r#"${var}"#);
assert_eq!(parse_escaped_string(r#"\\$\{var}"#.to_string(), &text_type), r#"\${var}"#);
assert_eq!(parse_escaped_string(&mut meta, "hello".to_string(), &text_type), "hello");
assert_eq!(parse_escaped_string(&mut meta, "\n".to_string(), &text_type), "\n");
assert_eq!(parse_escaped_string(&mut meta, "\t".to_string(), &text_type), "\t");
assert_eq!(parse_escaped_string(&mut meta, "\r".to_string(), &text_type), "\r");
assert_eq!(parse_escaped_string(&mut meta, "\0".to_string(), &text_type), "\0");
assert_eq!(parse_escaped_string(&mut meta, r#"\\"#.to_string(), &text_type), r#"\"#);
assert_eq!(parse_escaped_string(&mut meta, r#"'"#.to_string(), &text_type), r#"'"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\""#.to_string(), &text_type), r#"""#);
assert_eq!(parse_escaped_string(&mut meta, r#"$"#.to_string(), &text_type), r#"$"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\\$"#.to_string(), &text_type), r#"\$"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\{"#.to_string(), &text_type), r#"{"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\\ "#.to_string(), &text_type), r#"\ "#);
assert_eq!(parse_escaped_string(&mut meta, r#"$\{var}"#.to_string(), &text_type), r#"${var}"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\\$\{var}"#.to_string(), &text_type), r#"\${var}"#);

// Test command parsing
assert_eq!(parse_escaped_string("hello".to_string(), &command_type), "hello");
assert_eq!(parse_escaped_string("\n".to_string(), &command_type), "\n");
assert_eq!(parse_escaped_string("\t".to_string(), &command_type), "\t");
assert_eq!(parse_escaped_string("\r".to_string(), &command_type), "\r");
assert_eq!(parse_escaped_string("\0".to_string(), &command_type), "\0");
assert_eq!(parse_escaped_string(r#"\\"#.to_string(), &command_type), r#"\"#);
assert_eq!(parse_escaped_string(r#"""#.to_string(), &command_type), r#"""#);
assert_eq!(parse_escaped_string(r#"\""#.to_string(), &command_type), r#"\""#);
assert_eq!(parse_escaped_string(r#"'"#.to_string(), &command_type), r#"'"#);
assert_eq!(parse_escaped_string(r#"\'"#.to_string(), &command_type), r#"\'"#);
assert_eq!(parse_escaped_string(r#"\$"#.to_string(), &command_type), r#"$"#);
assert_eq!(parse_escaped_string(r#"\\\$"#.to_string(), &command_type), r#"\$"#);
assert_eq!(parse_escaped_string(r#"\{"#.to_string(), &command_type), r#"{"#);
assert_eq!(parse_escaped_string(r#"basename `pwd`"#.to_string(), &command_type), r#"basename `pwd`"#);
assert_eq!(parse_escaped_string(r#"\$\{var}"#.to_string(), &command_type), r#"${var}"#);
assert_eq!(parse_escaped_string(r#"\\\$\{var}"#.to_string(), &command_type), r#"\${var}"#);
assert_eq!(parse_escaped_string(&mut meta, "hello".to_string(), &command_type), "hello");
assert_eq!(parse_escaped_string(&mut meta, "\n".to_string(), &command_type), "\n");
assert_eq!(parse_escaped_string(&mut meta, "\t".to_string(), &command_type), "\t");
assert_eq!(parse_escaped_string(&mut meta, "\r".to_string(), &command_type), "\r");
assert_eq!(parse_escaped_string(&mut meta, "\0".to_string(), &command_type), "\0");
assert_eq!(parse_escaped_string(&mut meta, r#"\\"#.to_string(), &command_type), r#"\"#);
assert_eq!(parse_escaped_string(&mut meta, r#"""#.to_string(), &command_type), r#"""#);
assert_eq!(parse_escaped_string(&mut meta, r#"\""#.to_string(), &command_type), r#"\""#);
assert_eq!(parse_escaped_string(&mut meta, r#"'"#.to_string(), &command_type), r#"'"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\'"#.to_string(), &command_type), r#"\'"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\$"#.to_string(), &command_type), r#"$"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\\\$"#.to_string(), &command_type), r#"\$"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\{"#.to_string(), &command_type), r#"{"#);
assert_eq!(parse_escaped_string(&mut meta, r#"basename `pwd`"#.to_string(), &command_type), r#"basename `pwd`"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\$\{var}"#.to_string(), &command_type), r#"${var}"#);
assert_eq!(parse_escaped_string(&mut meta, r#"\\\$\{var}"#.to_string(), &command_type), r#"\${var}"#);
}
}
29 changes: 0 additions & 29 deletions src/modules/expression/literal/mod.rs
Original file line number Diff line number Diff line change
@@ -1,36 +1,7 @@
use heraclitus_compiler::prelude::*;
use crate::utils::metadata::ParserMetadata;

pub mod bool;
pub mod number;
pub mod integer;
pub mod text;
pub mod null;
pub mod array;
pub mod status;

/// Emits a warning for every unsupported backslash escape in `string_content`.
///
/// The text is scanned left to right. The character following a backslash is
/// always consumed together with it, so it is never treated as the start of
/// another escape. A backslash that is the very last character is ignored.
///
/// Each warning is positioned using the tokens found at `start_pos` and
/// `end_pos` in `meta`, and is registered through `meta.add_message`.
fn validate_text_escape_sequences(meta: &mut ParserMetadata, string_content: &str, start_pos: usize, end_pos: usize) {
    let mut remaining = string_content.chars();
    while let Some(current) = remaining.next() {
        if current != '\\' {
            continue;
        }
        // Take the escaped character right away; valid or not, it is skipped.
        if let Some(escaped) = remaining.next() {
            let is_supported = matches!(escaped, 'n' | 't' | 'r' | '0' | '{' | '$' | '\'' | '"' | '\\');
            if is_supported {
                continue;
            }
            let warning_msg = format!("Invalid escape sequence '\\{escaped}'");
            let pos = PositionInfo::from_between_tokens(meta, meta.get_token_at(start_pos), meta.get_token_at(end_pos));
            let message = Message::new_warn_at_position(meta, pos)
                .message(warning_msg)
                .comment("Only these escape sequences are supported: \\n, \\t, \\r, \\0, \\{, \\$, \\', \\\", \\\\");
            meta.add_message(message);
        }
    }
}
5 changes: 0 additions & 5 deletions src/modules/expression/literal/text.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use heraclitus_compiler::prelude::*;
use crate::docs::module::DocumentationModule;
use crate::modules::expression::literal::validate_text_escape_sequences;
use crate::modules::prelude::*;
use crate::modules::types::{Type, Typed};
use crate::translate::module::TranslateModule;
Expand Down Expand Up @@ -30,11 +29,7 @@ impl SyntaxModule<ParserMetadata> for Text {
}

/// Parses a text region and stores its string fragments and interpolations.
///
/// The token index is recorded before parsing. Once the region has been
/// parsed, every string fragment is passed to `validate_text_escape_sequences`
/// together with that starting index and the token index at the time of the
/// call. A failure from `parse_interpolated_region` is propagated and leaves
/// `self` unmodified.
fn parse(&mut self, meta: &mut ParserMetadata) -> SyntaxResult {
    // Token index before the region is consumed.
    let region_start = meta.get_index();
    let (strings, interps) = parse_interpolated_region(meta, &InterpolatedRegionType::Text)?;
    self.strings = strings;
    self.interps = interps;
    for fragment in &self.strings {
        let region_end = meta.get_index();
        validate_text_escape_sequences(meta, fragment, region_start, region_end);
    }
    Ok(())
}
}
Expand Down