Skip to content
81 changes: 77 additions & 4 deletions gcode/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub(crate) enum TokenType {
Letter,
Number,
Comment,
Newline,
Unknown,
}

Expand All @@ -16,6 +17,8 @@ impl From<char> for TokenType {
TokenType::Number
} else if c == '(' || c == ';' || c == ')' {
TokenType::Comment
} else if c == '\n' {
TokenType::Newline
} else {
TokenType::Unknown
}
Expand Down Expand Up @@ -53,14 +56,14 @@ impl<'input> Lexer<'input> {
{
let start = self.current_position;
let mut end = start;
let mut line_endings = 0;

for letter in self.rest().chars() {
if !predicate(letter) {
break;
}
if letter == '\n' {
line_endings += 1;
// Newline defines the command to be complete.
break;
}
end += letter.len_utf8();
}
Expand All @@ -69,7 +72,6 @@ impl<'input> Lexer<'input> {
None
} else {
self.current_position = end;
self.current_line += line_endings;
Some(&self.src[start..end])
}
}
Expand Down Expand Up @@ -175,6 +177,23 @@ impl<'input> Lexer<'input> {
},
})
}

/// Consume a single `'\n'` character and emit a [`TokenType::Newline`] token.
///
/// The iterator only dispatches here after peeking a `TokenType::Newline`,
/// so the current position is expected to sit on a `'\n'`; that invariant
/// is checked in debug builds.
fn tokenize_newline(&mut self) -> Option<Token<'input>> {
    // Caller contract: we must be looking at a newline character.
    debug_assert!(self.rest().starts_with('\n'));

    let start = self.current_position;
    let line = self.current_line;
    // Borrow the newline out of the original source stream so the token's
    // value points into `src`, consistent with every other token kind
    // (rather than a detached `"\n"` literal).
    let value = &self.src[start..start + 1];

    // '\n' is always exactly one byte in UTF-8.
    self.current_position += 1;
    self.current_line += 1;

    Some(Token {
        kind: TokenType::Newline,
        value,
        span: Span {
            start,
            line,
            end: start + 1,
        },
    })
}
Comment on lines +181 to +196
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the only way this function can be called is when we're looking at a \n character, would it be enough to add something like debug_assert!(self.rest().starts_with("\n")) and return a Token<'input>?

Otherwise, we should actually check for the newline character and return None if it isn't found.

Either way, I think we should try to use the newline characters from the original stream instead of writing let value = "\n".

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think what you're getting at is "this code has a weird smell", and I think you're right. 😁 But I'm honestly not sure how to construct this better. If we don't explicitly set value = "\n", we'd have to chomp() or somehow otherwise pull the newline from the string, which is a computational expense that seems superfluous.

As for your suggestion

would it be enough to add something like debug_assert!(self.rest().starts_with("\n")) and return a Token<'input>

I don't think I understand. The debug_assert is something I could add; seems reasonable. But how would you return the Token<'input>?


fn finished(&self) -> bool { self.current_position >= self.src.len() }

Expand Down Expand Up @@ -219,6 +238,9 @@ impl<'input> Iterator for Lexer<'input> {
TokenType::Number => {
return Some(self.tokenize_number().expect(MSG))
},
TokenType::Newline => {
return Some(self.tokenize_newline().expect(MSG))
},
TokenType::Unknown => self.current_position += 1,
}
}
Expand Down Expand Up @@ -253,11 +275,22 @@ mod tests {

#[test]
fn skip_whitespace() {
let mut lexer = Lexer::new(" \n\r\t ");
let mut lexer = Lexer::new(" \r\t ");

lexer.skip_whitespace();

assert_eq!(lexer.current_position, lexer.src.len());
assert_eq!(lexer.current_line, 0);
}

#[test]
fn respect_newlines() {
    let mut lexer = Lexer::new("\n\rM30garbage");

    // The input begins with '\n': tokenizing it must yield a Newline token,
    // consume exactly one byte, and advance the line counter by one.
    let token = lexer.tokenize_newline().expect("Failed.");
    assert_eq!(token.kind, TokenType::Newline);

    assert_eq!(lexer.current_position, 1);
    assert_eq!(lexer.current_line, 1);
}

Expand Down Expand Up @@ -371,4 +404,44 @@ mod tests {

assert_eq!(got.value, "+3.14");
}

#[test]
fn two_multi() {
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would probably be more readable if we rewrite the test to be something like this:

let expected = vec![
  ("G", 0),
  ("0", 0),
  ("X", 0),
  ...
];

let actual: Vec<_> = Lexer::new("...")
  .map(|tok| (tok.value, tok.span.line))
  .collect();

assert_eq!(actual, expected);

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For long g-code example tests, that's probably the way to go. Maybe we could create some helper methods that make testing easier. We're really missing these long-form g-code verification tests, so I took this one from the pull request @dr0ps made.

How about we keep this test as is for now, and rework these kinds of tests in the future?

let mut lexer = Lexer::new("G0 X1\nG1 Y2");

let got = lexer.next().unwrap();
assert_eq!(got.value, "G");
assert_eq!(got.span.line, 0);

let got = lexer.next().unwrap();
assert_eq!(got.value, "0");
assert_eq!(got.span.line, 0);

let got = lexer.next().unwrap();
assert_eq!(got.value, "X");
assert_eq!(got.span.line, 0);

let got = lexer.next().unwrap();
assert_eq!(got.value, "1");
assert_eq!(got.span.line, 0);

let got = lexer.next().unwrap();
assert_eq!(got.value, "\n");

let got = lexer.next().unwrap();
assert_eq!(got.value, "G");
assert_eq!(got.span.line, 1);

let got = lexer.next().unwrap();
assert_eq!(got.value, "1");
assert_eq!(got.span.line, 1);

let got = lexer.next().unwrap();
assert_eq!(got.value, "Y");
assert_eq!(got.span.line, 1);

let got = lexer.next().unwrap();
assert_eq!(got.value, "2");
assert_eq!(got.span.line, 1);
}
}
2 changes: 1 addition & 1 deletion gcode/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@
unused_qualifications,
unused_results,
variant_size_differences,
intra_doc_link_resolution_failure,
rustdoc::broken_intra_doc_links,
missing_docs
)]
#![cfg_attr(not(feature = "std"), no_std)]
Expand Down
82 changes: 55 additions & 27 deletions gcode/src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ where
word.span,
B::Arguments::default(),
));

return;
}

Expand Down Expand Up @@ -219,11 +220,13 @@ where
// constructing
let mut temp_gcode = None;

while let Some(next_line) = self.next_line_number() {
if !line.is_empty() && next_line != line.span().line {
// we've started the next line
break;
}
if let None = self.atoms.peek() {
// There is nothing left in the file. :sad-face:
// This ends the parser's work.
return None;
}

while let Some(_next_line) = self.next_line_number() {

match self.atoms.next().expect("unreachable") {
Atom::Unknown(token) => {
Expand All @@ -234,6 +237,13 @@ where
self.on_comment_push_error(e.0);
}
},
Atom::Newline(_) => {
if !line.is_empty() {
// Newline ends the current command
// if there was something to parse.
break;
}
},
// line numbers are annoying, so handle them separately
Atom::Word(word) if word.letter.to_ascii_lowercase() == 'n' => {
self.handle_line_number(
Expand All @@ -255,11 +265,11 @@ where
}
}

if line.is_empty() {
None
} else {
Some(line)
}
// TODO: This should exit the parser under some conditions.
// IS M2 or M30: see 3.6.1.
// return None;

return Some(line);
}
}

Expand Down Expand Up @@ -404,36 +414,54 @@ mod tests {
assert_eq!(got[1].gcodes().len(), 1);
}

/// I wasn't sure if the `#[derive(Serialize)]` would work given we use
/// `B::Comments`, which would borrow from the original source.
#[test]
#[cfg(feature = "serde-1")]
fn you_can_actually_serialize_lines() {
let src = "G01 X5 G90 (comment) G91 M10\nG01\n";
let line = parse(src).next().unwrap();
fn funny_bug_in_crate_example() {
let src = "G90 \n G01 X50.0 Y-10";
let expected = vec![
GCode::new(Mnemonic::General, 90.0, Span::PLACEHOLDER),
GCode::new(Mnemonic::General, 1.0, Span::PLACEHOLDER)
.with_argument(Word::new('X', 50.0, Span::PLACEHOLDER))
.with_argument(Word::new('Y', -10.0, Span::PLACEHOLDER)),
];

let got: Vec<_> = crate::parse(src).collect();
assert_eq!(got, expected);
}

fn assert_serializable<S: serde::Serialize>(_: &S) {}
fn assert_deserializable<'de, D: serde::Deserialize<'de>>() {}
#[test]
#[ignore]
fn implicit_command_after_newline() {
    let src = "G01 X1.0 Y2.0\n X3.0 Y4.0";

    // The second physical line carries no G-word, so it should inherit
    // the modal G01 from the previous command.
    let first = GCode::new(Mnemonic::General, 1.0, Span::PLACEHOLDER)
        .with_argument(Word::new('X', 1.0, Span::PLACEHOLDER))
        .with_argument(Word::new('Y', 2.0, Span::PLACEHOLDER));
    let second = GCode::new(Mnemonic::General, 1.0, Span::PLACEHOLDER)
        .with_argument(Word::new('X', 3.0, Span::PLACEHOLDER))
        .with_argument(Word::new('Y', 4.0, Span::PLACEHOLDER));
    let expected = vec![first, second];

    let got: Vec<_> = crate::parse(src).collect();
    assert_eq!(got, expected);
}

/// For some reason we were parsing the G90, then an empty G01 and the
/// actual G01.
#[test]
#[ignore]
fn funny_bug_in_crate_example() {
let src = "G90 \n G01 X50.0 Y-10";
// This test focuses on the G90 and M7 on the same line.
fn two_commands_in_a_row() {
let src = "G90 M7\nG01 X1.0 Y2.0\nX3.0 Y4.0";
let expected = vec![
GCode::new(Mnemonic::General, 90.0, Span::PLACEHOLDER),
GCode::new(Mnemonic::Miscellaneous, 7.0, Span::PLACEHOLDER),
GCode::new(Mnemonic::General, 1.0, Span::PLACEHOLDER)
.with_argument(Word::new('X', 50.0, Span::PLACEHOLDER))
.with_argument(Word::new('Y', -10.0, Span::PLACEHOLDER)),
.with_argument(Word::new('X', 1.0, Span::PLACEHOLDER))
.with_argument(Word::new('Y', 2.0, Span::PLACEHOLDER)),
GCode::new(Mnemonic::General, 1.0, Span::PLACEHOLDER)
.with_argument(Word::new('X', 3.0, Span::PLACEHOLDER))
.with_argument(Word::new('Y', 4.0, Span::PLACEHOLDER)),
];

let got: Vec<_> = crate::parse(src).collect();

assert_eq!(got, expected);
}
}
3 changes: 3 additions & 0 deletions gcode/src/words.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ impl Display for Word {
pub(crate) enum Atom<'input> {
Word(Word),
Comment(Comment<'input>),
Newline(Token<'input>),
/// Incomplete parts of a [`Word`].
BrokenWord(Token<'input>),
/// Garbage from the tokenizer (see [`TokenType::Unknown`]).
Expand All @@ -53,6 +54,7 @@ impl<'input> Atom<'input> {
match self {
Atom::Word(word) => word.span,
Atom::Comment(comment) => comment.span,
Atom::Newline(newline) => newline.span,
Atom::Unknown(token) | Atom::BrokenWord(token) => token.span,
}
}
Expand Down Expand Up @@ -90,6 +92,7 @@ where

match kind {
TokenType::Unknown => return Some(Atom::Unknown(token)),
TokenType::Newline => return Some(Atom::Newline(token)),
TokenType::Comment => {
return Some(Atom::Comment(Comment { value, span }))
},
Expand Down
29 changes: 1 addition & 28 deletions gcode/tests/smoke_test.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use gcode::{GCode, Mnemonic, Span, Word};
use gcode::{Mnemonic, Span, Word};

macro_rules! smoke_test {
($name:ident, $filename:expr) => {
Expand Down Expand Up @@ -26,25 +26,7 @@ smoke_test!(pi_rustlogo, "PI_rustlogo.gcode");
smoke_test!(insulpro_piping, "Insulpro.Piping.-.115mm.OD.-.40mm.WT.txt");

#[test]
#[ignore]
fn expected_program_2_output() {
// N10 T2 M3 S447 F80
// N20 G0 X112 Y-2
// ;N30 Z-5
// N40 G41
// N50 G1 X95 Y8 M8
// ;N60 X32
// ;N70 X5 Y15
// ;N80 Y52
// N90 G2 X15 Y62 I10 J0
// N100 G1 X83
// N110 G3 X95 Y50 I12 J0
// N120 G1 Y-12
// N130 G40
// N140 G0 Z100 M9
// ;N150 X150 Y150
// N160 M30

let src = include_str!("data/program_2.gcode");

let got: Vec<_> =
Expand All @@ -54,15 +36,6 @@ fn expected_program_2_output() {
assert_eq!(got.len(), 20);
// check lines without any comments
assert_eq!(got.iter().filter(|l| l.comments().is_empty()).count(), 11);

let gcodes: Vec<_> = got.iter().flat_map(|l| l.gcodes()).cloned().collect();
let expected = vec![
GCode::new(Mnemonic::ToolChange, 2.0, Span::PLACEHOLDER),
GCode::new(Mnemonic::Miscellaneous, 3.0, Span::PLACEHOLDER)
.with_argument(Word::new('S', 447.0, Span::PLACEHOLDER))
.with_argument(Word::new('F', 80.0, Span::PLACEHOLDER)),
];
pretty_assertions::assert_eq!(gcodes, expected);
}

struct PanicOnError;
Expand Down