Skip to content

Commit da37674

Browse files
Copilottudny
andcommitted
Implement basic matrix syntax parsing for shell
Co-authored-by: tudny <[email protected]>
1 parent f05754f commit da37674

File tree

2 files changed

+288
-4
lines changed

2 files changed

+288
-4
lines changed

GUIDE.md

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,22 @@ Scalars can be both added in *shell* or via *GUI editor*.
2323
Matrices are 2D arrays of Scalars. More precisely matrix $A \in \hat{Q}^{N\times M}$
2424
is a matrix over field $\hat{Q}$ with width $N$ and height $M$.
2525

26-
Currently, the only way to create a matrix is to use *GUI editor*.
26+
Matrices can be created in shell using the bracket syntax or via *GUI editor*.
27+
28+
### Matrix Syntax in Shell
29+
30+
Matrices can be created using MATLAB-like syntax with square brackets:
31+
32+
```matlab
33+
% Matrix with two rows. Each row has two elements. First row [1, 4], second [3, 2]
34+
[1 4; 3 2]
35+
% Matrix with two rows. Each row has three elements. First row [-1/2, 5/4, 5/2], second [1/2, 13/17, -9/2]
36+
[-1/2 5/4 5/2; 1/2 13/17 -9/2]
37+
% Matrix with one row. This row has three elements: [a, -3/2, x]
38+
[a -3/2 x]
39+
```
40+
41+
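**Note:** Matrix elements may only be scalar literals (including fractions written without spaces, such as `3/4`) and variables, each optionally preceded by a unary `+` or `-`. General arithmetic expressions are not supported, because elements are separated by whitespace and expressions would be ambiguous. For example, `[3/4 -2/1]` could be interpreted as either the two elements `[3/4, -2/1]` or the single element `[-5/4]`.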
2742

2843
## Warning
2944

@@ -105,7 +120,10 @@ These are the rules expressed in BNF:
105120
<identifier> ::= (<letter> | "_") (<letter> | <digit> | "_")* | "$"
106121
<unary_op> ::= "+" | "-"
107122
<binary_op> ::= "+" | "-" | "*" | "/"
108-
<expr> ::= <integer> | <identifier> | <expr> <binary_op> <expr> | "(" <expr> ")" | <unary_op> <expr>
123+
<matrix_elem>::= <integer> | <integer> "/" <integer> | <identifier> | <unary_op> <matrix_elem>
124+
<matrix_row> ::= <matrix_elem> (" " <matrix_elem>)*
125+
<matrix> ::= "[" <matrix_row> (";" <matrix_row>)* "]"
126+
<expr> ::= <integer> | <identifier> | <matrix> | <expr> <binary_op> <expr> | "(" <expr> ")" | <unary_op> <expr>
109127
```
110128

111129
### Examples

src/parser.rs

Lines changed: 268 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use anyhow::{bail, Context};
55
use num_traits::checked_pow;
66

77
use crate::environment::{Environment, Identifier, Type};
8+
use crate::matrices::Matrix;
89
use crate::traits::MatrixNumber;
910

1011
#[derive(Debug, Clone, PartialEq, Eq)]
@@ -14,6 +15,9 @@ enum Token {
1415
Operator(char),
1516
LeftBracket,
1617
RightBracket,
18+
LeftMatrixBracket,
19+
RightMatrixBracket,
20+
Semicolon,
1721
}
1822

1923
impl Display for Token {
@@ -24,6 +28,9 @@ impl Display for Token {
2428
Token::Operator(op) => write!(f, "operator \"{op}\""),
2529
Token::LeftBracket => write!(f, "( bracket"),
2630
Token::RightBracket => write!(f, ") bracket"),
31+
Token::LeftMatrixBracket => write!(f, "[ bracket"),
32+
Token::RightMatrixBracket => write!(f, "] bracket"),
33+
Token::Semicolon => write!(f, "; semicolon"),
2734
}
2835
}
2936
}
@@ -47,6 +54,15 @@ impl<'a> Tokenizer<'a> {
4754
} else if self.raw.starts_with(')') {
4855
self.raw = &self.raw[1..];
4956
Ok(Some(Token::RightBracket))
57+
} else if self.raw.starts_with('[') {
58+
self.raw = &self.raw[1..];
59+
Ok(Some(Token::LeftMatrixBracket))
60+
} else if self.raw.starts_with(']') {
61+
self.raw = &self.raw[1..];
62+
Ok(Some(Token::RightMatrixBracket))
63+
} else if self.raw.starts_with(';') {
64+
self.raw = &self.raw[1..];
65+
Ok(Some(Token::Semicolon))
5066
} else if self.raw.starts_with(|c| "+-*/^=".contains(c)) {
5167
let op = self.raw.chars().next().unwrap();
5268
self.raw = &self.raw[1..];
@@ -82,6 +98,9 @@ enum WorkingToken<T: MatrixNumber> {
8298
BinaryOp(char),
8399
LeftBracket,
84100
RightBracket,
101+
LeftMatrixBracket,
102+
RightMatrixBracket,
103+
Semicolon,
85104
}
86105

87106
impl<T: MatrixNumber> Display for WorkingToken<T> {
@@ -93,6 +112,9 @@ impl<T: MatrixNumber> Display for WorkingToken<T> {
93112
WorkingToken::BinaryOp(op) => write!(f, "binary operator \"{op}\""),
94113
WorkingToken::LeftBracket => write!(f, "( bracket"),
95114
WorkingToken::RightBracket => write!(f, ") bracket"),
115+
WorkingToken::LeftMatrixBracket => write!(f, "[ bracket"),
116+
WorkingToken::RightMatrixBracket => write!(f, "] bracket"),
117+
WorkingToken::Semicolon => write!(f, "; semicolon"),
96118
}
97119
}
98120
}
@@ -161,20 +183,148 @@ fn unary_op<T: MatrixNumber>(arg: Type<T>, op: char) -> anyhow::Result<Type<T>>
161183
}
162184
}
163185

186+
fn parse_matrix_element<T: MatrixNumber>(
187+
raw: &str,
188+
env: &Environment<T>,
189+
) -> anyhow::Result<T> {
190+
let raw = raw.trim();
191+
if raw.is_empty() {
192+
bail!("Empty matrix element");
193+
}
194+
195+
// Handle unary operators
196+
if let Some(rest) = raw.strip_prefix('+') {
197+
return parse_matrix_element(rest.trim(), env);
198+
}
199+
200+
if let Some(rest) = raw.strip_prefix('-') {
201+
let val = parse_matrix_element(rest.trim(), env)?;
202+
return T::zero().checked_sub(&val)
203+
.context("Arithmetic operation resulted in overflow!");
204+
}
205+
206+
// Try to parse as a number (including rational numbers if T supports it)
207+
if let Ok(val) = T::from_str(raw) {
208+
return Ok(val);
209+
}
210+
211+
// Parse identifier
212+
if raw == Identifier::RESULT {
213+
let id = Identifier::result();
214+
if let Some(value) = env.get_value(&id) {
215+
return match value {
216+
Type::Scalar(s) => Ok(s.clone()),
217+
Type::Matrix(_) => bail!("Matrix elements cannot be matrices"),
218+
};
219+
} else {
220+
bail!("Undefined identifier: {}", raw);
221+
}
222+
} else if let Ok(id) = Identifier::new(raw.to_string()) {
223+
if let Some(value) = env.get_value(&id) {
224+
return match value {
225+
Type::Scalar(s) => Ok(s.clone()),
226+
Type::Matrix(_) => bail!("Matrix elements cannot be matrices"),
227+
};
228+
} else {
229+
bail!("Undefined identifier: {}", raw);
230+
}
231+
}
232+
233+
bail!("Invalid matrix element: {}", raw);
234+
}
235+
236+
fn parse_matrix<T: MatrixNumber>(
237+
raw: &str,
238+
env: &Environment<T>,
239+
) -> anyhow::Result<Matrix<T>> {
240+
let raw = raw.trim();
241+
if !raw.starts_with('[') || !raw.ends_with(']') {
242+
bail!("Matrix must be enclosed in square brackets");
243+
}
244+
245+
let content = &raw[1..raw.len()-1].trim();
246+
if content.is_empty() {
247+
bail!("Empty matrix not allowed");
248+
}
249+
250+
let rows: Vec<&str> = content.split(';').collect();
251+
let mut matrix_data: Vec<Vec<T>> = Vec::new();
252+
253+
for (row_idx, row_str) in rows.iter().enumerate() {
254+
let row_str = row_str.trim();
255+
if row_str.is_empty() {
256+
bail!("Empty row at position {}", row_idx);
257+
}
258+
259+
let elements: Vec<&str> = row_str.split_whitespace().collect();
260+
if elements.is_empty() {
261+
bail!("Row {} has no elements", row_idx);
262+
}
263+
264+
let mut row_data: Vec<T> = Vec::new();
265+
for element_str in elements {
266+
let element = parse_matrix_element(element_str, env)?;
267+
row_data.push(element);
268+
}
269+
270+
// Check that all rows have the same number of columns
271+
if !matrix_data.is_empty() && matrix_data[0].len() != row_data.len() {
272+
bail!("All rows must have the same number of elements. Row 0 has {} elements, row {} has {} elements",
273+
matrix_data[0].len(), row_idx, row_data.len());
274+
}
275+
276+
matrix_data.push(row_data);
277+
}
278+
279+
Matrix::new(matrix_data)
280+
}
281+
164282
/*
165283
<digit> ::= "0" | "1" | ... | "9"
166284
<integer> ::= <digit>+
167285
<letter> ::= "a" | "ą" | "b" | ... | "ż"
168286
<identifier> ::= (<letter> | "_") (<letter> | <digit> | "_")* | "$"
169287
<unary_op> ::= "+" | "-"
170288
<binary_op> ::= "+" | "-" | "*" | "/"
171-
<expr> ::= <integer> | <identifier> | <expr> <binary_op> <expr>
289+
<matrix_elem>::= <integer> | <integer> "/" <integer> | <identifier> | <unary_op> <matrix_elem>
290+
<matrix_row> ::= <matrix_elem> (" " <matrix_elem>)*
291+
<matrix> ::= "[" <matrix_row> (";" <matrix_row>)* "]"
292+
<expr> ::= <integer> | <identifier> | <matrix> | <expr> <binary_op> <expr>
172293
| "(" <expr> ")" | <unary_op> <expr> | <identifier> "(" <expr> ")"
173294
*/
174295
pub fn parse_expression<T: MatrixNumber>(
175296
raw: &str,
176297
env: &Environment<T>,
177298
) -> anyhow::Result<Type<T>> {
299+
let raw = raw.trim();
300+
301+
// Check if the entire expression is just a matrix (starts with '[' and ends with ']' with balanced brackets)
302+
if raw.starts_with('[') {
303+
let mut bracket_count = 0;
304+
let mut found_end = false;
305+
for (i, ch) in raw.char_indices() {
306+
match ch {
307+
'[' => bracket_count += 1,
308+
']' => {
309+
bracket_count -= 1;
310+
if bracket_count == 0 {
311+
// If we've closed all brackets and we're at the end, it's a pure matrix
312+
if i == raw.len() - 1 {
313+
found_end = true;
314+
}
315+
break;
316+
}
317+
}
318+
_ => {}
319+
}
320+
}
321+
322+
if found_end && bracket_count == 0 {
323+
let matrix = parse_matrix(raw, env)?;
324+
return Ok(Type::Matrix(matrix));
325+
}
326+
}
327+
178328
let mut tokenizer = Tokenizer::new(raw);
179329
let mut operators: VecDeque<WorkingToken<T>> = VecDeque::new();
180330
let mut outputs: VecDeque<WorkingToken<T>> = VecDeque::new();
@@ -194,7 +344,7 @@ pub fn parse_expression<T: MatrixNumber>(
194344
current: &Token,
195345
) -> bool {
196346
match current {
197-
Token::Integer(_) | Token::Identifier(_) | Token::LeftBracket => matches!(
347+
Token::Integer(_) | Token::Identifier(_) | Token::LeftBracket | Token::LeftMatrixBracket => matches!(
198348
previous,
199349
None | Some(WorkingToken::LeftBracket)
200350
| Some(WorkingToken::BinaryOp(_))
@@ -207,11 +357,20 @@ pub fn parse_expression<T: MatrixNumber>(
207357
| Some(WorkingToken::Type(_))
208358
| Some(WorkingToken::BinaryOp(_))
209359
| Some(WorkingToken::LeftBracket)
360+
| Some(WorkingToken::RightMatrixBracket)
210361
),
211362
Token::RightBracket => matches!(
212363
previous,
213364
Some(WorkingToken::RightBracket) | Some(WorkingToken::Type(_))
214365
),
366+
Token::RightMatrixBracket => matches!(
367+
previous,
368+
Some(WorkingToken::Type(_)) | Some(WorkingToken::RightMatrixBracket)
369+
),
370+
Token::Semicolon => matches!(
371+
previous,
372+
Some(WorkingToken::Type(_)) | Some(WorkingToken::RightMatrixBracket)
373+
),
215374
}
216375
}
217376

@@ -308,6 +467,9 @@ pub fn parse_expression<T: MatrixNumber>(
308467
operators.front()
309468
}
310469
Token::Operator(_) => bail!("Assignment is not allowed in expressions!"),
470+
Token::LeftMatrixBracket => bail!("Matrix brackets should not appear in expressions! Use matrix syntax: [1 2; 3 4]"),
471+
Token::RightMatrixBracket => bail!("Matrix brackets should not appear in expressions! Use matrix syntax: [1 2; 3 4]"),
472+
Token::Semicolon => bail!("Semicolons should only appear in matrix syntax: [1 2; 3 4]"),
311473
};
312474
}
313475

@@ -686,4 +848,108 @@ mod tests {
686848
Type::Matrix(im![2, 4, 6; 8, 10, 12])
687849
);
688850
}
851+
852+
/// Bracket literals over `i64`: a 2x2 matrix, a row vector and a column vector.
#[test]
fn test_matrix_syntax_simple() {
    let env = Environment::<i64>::new();
    let eval = |input: &str| parse_expression(input, &env).unwrap();

    // 2x2 matrix: rows are separated by `;`, elements by spaces.
    assert_eq!(eval("[1 4; 3 2]"), Type::Matrix(im![1, 4; 3, 2]));

    // 1x3 matrix (row vector).
    assert_eq!(eval("[1 2 3]"), Type::Matrix(im![1, 2, 3]));

    // 3x1 matrix (column vector).
    assert_eq!(eval("[1; 2; 3]"), Type::Matrix(im![1; 2; 3]));
}
874+
875+
/// A bracket literal over `Rational64` whose elements are signed fractions.
#[test]
fn test_matrix_syntax_rational() {
    let env = Environment::<Rational64>::new();
    let frac = |numer: i64, denom: i64| Rational64::new(numer, denom);

    // The expected matrix is assembled by hand so every entry is an explicit fraction.
    let rows = vec![
        vec![frac(-1, 2), frac(5, 4)],
        vec![frac(1, 2), frac(-9, 2)],
    ];
    let expected = Type::Matrix(Matrix::new_unsafe(rows));

    let parsed = parse_expression("[-1/2 5/4; 1/2 -9/2]", &env).unwrap();
    assert_eq!(parsed, expected);
}
890+
891+
/// Bracket literals whose elements are scalar variables, with and without unary signs.
#[test]
fn test_matrix_syntax_with_variables() {
    let frac = |numer: i64, denom: i64| Rational64::new(numer, denom);
    let named = |name: &str| Identifier::new(name.to_string()).unwrap();

    // Bind a = 2 and x = -3 before parsing anything.
    let mut env = Environment::<Rational64>::new();
    env.insert(named("a"), Type::Scalar(frac(2, 1)));
    env.insert(named("x"), Type::Scalar(frac(-3, 1)));

    // Variables mixed with a fraction literal in a single row.
    let mixed_row = Matrix::new_unsafe(vec![vec![frac(2, 1), frac(-3, 2), frac(-3, 1)]]);
    let parsed = parse_expression("[a -3/2 x]", &env).unwrap();
    assert_eq!(parsed, Type::Matrix(mixed_row));

    // Unary `+` and `-` applied to variables: -x is 3 and -a is -2.
    let signed = Matrix::new_unsafe(vec![
        vec![frac(2, 1), frac(3, 1)],
        vec![frac(-2, 1), frac(-3, 1)],
    ]);
    let parsed = parse_expression("[+a -x; -a +x]", &env).unwrap();
    assert_eq!(parsed, Type::Matrix(signed));
}
918+
919+
/// Malformed bracket literals must be rejected with an error, never a panic.
#[test]
fn test_matrix_syntax_errors() {
    let env = Environment::<i64>::new();

    let malformed = [
        "[]",           // empty matrix
        "[1 2; 3 4 5]", // mismatched row sizes
        "[1 2; ; 3 4]", // empty row
        "[a b]",        // undefined variables
    ];

    for &input in malformed.iter() {
        assert!(
            parse_expression(input, &env).is_err(),
            "expected an error for {}",
            input
        );
    }
}
935+
936+
/// Arithmetic on matrices that are bound to variables.
///
/// Only matrix *variables* are exercised here; no bracket literal appears inside
/// a larger expression in this test.
#[test]
fn test_matrix_in_expressions() {
    let named = |name: &str| Identifier::new(name.to_string()).unwrap();

    let mut env = Environment::<i64>::new();
    env.insert(named("A"), Type::Matrix(im![1, 2; 3, 4]));
    env.insert(named("B"), Type::Matrix(im![5, 6; 7, 8]));

    let eval = |input: &str| parse_expression(input, &env).unwrap();

    // Element-wise sum.
    assert_eq!(eval("A + B"), Type::Matrix(im![6, 8; 10, 12]));

    // Matrix product.
    assert_eq!(eval("A * B"), Type::Matrix(im![19, 22; 43, 50]));
}
689955
}

0 commit comments

Comments
 (0)