Skip to content

Commit 2fe812c

Browse files
committed
Global variables are now loaded from _ENV.
In the Lua specification, global variables are loaded from a global table called `_ENV`. Programs can read from and write to this table, and can also overwrite the `_ENV` variable itself. There are a few changes: * The compiler: * assumes that `_ENV` is always stored in register 0 * replaces all reads and writes of global variables with `GetAttr` and `SetAttr` instructions * The VM: * always creates a `LuaTable` and stores it in register 0, so that globals can be easily accessed * can now get and set attributes of `LuaTable`s
1 parent 20b854e commit 2fe812c

File tree

12 files changed

+439
-119
lines changed

12 files changed

+439
-119
lines changed

luacompiler/src/lib/bytecode/instructions.rs

+13-11
Original file line numberDiff line numberDiff line change
@@ -46,17 +46,19 @@ impl HLInstr {
4646
/// refer to at most 256 constants.
4747
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
4848
pub enum Opcode {
49-
MOV = 0, // R(1) = R(2)
50-
LDI = 1, // R(1) = I(1); load integer from the constant table
51-
LDF = 2, // R(1) = F(1); load float from the constant table
52-
LDS = 3, // R(1) = S(1); load string from the constant table
53-
ADD = 4, // R(1) = R(2) + R(3)
54-
SUB = 5, // R(1) = R(2) - R(3)
55-
MUL = 6, // R(1) = R(2) * R(3)
56-
DIV = 7, // R(1) = R(2) / R(3)
57-
MOD = 8, // R(1) = R(2) % R(3)
58-
FDIV = 9, // R(1) = R(2) // R(3)
59-
EXP = 10, // R(1) = R(2) ^ R(3)
49+
MOV = 0, // R(1) = R(2)
50+
LDI = 1, // R(1) = I(1); load integer from the constant table
51+
LDF = 2, // R(1) = F(1); load float from the constant table
52+
LDS = 3, // R(1) = S(1); load string from the constant table
53+
ADD = 4, // R(1) = R(2) + R(3)
54+
SUB = 5, // R(1) = R(2) - R(3)
55+
MUL = 6, // R(1) = R(2) * R(3)
56+
DIV = 7, // R(1) = R(2) / R(3)
57+
MOD = 8, // R(1) = R(2) % R(3)
58+
FDIV = 9, // R(1) = R(2) // R(3)
59+
EXP = 10, // R(1) = R(2) ^ R(3)
60+
GetAttr = 11, // R(1) = R(2)[R(3)]
61+
SetAttr = 12, // R(1)[R(2)] = R(3)
6062
}
6163

6264
#[cfg(test)]

luacompiler/src/lib/bytecode/mod.rs

+5
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,11 @@ impl LuaBytecode {
7474
&self.strings[i as usize]
7575
}
7676

77+
/// Gets the size of the string constant table.
78+
pub fn get_strings_len(&self) -> usize {
79+
self.strings.len()
80+
}
81+
7782
/// Serialize the bytecode to a file using bincode.
7883
pub fn serialize_to_file(&self, file: &str) -> io::Result<()> {
7984
let mut f = File::create(file)?;

luacompiler/src/lib/irgen/mod.rs

+144-34
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ pub fn compile_to_ir(pt: &LuaParseTree) -> LuaIR {
1616
}
1717

1818
/// Represents a compiler which translates a given Lua parse tree to an SSA IR.
19+
/// The compiler assumes that the `_ENV` variable is always stored in register 0!
1920
struct LuaToIR<'a> {
2021
pt: &'a LuaParseTree,
2122
reg_map: RegisterMap<'a>,
@@ -43,22 +44,7 @@ impl<'a> LuaToIR<'a> {
4344
ridx: RIdx(ridx),
4445
ref nodes,
4546
} if ridx == lua5_3_y::R_STAT => {
46-
debug_assert!(nodes.len() == 3);
47-
match nodes[1] {
48-
Term { lexeme } if lexeme.tok_id() == lua5_3_l::T_EQ => {
49-
let value = self.compile_expr(&nodes[2]);
50-
let name = self.compile_variable(&nodes[0]);
51-
// because we are creating an IR which is in SSA form, it
52-
// means that each assignment creates a new register
53-
let reg = self.reg_map.get_new_reg();
54-
// if a variable is assigned a value multiple times, we have
55-
// to make sure that the map knows the new register which
56-
// holds the new value
57-
self.reg_map.set_reg(name, reg);
58-
self.instrs.push(HLInstr(Opcode::MOV, reg, value, 0));
59-
}
60-
_ => {}
61-
}
47+
self.compile_stat(nodes);
6248
}
6349
Nonterm { ridx: _, ref nodes } => {
6450
for i in (0..nodes.len()).rev() {
@@ -73,6 +59,32 @@ impl<'a> LuaToIR<'a> {
7359
LuaIR::new(self.instrs, self.const_map, self.reg_map.get_lifetimes())
7460
}
7561

62+
fn compile_stat(&mut self, nodes: &Vec<Node<u8>>) {
63+
debug_assert!(nodes.len() == 3);
64+
match nodes[1] {
65+
Term { lexeme } if lexeme.tok_id() == lua5_3_l::T_EQ => {
66+
// x = 3 => _ENV["x"] = 3
67+
// compile the expression on the right
68+
let value = self.compile_expr(&nodes[2]);
69+
// load a reference to _ENV
70+
let env_reg = self.reg_map.get_reg("_ENV").unwrap();
71+
// prepare the attribute for _ENV which is the name of the variable
72+
let name = self.compile_variable(&nodes[0]);
73+
let name_index = self.const_map.get_str(name.to_string());
74+
let attr_reg = self.reg_map.get_new_reg();
75+
self.instrs
76+
.push(HLInstr(Opcode::LDS, attr_reg, name_index, 0));
77+
// if a variable is assigned a value multiple times, we have
78+
// to make sure that the map knows the new register which
79+
// holds the new value
80+
self.reg_map.set_reg(name, value);
81+
self.instrs
82+
.push(HLInstr(Opcode::SetAttr, env_reg, attr_reg, value));
83+
}
84+
_ => {}
85+
}
86+
}
87+
7688
/// Jumps to the first child of <node> which denotes a variable name.
7789
fn compile_variable(&self, node: &Node<u8>) -> &'a str {
7890
let name = LuaToIR::find_term(node, lua5_3_l::T_NAME);
@@ -130,7 +142,24 @@ impl<'a> LuaToIR<'a> {
130142
self.instrs.push(HLInstr(Opcode::LDS, reg, short_str, 0));
131143
reg
132144
}
133-
_ => self.reg_map.get_reg(value),
145+
lua5_3_l::T_NAME => {
146+
// if the variable is already in a register, then we can return its register number;
147+
// otherwise we have to generate code for `_ENV[<name>]`
148+
self.reg_map.get_reg(value).unwrap_or_else(|| {
149+
let env_reg = self.reg_map.get_reg("_ENV").unwrap();
150+
let name_index = self.const_map.get_str(value.to_string());
151+
let attr_reg = self.reg_map.get_new_reg();
152+
self.instrs
153+
.push(HLInstr(Opcode::LDS, attr_reg, name_index, 0));
154+
let reg = self.reg_map.get_new_reg();
155+
self.instrs
156+
.push(HLInstr(Opcode::GetAttr, reg, env_reg, attr_reg));
157+
reg
158+
})
159+
}
160+
_ => panic!(
161+
"Cannot compile terminals that are not variable names, numbers or strings."
162+
),
134163
}
135164
}
136165
}
@@ -187,20 +216,21 @@ mod tests {
187216
let pt = LuaParseTree::from_str(String::from("x = 1 + 2 * 3 / 2 ^ 2.0 // 1 - 2"));
188217
let ir = compile_to_ir(&pt.unwrap());
189218
let expected_instrs = vec![
190-
HLInstr(Opcode::LDI, 0, 0, 0),
191-
HLInstr(Opcode::LDI, 1, 1, 0),
192-
HLInstr(Opcode::LDI, 2, 2, 0),
193-
HLInstr(Opcode::MUL, 3, 1, 2),
194-
HLInstr(Opcode::LDI, 4, 1, 0),
195-
HLInstr(Opcode::LDF, 5, 0, 0),
196-
HLInstr(Opcode::EXP, 6, 4, 5),
197-
HLInstr(Opcode::DIV, 7, 3, 6),
198-
HLInstr(Opcode::LDI, 8, 0, 0),
199-
HLInstr(Opcode::FDIV, 9, 7, 8),
200-
HLInstr(Opcode::ADD, 10, 0, 9),
201-
HLInstr(Opcode::LDI, 11, 1, 0),
202-
HLInstr(Opcode::SUB, 12, 10, 11),
203-
HLInstr(Opcode::MOV, 13, 12, 0),
219+
HLInstr(Opcode::LDI, 1, 0, 0),
220+
HLInstr(Opcode::LDI, 2, 1, 0),
221+
HLInstr(Opcode::LDI, 3, 2, 0),
222+
HLInstr(Opcode::MUL, 4, 2, 3),
223+
HLInstr(Opcode::LDI, 5, 1, 0),
224+
HLInstr(Opcode::LDF, 6, 0, 0),
225+
HLInstr(Opcode::EXP, 7, 5, 6),
226+
HLInstr(Opcode::DIV, 8, 4, 7),
227+
HLInstr(Opcode::LDI, 9, 0, 0),
228+
HLInstr(Opcode::FDIV, 10, 8, 9),
229+
HLInstr(Opcode::ADD, 11, 1, 10),
230+
HLInstr(Opcode::LDI, 12, 1, 0),
231+
HLInstr(Opcode::SUB, 13, 11, 12),
232+
HLInstr(Opcode::LDS, 14, 0, 0),
233+
HLInstr(Opcode::SetAttr, 0, 14, 13),
204234
];
205235
assert_eq!(ir.instrs.len(), expected_instrs.len());
206236
for (lhs, rhs) in ir.instrs.iter().zip(expected_instrs.iter()) {
@@ -213,11 +243,15 @@ mod tests {
213243
regs[i.1] = !regs[i.1];
214244
// if at any point this assertion fails, it means that a register has been
215245
// assigned a value multiple times
216-
assert!(regs[i.1]);
246+
// SetAttr only updates the state of a register, so it doesn't mess up the
247+
// correctness of the SSA
248+
if i.0 != Opcode::SetAttr {
249+
assert!(regs[i.1]);
250+
}
217251
}
218252
// check lifetimes
219253
let expected_lifetimes = vec![
220-
Lifetime::with_end_point(0, 1),
254+
Lifetime::with_end_point(0, 15),
221255
Lifetime::with_end_point(1, 2),
222256
Lifetime::with_end_point(2, 3),
223257
Lifetime::with_end_point(3, 4),
@@ -231,6 +265,7 @@ mod tests {
231265
Lifetime::with_end_point(11, 12),
232266
Lifetime::with_end_point(12, 13),
233267
Lifetime::with_end_point(13, 14),
268+
Lifetime::with_end_point(14, 15),
234269
];
235270
assert_eq!(ir.lifetimes.len(), expected_lifetimes.len());
236271
for (lhs, rhs) in ir.lifetimes.iter().zip(expected_lifetimes.iter()) {
@@ -249,6 +284,81 @@ mod tests {
249284
for (lhs, rhs) in floats.iter().zip(expected_floats.iter()) {
250285
assert_eq!(lhs, rhs);
251286
}
252-
assert_eq!(ir.const_map.get_strings().len(), 0);
287+
let expected_strings = vec!["x"];
288+
let strings = ir.const_map.get_strings();
289+
assert_eq!(strings.len(), expected_strings.len());
290+
for (lhs, rhs) in strings.iter().zip(expected_strings.iter()) {
291+
assert_eq!(lhs, rhs);
292+
}
293+
}
294+
295+
#[test]
296+
fn correctness_of_ssa_ir2() {
297+
let pt = LuaParseTree::from_str(String::from("x = 1\ny = x"));
298+
let ir = compile_to_ir(&pt.unwrap());
299+
let expected_instrs = vec![
300+
HLInstr(Opcode::LDI, 1, 0, 0),
301+
HLInstr(Opcode::LDS, 2, 0, 0),
302+
HLInstr(Opcode::SetAttr, 0, 2, 1),
303+
HLInstr(Opcode::LDS, 3, 1, 0),
304+
HLInstr(Opcode::SetAttr, 0, 3, 1),
305+
];
306+
assert_eq!(ir.instrs.len(), expected_instrs.len());
307+
for (lhs, rhs) in ir.instrs.iter().zip(expected_instrs.iter()) {
308+
assert_eq!(lhs, rhs);
309+
}
310+
// check that the IR is in SSA form
311+
let mut regs = Vec::with_capacity(ir.instrs.len());
312+
regs.resize(ir.instrs.len(), false);
313+
for i in &ir.instrs {
314+
regs[i.1] = !regs[i.1];
315+
// if at any point this assertion fails, it means that a register has been
316+
// assigned a value multiple times
317+
if i.0 != Opcode::SetAttr {
318+
assert!(regs[i.1]);
319+
}
320+
}
321+
// check lifetimes
322+
let expected_lifetimes = vec![
323+
Lifetime::with_end_point(0, 4),
324+
Lifetime::with_end_point(1, 4),
325+
Lifetime::with_end_point(2, 3),
326+
Lifetime::with_end_point(3, 4),
327+
];
328+
assert_eq!(ir.lifetimes.len(), expected_lifetimes.len());
329+
for (lhs, rhs) in ir.lifetimes.iter().zip(expected_lifetimes.iter()) {
330+
assert_eq!(lhs, rhs);
331+
}
332+
// check constats map
333+
let expected_ints = vec![1];
334+
let ints = ir.const_map.get_ints();
335+
assert_eq!(ints.len(), expected_ints.len());
336+
for (lhs, rhs) in ints.iter().zip(expected_ints.iter()) {
337+
assert_eq!(lhs, rhs);
338+
}
339+
assert!(ir.const_map.get_floats().is_empty());
340+
let expected_strings = vec!["x", "y"];
341+
let strings = ir.const_map.get_strings();
342+
assert_eq!(strings.len(), expected_strings.len());
343+
for (lhs, rhs) in strings.iter().zip(expected_strings.iter()) {
344+
assert_eq!(lhs, rhs);
345+
}
253346
}
347+
348+
#[test]
349+
fn generates_get_attr_instr() {
350+
let pt = LuaParseTree::from_str(String::from("x = y"));
351+
let ir = compile_to_ir(&pt.unwrap());
352+
let expected_instrs = vec![
353+
HLInstr(Opcode::LDS, 1, 0, 0),
354+
HLInstr(Opcode::GetAttr, 2, 0, 1),
355+
HLInstr(Opcode::LDS, 3, 1, 0),
356+
HLInstr(Opcode::SetAttr, 0, 3, 2),
357+
];
358+
assert_eq!(ir.instrs.len(), expected_instrs.len());
359+
for (lhs, rhs) in ir.instrs.iter().zip(expected_instrs.iter()) {
360+
assert_eq!(lhs, rhs);
361+
}
362+
}
363+
254364
}

0 commit comments

Comments
 (0)