From ff596154e4546a7dbdfc24b7bfaad3e5f03575a6 Mon Sep 17 00:00:00 2001 From: Robert Bartlensky Date: Sun, 2 Dec 2018 20:14:06 +0000 Subject: [PATCH 1/2] Global variables are now loaded from _ENV. In the Lua specification, the global variables are loaded from a global table called `_ENV`. Programs can overwrite this variable as well, and can read and write from/to it. There are a few changes: * The compiler: * assumes that `_ENV` is always stored in register 0 * replaces all global variable writing, and reading with `GetAttr`, and `SetAttr` instructions * The vm: * always creates a `LuaTable` and stores it in register 0, so that globals can be easily accessed. * can now set and get attributes of `LuaTable`s --- luacompiler/src/lib/bytecode/instructions.rs | 24 +-- luacompiler/src/lib/bytecode/mod.rs | 5 + luacompiler/src/lib/irgen/mod.rs | 178 +++++++++++++++---- luacompiler/src/lib/irgen/register_map.rs | 91 +++++----- luacompiler/tests/integration_test.rs | 64 +++++-- luavm/src/lib/instructions/loads.rs | 7 +- luavm/src/lib/instructions/mod.rs | 1 + luavm/src/lib/instructions/tables.rs | 93 ++++++++++ luavm/src/lib/lua_values/lua_obj.rs | 16 +- luavm/src/lib/lua_values/lua_table.rs | 2 +- luavm/src/lib/lua_values/mod.rs | 29 ++- luavm/src/lib/mod.rs | 60 ++++++- 12 files changed, 451 insertions(+), 119 deletions(-) create mode 100644 luavm/src/lib/instructions/tables.rs diff --git a/luacompiler/src/lib/bytecode/instructions.rs b/luacompiler/src/lib/bytecode/instructions.rs index acb065c..da888bb 100644 --- a/luacompiler/src/lib/bytecode/instructions.rs +++ b/luacompiler/src/lib/bytecode/instructions.rs @@ -46,17 +46,19 @@ impl HLInstr { /// refer to at most 256 constants. #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum Opcode { - MOV = 0, // R(1) = R(2) - LDI = 1, // R(1) = I(1); load integer from the constant table - LDF = 2, // R(1) = F(1); load float from the constant table - LDS = 3, // R(1) = S(1); load string from the constant table - ADD = 4, // R(1) = R(2) + R(3) - SUB = 5, // R(1) = R(2) - R(3) - MUL = 6, // R(1) = R(2) * R(3) - DIV = 7, // R(1) = R(2) / R(3) - MOD = 8, // R(1) = R(2) % R(3) - FDIV = 9, // R(1) = R(2) // R(3) - EXP = 10, // R(1) = R(2) ^ R(3) + MOV = 0, // R(1) = R(2) + LDI = 1, // R(1) = I(1); load integer from the constant table + LDF = 2, // R(1) = F(1); load float from the constant table + LDS = 3, // R(1) = S(1); load string from the constant table + ADD = 4, // R(1) = R(2) + R(3) + SUB = 5, // R(1) = R(2) - R(3) + MUL = 6, // R(1) = R(2) * R(3) + DIV = 7, // R(1) = R(2) / R(3) + MOD = 8, // R(1) = R(2) % R(3) + FDIV = 9, // R(1) = R(2) // R(3) + EXP = 10, // R(1) = R(2) ^ R(3) + GetAttr = 11, // R(1) = R(2)[R(3)] + SetAttr = 12, // R(1)[R(2)] = R(3) } #[cfg(test)] diff --git a/luacompiler/src/lib/bytecode/mod.rs b/luacompiler/src/lib/bytecode/mod.rs index a07d231..8e359af 100644 --- a/luacompiler/src/lib/bytecode/mod.rs +++ b/luacompiler/src/lib/bytecode/mod.rs @@ -74,6 +74,11 @@ impl LuaBytecode { &self.strings[i as usize] } + /// Gets the size of the string constant table. + pub fn get_strings_len(&self) -> usize { + self.strings.len() + } + /// Serialize the bytecode to a file using bincode. pub fn serialize_to_file(&self, file: &str) -> io::Result<()> { let mut f = File::create(file)?; diff --git a/luacompiler/src/lib/irgen/mod.rs b/luacompiler/src/lib/irgen/mod.rs index fe1b2af..5411758 100644 --- a/luacompiler/src/lib/irgen/mod.rs +++ b/luacompiler/src/lib/irgen/mod.rs @@ -16,6 +16,7 @@ pub fn compile_to_ir(pt: &LuaParseTree) -> LuaIR { } /// Represents a compiler which translates a given Lua parse tree to an SSA IR. +/// The compiler assumes that the `_ENV` variable is always stored in register 0! struct LuaToIR<'a> { pt: &'a LuaParseTree, reg_map: RegisterMap<'a>, @@ -43,22 +44,7 @@ impl<'a> LuaToIR<'a> { ridx: RIdx(ridx), ref nodes, } if ridx == lua5_3_y::R_STAT => { - debug_assert!(nodes.len() == 3); - match nodes[1] { - Term { lexeme } if lexeme.tok_id() == lua5_3_l::T_EQ => { - let value = self.compile_expr(&nodes[2]); - let name = self.compile_variable(&nodes[0]); - // because we are creating an IR which is in SSA form, it - // means that each assignment creates a new register - let reg = self.reg_map.get_new_reg(); - // if a variable is assigned a value multiple times, we have - // to make sure that the map knows the new register which - // holds the new value - self.reg_map.set_reg(name, reg); - self.instrs.push(HLInstr(Opcode::MOV, reg, value, 0)); - } - _ => {} - } + self.compile_stat(nodes); } Nonterm { ridx: _, ref nodes } => { for i in (0..nodes.len()).rev() { @@ -73,6 +59,32 @@ impl<'a> LuaToIR<'a> { LuaIR::new(self.instrs, self.const_map, self.reg_map.get_lifetimes()) } + fn compile_stat(&mut self, nodes: &Vec>) { + debug_assert!(nodes.len() == 3); + match nodes[1] { + Term { lexeme } if lexeme.tok_id() == lua5_3_l::T_EQ => { + // x = 3 => _ENV["x"] = 3 + // compile the expression on the right + let value = self.compile_expr(&nodes[2]); + // load a reference to _ENV + let env_reg = self.reg_map.get_reg("_ENV").unwrap(); + // prepare the attribute for _ENV which is the name of the variable + let name = self.compile_variable(&nodes[0]); + let name_index = self.const_map.get_str(name.to_string()); + let attr_reg = self.reg_map.get_new_reg(); + self.instrs + .push(HLInstr(Opcode::LDS, attr_reg, name_index, 0)); + // if a variable is assigned a value multiple times, we have + // to make sure that the map knows the new register which + // holds the new value + self.reg_map.set_reg(name, value); + self.instrs + .push(HLInstr(Opcode::SetAttr, env_reg, attr_reg, value)); + } + _ => {} + } + } + /// Jumps to the first child of which denotes a variable name. fn compile_variable(&self, node: &Node) -> &'a str { let name = LuaToIR::find_term(node, lua5_3_l::T_NAME); @@ -130,7 +142,24 @@ impl<'a> LuaToIR<'a> { self.instrs.push(HLInstr(Opcode::LDS, reg, short_str, 0)); reg } - _ => self.reg_map.get_reg(value), + lua5_3_l::T_NAME => { + // if the variable is in a register, then we can return reg number + // otherwise we have to generate code for `_ENV[]` + self.reg_map.get_reg(value).unwrap_or_else(|| { + let env_reg = self.reg_map.get_reg("_ENV").unwrap(); + let name_index = self.const_map.get_str(value.to_string()); + let attr_reg = self.reg_map.get_new_reg(); + self.instrs + .push(HLInstr(Opcode::LDS, attr_reg, name_index, 0)); + let reg = self.reg_map.get_new_reg(); + self.instrs + .push(HLInstr(Opcode::GetAttr, reg, env_reg, attr_reg)); + reg + }) + } + _ => panic!( + "Cannot compile terminals that are not variable names, numbers or strings." + ), } } } @@ -187,20 +216,21 @@ mod tests { let pt = LuaParseTree::from_str(String::from("x = 1 + 2 * 3 / 2 ^ 2.0 // 1 - 2")); let ir = compile_to_ir(&pt.unwrap()); let expected_instrs = vec![ - HLInstr(Opcode::LDI, 0, 0, 0), - HLInstr(Opcode::LDI, 1, 1, 0), - HLInstr(Opcode::LDI, 2, 2, 0), - HLInstr(Opcode::MUL, 3, 1, 2), - HLInstr(Opcode::LDI, 4, 1, 0), - HLInstr(Opcode::LDF, 5, 0, 0), - HLInstr(Opcode::EXP, 6, 4, 5), - HLInstr(Opcode::DIV, 7, 3, 6), - HLInstr(Opcode::LDI, 8, 0, 0), - HLInstr(Opcode::FDIV, 9, 7, 8), - HLInstr(Opcode::ADD, 10, 0, 9), - HLInstr(Opcode::LDI, 11, 1, 0), - HLInstr(Opcode::SUB, 12, 10, 11), - HLInstr(Opcode::MOV, 13, 12, 0), + HLInstr(Opcode::LDI, 1, 0, 0), + HLInstr(Opcode::LDI, 2, 1, 0), + HLInstr(Opcode::LDI, 3, 2, 0), + HLInstr(Opcode::MUL, 4, 2, 3), + HLInstr(Opcode::LDI, 5, 1, 0), + HLInstr(Opcode::LDF, 6, 0, 0), + HLInstr(Opcode::EXP, 7, 5, 6), + HLInstr(Opcode::DIV, 8, 4, 7), + HLInstr(Opcode::LDI, 9, 0, 0), + HLInstr(Opcode::FDIV, 10, 8, 9), + HLInstr(Opcode::ADD, 11, 1, 10), + HLInstr(Opcode::LDI, 12, 1, 0), + HLInstr(Opcode::SUB, 13, 11, 12), + HLInstr(Opcode::LDS, 14, 0, 0), + HLInstr(Opcode::SetAttr, 0, 14, 13), ]; assert_eq!(ir.instrs.len(), expected_instrs.len()); for (lhs, rhs) in ir.instrs.iter().zip(expected_instrs.iter()) { @@ -213,11 +243,15 @@ mod tests { regs[i.1] = !regs[i.1]; // if at any point this assertion fails, it means that a register has been // assigned a value multiple times - assert!(regs[i.1]); + // SetAttr only updates the state of a register, so it doesn't mess up the + // correctness of the SSA + if i.0 != Opcode::SetAttr { + assert!(regs[i.1]); + } } // check lifetimes let expected_lifetimes = vec![ - Lifetime::with_end_point(0, 1), + Lifetime::with_end_point(0, 15), Lifetime::with_end_point(1, 2), Lifetime::with_end_point(2, 3), Lifetime::with_end_point(3, 4), @@ -231,6 +265,7 @@ mod tests { Lifetime::with_end_point(11, 12), Lifetime::with_end_point(12, 13), Lifetime::with_end_point(13, 14), + Lifetime::with_end_point(14, 15), ]; assert_eq!(ir.lifetimes.len(), expected_lifetimes.len()); for (lhs, rhs) in ir.lifetimes.iter().zip(expected_lifetimes.iter()) { @@ -249,6 +284,81 @@ mod tests { for (lhs, rhs) in floats.iter().zip(expected_floats.iter()) { assert_eq!(lhs, rhs); } - assert_eq!(ir.const_map.get_strings().len(), 0); + let expected_strings = vec!["x"]; + let strings = ir.const_map.get_strings(); + assert_eq!(strings.len(), expected_strings.len()); + for (lhs, rhs) in strings.iter().zip(expected_strings.iter()) { + assert_eq!(lhs, rhs); + } + } + + #[test] + fn correctness_of_ssa_ir2() { + let pt = LuaParseTree::from_str(String::from("x = 1\ny = x")); + let ir = compile_to_ir(&pt.unwrap()); + let expected_instrs = vec![ + HLInstr(Opcode::LDI, 1, 0, 0), + HLInstr(Opcode::LDS, 2, 0, 0), + HLInstr(Opcode::SetAttr, 0, 2, 1), + HLInstr(Opcode::LDS, 3, 1, 0), + HLInstr(Opcode::SetAttr, 0, 3, 1), + ]; + assert_eq!(ir.instrs.len(), expected_instrs.len()); + for (lhs, rhs) in ir.instrs.iter().zip(expected_instrs.iter()) { + assert_eq!(lhs, rhs); + } + // check that the IR is in SSA form + let mut regs = Vec::with_capacity(ir.instrs.len()); + regs.resize(ir.instrs.len(), false); + for i in &ir.instrs { + regs[i.1] = !regs[i.1]; + // if at any point this assertion fails, it means that a register has been + // assigned a value multiple times + if i.0 != Opcode::SetAttr { + assert!(regs[i.1]); + } + } + // check lifetimes + let expected_lifetimes = vec![ + Lifetime::with_end_point(0, 4), + Lifetime::with_end_point(1, 4), + Lifetime::with_end_point(2, 3), + Lifetime::with_end_point(3, 4), + ]; + assert_eq!(ir.lifetimes.len(), expected_lifetimes.len()); + for (lhs, rhs) in ir.lifetimes.iter().zip(expected_lifetimes.iter()) { + assert_eq!(lhs, rhs); + } + // check constats map + let expected_ints = vec![1]; + let ints = ir.const_map.get_ints(); + assert_eq!(ints.len(), expected_ints.len()); + for (lhs, rhs) in ints.iter().zip(expected_ints.iter()) { + assert_eq!(lhs, rhs); + } + assert!(ir.const_map.get_floats().is_empty()); + let expected_strings = vec!["x", "y"]; + let strings = ir.const_map.get_strings(); + assert_eq!(strings.len(), expected_strings.len()); + for (lhs, rhs) in strings.iter().zip(expected_strings.iter()) { + assert_eq!(lhs, rhs); + } } + + #[test] + fn generates_get_attr_instr() { + let pt = LuaParseTree::from_str(String::from("x = y")); + let ir = compile_to_ir(&pt.unwrap()); + let expected_instrs = vec![ + HLInstr(Opcode::LDS, 1, 0, 0), + HLInstr(Opcode::GetAttr, 2, 0, 1), + HLInstr(Opcode::LDS, 3, 1, 0), + HLInstr(Opcode::SetAttr, 0, 3, 2), + ]; + assert_eq!(ir.instrs.len(), expected_instrs.len()); + for (lhs, rhs) in ir.instrs.iter().zip(expected_instrs.iter()) { + assert_eq!(lhs, rhs); + } + } + } diff --git a/luacompiler/src/lib/irgen/register_map.rs b/luacompiler/src/lib/irgen/register_map.rs index 23f36f7..0ff3a2f 100644 --- a/luacompiler/src/lib/irgen/register_map.rs +++ b/luacompiler/src/lib/irgen/register_map.rs @@ -1,5 +1,8 @@ use std::{collections::HashMap, vec::Vec}; +/// The register in which `_ENV` lives. +pub const ENV_REG: usize = 0; + /// Represents a tuple which is used to specify the lifetime of a register. /// For example if a register is first used by the 4th instruction of the bytecode, and /// used last by the 7th instruction, the register's lifetime would be (4, 8). @@ -40,8 +43,7 @@ pub struct RegisterMap<'a> { impl<'a> RegisterMap<'a> { pub fn new() -> RegisterMap<'a> { RegisterMap { - lifetimes: vec![], - // the first map holds the variables of the module + lifetimes: vec![Lifetime::new(0)], // env's lifetime will be [0, 1) reg_maps: vec![HashMap::new()], } } @@ -72,27 +74,27 @@ impl<'a> RegisterMap<'a> { } /// Get the register of . - pub fn get_reg(&mut self, name: &'a str) -> usize { + pub fn get_reg(&mut self, name: &'a str) -> Option { let lifetimes = &mut self.lifetimes; - for map in self.reg_maps[1..].iter().rev() { + for map in self.reg_maps.iter().rev() { if let Some(®) = map.get(name) { - return reg; + let len = lifetimes.len(); + lifetimes[reg].set_end_point(len + 1); + return Some(reg); } } - // In lua, if a variable is queried, but isn't in scope, a Nil is returned instead - // If none of the maps have a definition for that means we have to define - // it ourselves in the map of the module (the first map in ). - *self.reg_maps[0] - .entry(name) - .and_modify(|reg| { - let len = lifetimes.len(); - lifetimes[*reg].set_end_point(len + 1); - }) - .or_insert_with(|| { - let lifetime = Lifetime::new(lifetimes.len()); - lifetimes.push(lifetime); - lifetimes.len() - 1 - }) + // If we cannot find in any of the maps, that means it is a global, and we + // will return a None, indicating to the compiler that it needs to generate + // instructions that will load from `_ENV` + // Users can also reference _ENV, in which case we want to update _ENVs lifetime + // and return the register of _ENV (which is always 0) + if name == "_ENV" { + let len = lifetimes.len(); + lifetimes[ENV_REG].set_end_point(len + 1); + Some(ENV_REG) + } else { + None + } } /// Set the register of to . @@ -118,38 +120,37 @@ mod tests { fn new_reg_correctly_increments_counter() { let mut rm = RegisterMap::new(); for i in 0..10 { - assert_eq!(rm.get_new_reg(), i); + assert_eq!(rm.get_new_reg(), i + 1); } - assert_eq!(rm.reg_count(), 10); + assert_eq!(rm.reg_count(), 11); } #[test] fn correctly_maps_strings_to_registers() { let mut rm = RegisterMap::new(); // create a new register - assert_eq!(rm.get_new_reg(), 0); + assert_eq!(rm.get_new_reg(), 1); // create a mapping - assert_eq!(rm.create_reg("foo"), 1); - assert_eq!(*rm.reg_maps[0].get("foo").unwrap(), 1); - assert_eq!(rm.get_reg("foo"), 1); - assert_eq!(*rm.reg_maps[0].get("foo").unwrap(), 1); - assert_eq!(rm.get_reg("bar"), 2); - assert_eq!(*rm.reg_maps[0].get("bar").unwrap(), 2); + assert_eq!(rm.create_reg("foo"), 2); + assert_eq!(*rm.reg_maps[0].get("foo").unwrap(), 2); + assert_eq!(rm.get_reg("foo"), Some(2)); + assert_eq!(*rm.reg_maps[0].get("foo").unwrap(), 2); + assert_eq!(rm.get_reg("bar"), None); + assert!(rm.reg_maps[0].get("bar").is_none()); // create a new scope in which we define another foo rm.push_scope(); assert_eq!(rm.create_reg("foo"), 3); assert_eq!(*rm.reg_maps[1].get("foo").unwrap(), 3); - assert_eq!(rm.get_reg("foo"), 3); + assert_eq!(rm.get_reg("foo"), Some(3)); assert_eq!(*rm.reg_maps[1].get("foo").unwrap(), 3); - assert_eq!(rm.get_reg("bar"), 2); - assert_eq!(*rm.reg_maps[0].get("bar").unwrap(), 2); + assert_eq!(rm.get_reg("bar"), None); assert!(rm.reg_maps[1].get("bar").is_none()); rm.pop_scope(); // pop the scope and query foo and bar again to check if they have the same values - assert_eq!(rm.get_reg("foo"), 1); - assert_eq!(*rm.reg_maps[0].get("foo").unwrap(), 1); - assert_eq!(rm.get_reg("bar"), 2); - assert_eq!(*rm.reg_maps[0].get("bar").unwrap(), 2); + assert_eq!(rm.get_reg("foo"), Some(2)); + assert_eq!(*rm.reg_maps[0].get("foo").unwrap(), 2); + assert!(rm.get_reg("bar").is_none()); + assert!(rm.reg_maps[0].get("bar").is_none()); // test total number of registers created assert_eq!(rm.reg_count(), 4); } @@ -158,20 +159,20 @@ mod tests { fn lifetimes_are_correcly_updated() { let mut rm = RegisterMap::new(); let reg1 = rm.get_new_reg(); - assert_eq!(rm.lifetimes[reg1].0, 0); - assert_eq!(rm.lifetimes[reg1].1, 1); + assert_eq!(rm.lifetimes[reg1].0, 1); + assert_eq!(rm.lifetimes[reg1].1, 2); let reg2 = rm.create_reg("reg"); - assert_eq!(rm.lifetimes[reg2].0, 1); - assert_eq!(rm.lifetimes[reg2].1, 2); - rm.get_reg("reg"); - assert_eq!(rm.lifetimes[reg2].0, 1); + assert_eq!(rm.lifetimes[reg2].0, 2); assert_eq!(rm.lifetimes[reg2].1, 3); + rm.get_reg("reg"); + assert_eq!(rm.lifetimes[reg2].0, 2); + assert_eq!(rm.lifetimes[reg2].1, 4); rm.push_scope(); let reg3 = rm.create_reg("reg3"); rm.pop_scope(); - assert_eq!(rm.lifetimes[reg3].0, 2); - assert_eq!(rm.lifetimes[reg3].1, 3); - assert_eq!(rm.reg_count(), 3); + assert_eq!(rm.lifetimes[reg3].0, 3); + assert_eq!(rm.lifetimes[reg3].1, 4); + assert_eq!(rm.reg_count(), 4); } #[test] @@ -182,7 +183,7 @@ mod tests { rm.create_reg("foo"); } for i in 0..3 { - assert_eq!(rm.get_reg("foo"), 2 - i); + assert_eq!(rm.get_reg("foo"), Some(3 - i)); rm.pop_scope(); } } diff --git a/luacompiler/tests/integration_test.rs b/luacompiler/tests/integration_test.rs index 4b30b68..709408a 100644 --- a/luacompiler/tests/integration_test.rs +++ b/luacompiler/tests/integration_test.rs @@ -11,46 +11,74 @@ use luacompiler::{ fn ldi_generation() { let pt = LuaParseTree::from_str(String::from("x = 1")).unwrap(); let bc = compile_to_bytecode(compile_to_ir(&pt)); - assert_eq!(bc.instrs_len(), 2); - assert_eq!(bc.reg_count(), 2); + assert_eq!(bc.reg_count(), 3); assert_eq!(bc.get_int(0), 1); - assert_eq!(bc.get_instr(0), make_instr(Opcode::LDI, 0, 0, 0)); - assert_eq!(bc.get_instr(1), make_instr(Opcode::MOV, 1, 0, 0)); + assert_eq!(bc.get_string(0), "x"); + let expected_instrs = vec![ + make_instr(Opcode::LDI, 1, 0, 0), + make_instr(Opcode::LDS, 2, 0, 0), + make_instr(Opcode::SetAttr, 0, 2, 1), + ]; + assert_eq!(bc.instrs_len(), expected_instrs.len()); + for i in 0..expected_instrs.len() { + assert_eq!(bc.get_instr(i), expected_instrs[i]); + } } #[test] fn ldf_generation() { let pt = LuaParseTree::from_str(String::from("x = 2.0")).unwrap(); let bc = compile_to_bytecode(compile_to_ir(&pt)); - assert_eq!(bc.instrs_len(), 2); - assert_eq!(bc.reg_count(), 2); + assert_eq!(bc.reg_count(), 3); assert_eq!(bc.get_float(0).to_string(), "2"); - assert_eq!(bc.get_instr(0), make_instr(Opcode::LDF, 0, 0, 0)); - assert_eq!(bc.get_instr(1), make_instr(Opcode::MOV, 1, 0, 0)); + assert_eq!(bc.get_string(0), "x"); + let expected_instrs = vec![ + make_instr(Opcode::LDF, 1, 0, 0), + make_instr(Opcode::LDS, 2, 0, 0), + make_instr(Opcode::SetAttr, 0, 2, 1), + ]; + assert_eq!(bc.instrs_len(), expected_instrs.len()); + for i in 0..expected_instrs.len() { + assert_eq!(bc.get_instr(i), expected_instrs[i]); + } } #[test] fn lds_generation() { let pt = LuaParseTree::from_str(String::from("x = \"1.2\"")).unwrap(); let bc = compile_to_bytecode(compile_to_ir(&pt)); - assert_eq!(bc.instrs_len(), 2); - assert_eq!(bc.reg_count(), 2); + assert_eq!(bc.reg_count(), 3); assert_eq!(bc.get_string(0), "1.2"); - assert_eq!(bc.get_instr(0), make_instr(Opcode::LDS, 0, 0, 0)); - assert_eq!(bc.get_instr(1), make_instr(Opcode::MOV, 1, 0, 0)); + assert_eq!(bc.get_string(1), "x"); + let expected_instrs = vec![ + make_instr(Opcode::LDS, 1, 0, 0), + make_instr(Opcode::LDS, 2, 1, 0), + make_instr(Opcode::SetAttr, 0, 2, 1), + ]; + assert_eq!(bc.instrs_len(), expected_instrs.len()); + for i in 0..expected_instrs.len() { + assert_eq!(bc.get_instr(i), expected_instrs[i]); + } } fn assert_bytecode(opcode: Opcode, operation: &str) { let pt = LuaParseTree::from_str(String::from(format!("x = 1 {} 2", operation))).unwrap(); let bc = compile_to_bytecode(compile_to_ir(&pt)); - assert_eq!(bc.instrs_len(), 4); - assert_eq!(bc.reg_count(), 4); assert_eq!(bc.get_int(0), 1); assert_eq!(bc.get_int(1), 2); - assert_eq!(bc.get_instr(0), make_instr(Opcode::LDI, 0, 0, 0)); - assert_eq!(bc.get_instr(1), make_instr(Opcode::LDI, 1, 1, 0)); - assert_eq!(bc.get_instr(2), make_instr(opcode, 2, 0, 1)); - assert_eq!(bc.get_instr(3), make_instr(Opcode::MOV, 3, 2, 0)); + assert_eq!(bc.get_string(0), "x"); + let expected_instrs = vec![ + make_instr(Opcode::LDI, 1, 0, 0), + make_instr(Opcode::LDI, 2, 1, 0), + make_instr(opcode, 3, 1, 2), + make_instr(Opcode::LDS, 4, 0, 0), + make_instr(Opcode::SetAttr, 0, 4, 3), + ]; + assert_eq!(bc.instrs_len(), expected_instrs.len()); + for i in 0..expected_instrs.len() { + assert_eq!(bc.get_instr(i), expected_instrs[i]); + } + assert_eq!(bc.reg_count(), 5); } #[test] diff --git a/luavm/src/lib/instructions/loads.rs b/luavm/src/lib/instructions/loads.rs index 783fa98..755bcc8 100644 --- a/luavm/src/lib/instructions/loads.rs +++ b/luavm/src/lib/instructions/loads.rs @@ -23,7 +23,10 @@ pub fn ldf(vm: &mut Vm, instr: u32) -> Result<(), LuaError> { } pub fn lds(vm: &mut Vm, instr: u32) -> Result<(), LuaError> { - let val = vm.bytecode.get_string(second_arg(instr)); - vm.registers[first_arg(instr) as usize] = LuaVal::from(val.to_string()); + let arg2 = second_arg(instr); + let val = vm.bytecode.get_string(arg2); + // we also want to save the index of the string in the constant table in order to + // speed up lookups in _ENV + vm.registers[first_arg(instr) as usize] = LuaVal::from((val.to_string(), arg2 as usize)); Ok(()) } diff --git a/luavm/src/lib/instructions/mod.rs b/luavm/src/lib/instructions/mod.rs index 84a8ee4..a489735 100644 --- a/luavm/src/lib/instructions/mod.rs +++ b/luavm/src/lib/instructions/mod.rs @@ -1,2 +1,3 @@ pub mod arithmetic_operators; pub mod loads; +pub mod tables; diff --git a/luavm/src/lib/instructions/tables.rs b/luavm/src/lib/instructions/tables.rs new file mode 100644 index 0000000..0655e85 --- /dev/null +++ b/luavm/src/lib/instructions/tables.rs @@ -0,0 +1,93 @@ +use errors::LuaError; +use luacompiler::bytecode::instructions::{first_arg, second_arg, third_arg}; +use luacompiler::irgen::register_map::ENV_REG; +use Vm; + +/// R(1) = R(2)[R(3)] +pub fn get_attr(vm: &mut Vm, instr: u32) -> Result<(), LuaError> { + let val = { + let arg2 = second_arg(instr) as usize; + let from = &vm.registers[arg2]; + let attr = &vm.registers[third_arg(instr) as usize]; + match attr.get_constant_index() { + Some(i) => { + if arg2 == ENV_REG { + vm.env_attrs[i].clone() + } else { + from.get_attr(attr)? + } + } + _ => from.get_attr(attr)?, + } + }; + vm.registers[first_arg(instr) as usize] = val; + Ok(()) +} + +/// R(1)[R(2)] = R(3) +pub fn set_attr(vm: &mut Vm, instr: u32) -> Result<(), LuaError> { + let attr = vm.registers[second_arg(instr) as usize].clone(); + let val = vm.registers[third_arg(instr) as usize].clone(); + let arg1 = first_arg(instr) as usize; + match attr.get_constant_index() { + Some(i) => { + if arg1 == ENV_REG { + vm.env_attrs[i] = val + } else { + vm.registers[arg1].set_attr(attr, val)? + } + } + _ => vm.registers[arg1].set_attr(attr, val)?, + } + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use lua_values::LuaVal; + use luacompiler::{ + bytecode::instructions::{make_instr, Opcode}, + bytecodegen::compile_to_bytecode, + irgen::compile_to_ir, + LuaParseTree, + }; + + fn get_vm_for(p: String) -> Vm { + let pt = LuaParseTree::from_str(p).unwrap(); + let ir = compile_to_ir(&pt); + let bc = compile_to_bytecode(ir); + Vm::new(bc) + } + + #[test] + fn get_attr_works() { + // this should generate: + // LDI 1 0 0 + // LDS 2 0 0 + // SetAttr 0 2 1 + let mut vm = get_vm_for("x = 2".to_string()); + vm.eval(); // so that the registers are updated based on the supplied program + assert!(get_attr(&mut vm, make_instr(Opcode::GetAttr, 1, ENV_REG as u8, 2)).is_ok()); + assert_eq!(vm.registers[1], LuaVal::from(2)); + } + + #[test] + fn set_attr_works() { + // this should generate: + // LDI 1 0 0 + // LDS 2 0 0 + // SetAttr 0 2 1 + let mut vm = get_vm_for("x = 2".to_string()); + vm.eval(); // so that the registers are updated based on the supplied program + assert!(set_attr(&mut vm, make_instr(Opcode::SetAttr, ENV_REG as u8, 2, 1)).is_ok()); + let index_of_x = 0; + assert_eq!( + vm.registers[ENV_REG] + .get_attr(&LuaVal::from((String::from("x"), index_of_x))) + .unwrap(), + LuaVal::new() + ); + assert_eq!(vm.env_attrs[index_of_x], LuaVal::from(2)); + } +} diff --git a/luavm/src/lib/lua_values/lua_obj.rs b/luavm/src/lib/lua_values/lua_obj.rs index d54b406..d0576d3 100644 --- a/luavm/src/lib/lua_values/lua_obj.rs +++ b/luavm/src/lib/lua_values/lua_obj.rs @@ -22,6 +22,11 @@ pub trait LuaObj { fn get_string_ref(&self) -> Option<&str> { None } + /// If the underlying type is a String, then this method returns the String's index + /// in the constant table. + fn get_constant_index(&self) -> Option { + None + } } /// Boxes the given `LuaObj`, and returns the address of the box. @@ -101,11 +106,16 @@ impl LuaObj for LuaFloat { pub struct LuaString { pub v: String, + /// The index of the string in the constant table. + pub const_index: Option, } impl LuaObj for LuaString { fn clone_box(&self) -> Box { - Box::new(LuaString { v: self.v.clone() }) + Box::new(LuaString { + v: self.v.clone(), + const_index: self.const_index, + }) } fn is_number(&self) -> bool { @@ -135,4 +145,8 @@ impl LuaObj for LuaString { fn get_string_ref(&self) -> Option<&str> { Some(&self.v) } + + fn get_constant_index(&self) -> Option { + self.const_index + } } diff --git a/luavm/src/lib/lua_values/lua_table.rs b/luavm/src/lib/lua_values/lua_table.rs index 6d61a89..3d57c30 100644 --- a/luavm/src/lib/lua_values/lua_table.rs +++ b/luavm/src/lib/lua_values/lua_table.rs @@ -19,7 +19,7 @@ impl LuaTable { self.v.borrow_mut().insert(attr, val); } - /// Gets a reference to given attribute. + /// Gets a reference to the given attribute. pub fn get_attr(&self, attr: &LuaVal) -> LuaVal { match self.v.borrow().get(attr) { Some(val) => val.clone(), diff --git a/luavm/src/lib/lua_values/mod.rs b/luavm/src/lib/lua_values/mod.rs index 5b085b7..c1f16ae 100644 --- a/luavm/src/lib/lua_values/mod.rs +++ b/luavm/src/lib/lua_values/mod.rs @@ -1,5 +1,5 @@ mod lua_obj; -mod lua_table; +pub mod lua_table; mod tagging; use self::{lua_obj::*, lua_table::LuaTable, tagging::*}; @@ -57,6 +57,14 @@ impl LuaVal { } } + /// Gets the index of the underlying string in the constant table. + pub fn get_constant_index(&self) -> Option { + match self.kind() { + LuaValKind::BOXED => unsafe { (*self.as_boxed()).get_constant_index() }, + _ => None, + } + } + /// Returns true if the underlying type is either a float or a string. /// In Lua, if either of these two types are used in an arithmetic /// expression, then both arguments are converted to floats. @@ -264,7 +272,24 @@ impl From for LuaVal { /// Create a float LuaVal. fn from(string: String) -> Self { LuaVal { - val: LuaValKind::BOXED ^ to_boxed(Box::new(LuaString { v: string })), + val: LuaValKind::BOXED + ^ to_boxed(Box::new(LuaString { + v: string, + const_index: None, + })), + } + } +} + +impl From<(String, usize)> for LuaVal { + /// Create a float LuaVal. + fn from(string: (String, usize)) -> Self { + LuaVal { + val: LuaValKind::BOXED + ^ to_boxed(Box::new(LuaString { + v: string.0, + const_index: Some(string.1), + })), } } } diff --git a/luavm/src/lib/mod.rs b/luavm/src/lib/mod.rs index ab1cefe..fe5294a 100644 --- a/luavm/src/lib/mod.rs +++ b/luavm/src/lib/mod.rs @@ -12,18 +12,26 @@ mod instructions; mod lua_values; use errors::LuaError; -use instructions::{arithmetic_operators::*, loads::*}; -use lua_values::LuaVal; +use instructions::{arithmetic_operators::*, loads::*, tables::*}; +use lua_values::{lua_table::LuaTable, LuaVal}; use luacompiler::bytecode::{instructions::opcode, LuaBytecode}; +use std::collections::HashMap; /// The instruction handler for each opcode. -const OPCODE_HANDLER: &'static [fn(&mut Vm, u32) -> Result<(), LuaError>] = - &[mov, ldi, ldf, lds, add, sub, mul, div, modulus, fdiv, exp]; +const OPCODE_HANDLER: &'static [fn(&mut Vm, u32) -> Result<(), LuaError>] = &[ + mov, ldi, ldf, lds, add, sub, mul, div, modulus, fdiv, exp, get_attr, set_attr, +]; /// Represents a `LuaBytecode` interpreter. pub struct Vm { pub bytecode: LuaBytecode, pub registers: Vec, + /// All attributes of _ENV that are also part of the string constant table are stored + /// in a vector. Let's consider an example: "x" is mapped to index 2 in the constant + /// table. This means that _ENV["x"] = will modify env_attrs[2]. If however + /// "x" was not in the constant table, then the lookup of the attribute would be + /// done via the `get_attr` method of the `LuaTable` struct. + pub env_attrs: Vec, } impl Vm { @@ -31,12 +39,16 @@ impl Vm { pub fn new(bytecode: LuaBytecode) -> Vm { let regs = bytecode.reg_count(); let mut registers: Vec = Vec::with_capacity(regs as usize); - for _ in 0..regs { + registers.push(LuaVal::from(LuaTable::new(HashMap::new()))); + for _ in 1..regs { registers.push(LuaVal::new()); } + let mut env_attrs = Vec::new(); + env_attrs.resize(bytecode.get_strings_len(), LuaVal::new()); Vm { bytecode, registers, + env_attrs, } } @@ -51,3 +63,41 @@ impl Vm { } } } + +#[cfg(test)] +mod tests { + use super::*; + use luacompiler::irgen::register_map::ENV_REG; + use luacompiler::{bytecodegen::compile_to_bytecode, irgen::compile_to_ir, LuaParseTree}; + + fn get_vm_for(p: String) -> Vm { + let pt = LuaParseTree::from_str(p).unwrap(); + let ir = compile_to_ir(&pt); + let bc = compile_to_bytecode(ir); + Vm::new(bc) + } + + #[test] + fn env_set_and_get() { + let mut vm = get_vm_for("x = 3\ny = x + 1".to_string()); + vm.eval(); + let index_of_x = 0; + // vm.registers[0] has a reference to the _ENV variable + // this is true because the compiler always loads the environment into register 0 + assert_eq!( + vm.registers[ENV_REG] + .get_attr(&LuaVal::from((String::from("x"), index_of_x))) + .unwrap(), + LuaVal::new() + ); + assert_eq!(vm.env_attrs[index_of_x], LuaVal::from(3)); + let index_of_y = 1; + assert_eq!( + vm.registers[ENV_REG] + .get_attr(&LuaVal::from((String::from("y"), index_of_y))) + .unwrap(), + LuaVal::new() + ); + assert_eq!(vm.env_attrs[index_of_y], LuaVal::from(4)); + } +} From 0ec46bac5575db2fdceb544d939e9c0564b1b54f Mon Sep 17 00:00:00 2001 From: Robert Bartlensky Date: Mon, 14 Jan 2019 13:42:08 +0000 Subject: [PATCH 2/2] Update to grmtools 0.2. --- luacompiler/Cargo.toml | 12 ++++++------ luacompiler/build.rs | 11 ++++++----- luacompiler/src/lib/mod.rs | 1 - 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/luacompiler/Cargo.toml b/luacompiler/Cargo.toml index 7cddb48..8760129 100644 --- a/luacompiler/Cargo.toml +++ b/luacompiler/Cargo.toml @@ -13,10 +13,9 @@ name = "luacompiler" path = "src/lib/mod.rs" [dependencies] -cfgrammar = { git="https://github.com/softdevteam/grmtools" } -lrlex = { git="https://github.com/softdevteam/grmtools" } -lrpar = { git="https://github.com/softdevteam/grmtools" } -lrtable = { git="https://github.com/softdevteam/grmtools" } +cfgrammar = "0.2" +lrlex = "0.2" +lrpar = "0.2" bincode = "1.0.1" serde = "1.0.80" serde_derive = "1.0" @@ -26,5 +25,6 @@ version = "2.32" default-features = false [build-dependencies] -lrpar = { git="https://github.com/softdevteam/grmtools" } -lrlex = { git="https://github.com/softdevteam/grmtools" } +cfgrammar = "0.2" +lrlex = "0.2" +lrpar = "0.2" diff --git a/luacompiler/build.rs b/luacompiler/build.rs index f06619c..d1e3b76 100644 --- a/luacompiler/build.rs +++ b/luacompiler/build.rs @@ -1,16 +1,17 @@ +extern crate cfgrammar; extern crate lrlex; extern crate lrpar; +use cfgrammar::yacc::{YaccKind, YaccOriginalActionKind}; use lrlex::LexerBuilder; -use lrpar::ActionKind; use lrpar::CTParserBuilder; fn main() -> Result<(), Box> { - let mut ct = CTParserBuilder::::new_with_storaget() + let lex_rule_ids_map = CTParserBuilder::::new_with_storaget() .error_on_conflicts(false) - .action_kind(ActionKind::GenericParseTree); - let lex_rule_ids_map = ct.process_file_in_src("lua5_3/lua5_3.y")?; - LexerBuilder::new() + .yacckind(YaccKind::Original(YaccOriginalActionKind::GenericParseTree)) + .process_file_in_src("lua5_3/lua5_3.y")?; + LexerBuilder::::new() .rule_ids_map(lex_rule_ids_map) .process_file_in_src("lua5_3/lua5_3.l")?; Ok(()) diff --git a/luacompiler/src/lib/mod.rs b/luacompiler/src/lib/mod.rs index d61f1bd..d94626d 100644 --- a/luacompiler/src/lib/mod.rs +++ b/luacompiler/src/lib/mod.rs @@ -3,7 +3,6 @@ extern crate cfgrammar; extern crate lrlex; #[macro_use] extern crate lrpar; -extern crate lrtable; #[macro_use] extern crate serde_derive; extern crate bincode;