diff --git a/aria-bin/src/file_eval.rs b/aria-bin/src/file_eval.rs
index 5bf2ea00..38b1f1e6 100644
--- a/aria-bin/src/file_eval.rs
+++ b/aria-bin/src/file_eval.rs
@@ -63,7 +63,7 @@ fn eval_buffer(
         println!("Module dump:\n{output}\n");
     }
 
-    let r_module = match RuntimeModule::new(c_module) {
+    let r_module = match RuntimeModule::new(vm, c_module) {
         Ok(m) => m,
         Err(err) => {
             return Err(print_report_from_vm_error(&err.into()));
diff --git a/aria-bin/src/repl_eval.rs b/aria-bin/src/repl_eval.rs
index 0679f925..fdb75106 100644
--- a/aria-bin/src/repl_eval.rs
+++ b/aria-bin/src/repl_eval.rs
@@ -190,7 +190,7 @@ impl<'a> Repl<'a> {
             }
         };
 
-        let r_module = match RuntimeModule::new(c_module) {
+        let r_module = match RuntimeModule::new(&mut vm, c_module) {
             Ok(m) => m,
             Err(err) => {
                 return Err(print_report_from_vm_error(&err.into()));
@@ -278,7 +278,7 @@ impl<'a> Repl<'a> {
             println!("Module dump:\n{output}\n");
         }
 
-        let r_module = match RuntimeModule::new(c_module) {
+        let r_module = match RuntimeModule::new(&mut self.vm, c_module) {
             Ok(m) => m,
             Err(err) => {
                 return Err(self.print_error_report(build_report_from_vm_error(&err.into())));
diff --git a/vm-lib/src/builtins/mod.rs b/vm-lib/src/builtins/mod.rs
index d78af880..9d7c8a54 100644
--- a/vm-lib/src/builtins/mod.rs
+++ b/vm-lib/src/builtins/mod.rs
@@ -12,6 +12,7 @@ use crate::{
         kind::RuntimeValueType,
         object::ObjectBox,
     },
+    symbol::Interner,
 };
 
 mod alloc;
@@ -70,6 +71,7 @@ impl AriaBuiltinTypes {
 pub struct VmGlobals {
     values: Rc,
     builtin_types: AriaBuiltinTypes,
+    interner: Interner,
 }
 
 impl VmGlobals {
@@ -119,6 +121,7 @@ impl Default for VmGlobals {
         let mut this = Self {
             values: Default::default(),
             builtin_types: Default::default(),
+            interner: Default::default(),
         };
 
         this.register_builtin_type(BuiltinTypeId::Any, RuntimeValueType::Any); // Most anything needs Any
@@ -162,6 +165,21 @@ impl Default for VmGlobals {
 }
 
 impl VmGlobals {
+    /// Interns `s` in the globals' interner and returns its symbol.
+    ///
+    /// # Errors
+    /// Returns `VmErrorReason::TooManyInternedSymbols` when the interner is full.
+    pub fn intern_symbol(&mut self, s: &str) -> Result<crate::symbol::Symbol, VmErrorReason> {
+        self.interner.intern(s).map_err(|e| match e {
+            crate::symbol::InternError::TooManySymbols => VmErrorReason::TooManyInternedSymbols,
+        })
+    }
+
+    /// Returns the string `sym` was interned from, or `None` if the interner has no such symbol.
+    pub fn resolve_symbol(&self, sym: crate::symbol::Symbol) -> Option<&str> {
+        self.interner.resolve(sym)
+    }
+
     pub fn load_named_value(&self, name: &str) -> Option {
         self.values.read(name)
     }
diff --git a/vm-lib/src/error/vm_error.rs b/vm-lib/src/error/vm_error.rs
index c28d76d9..f92fa5db 100644
--- a/vm-lib/src/error/vm_error.rs
+++ b/vm-lib/src/error/vm_error.rs
@@ -81,6 +81,9 @@ pub enum VmErrorReason {
     #[error("bytecode exceeds maximum allowed size")]
     BytecodeTooLarge,
 
+    #[error("too many symbols have been interned")]
+    TooManyInternedSymbols,
+
     #[error("VM execution halted")]
     VmHalted,
 }
diff --git a/vm-lib/src/lib.rs b/vm-lib/src/lib.rs
index e3d8c164..45da5c11 100644
--- a/vm-lib/src/lib.rs
+++ b/vm-lib/src/lib.rs
@@ -12,6 +12,7 @@ pub mod opcodes;
 pub mod runtime_module;
 pub mod runtime_value;
 pub mod stack;
+pub mod symbol;
 pub mod vm;
 
 #[cfg(test)]
diff --git a/vm-lib/src/runtime_module.rs b/vm-lib/src/runtime_module.rs
index 4f648944..17374ad9 100644
--- a/vm-lib/src/runtime_module.rs
+++ b/vm-lib/src/runtime_module.rs
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: Apache-2.0
 use std::{cell::RefCell, collections::HashSet, rc::Rc};
 
-use aria_compiler::{bc_reader::DecodeError, module::CompiledModule};
+use aria_compiler::module::CompiledModule;
+use haxby_opcodes::Opcode;
 use rustc_data_structures::fx::FxHashMap;
 
 use crate::{
@@ -12,6 +13,7 @@ use crate::{
         function::{BuiltinFunctionImpl, Function},
         isa::IsaCheckable,
     },
+    vm::VirtualMachine,
 };
 
 #[derive(Clone)]
@@ -24,14 +26,110 @@ struct RuntimeModuleImpl {
     compiled_module: CompiledModule,
     indexed_constants: Vec,
     values: RefCell>,
+    entry_co: crate::runtime_value::runtime_code_object::CodeObject,
+}
+
+/// Decodes `bytes` into a list of opcodes, reading until the decoder reports
+/// `DecodeError::EndOfStream`; any other decode error is returned to the caller.
+fn byte_array_to_opcode_array(bytes: &[u8]) -> aria_compiler::bc_reader::DecodeResult<Vec<Opcode>> {
+    let mut opcodes = Vec::new();
+    let mut decoder = aria_compiler::bc_reader::BytecodeReader::try_from(bytes)?;
+
+    loop {
+        let next = decoder.read_opcode();
+        match next {
+            Ok(op) => opcodes.push(op),
+            Err(err) => {
+                return match err {
+                    aria_compiler::bc_reader::DecodeError::EndOfStream => Ok(opcodes),
+                    _ => Err(err),
+                };
+            }
+        }
+    }
+}
+
+/// Interns the attribute name referenced by every `ReadAttribute` and
+/// `WriteAttribute` opcode in `opcodes`.
+///
+/// NOTE(review): the interned symbol is currently discarded and `opcodes` is
+/// left unmodified, so this only populates the interner.
+///
+/// # Errors
+/// Returns whatever `VmGlobals::intern_symbol` reports, e.g.
+/// `VmErrorReason::TooManyInternedSymbols`.
+///
+/// # Panics
+/// Panics if an attribute opcode does not refer to a string constant of `cm`.
+fn replace_attribute_access_with_interned(
+    vm: &mut VirtualMachine,
+    cm: &CompiledModule,
+    opcodes: &mut Vec<Opcode>,
+) -> Result<(), VmErrorReason> {
+    for opcode in opcodes {
+        if let Opcode::ReadAttribute(n) | Opcode::WriteAttribute(n) = opcode {
+            let constant = cm.load_indexed_const(*n).expect("missing constant");
+            let name = constant.as_string().expect("expected string constant");
+            // Use `?` so the caller sees the interner's real failure reason
+            // (e.g. `TooManyInternedSymbols`) rather than a generic one.
+            vm.globals.intern_symbol(name)?;
+        }
+    }
+    Ok(())
+}
+
+/// Builds a runtime `CodeObject` from a compiled one: decodes its bytecode
+/// body, interns the attribute names it references, and copies the metadata.
+fn compiled_code_object_to_runtime_code_object(
+    vm: &mut VirtualMachine,
+    cm: &CompiledModule,
+    cco: aria_compiler::constant_value::CompiledCodeObject,
+) -> Result<crate::runtime_value::runtime_code_object::CodeObject, VmErrorReason> {
+    let mut ops = byte_array_to_opcode_array(cco.body.as_slice())?;
+    replace_attribute_access_with_interned(vm, cm, &mut ops)?;
+    let body: Rc<[Opcode]> = ops.into();
+
+    Ok(crate::runtime_value::runtime_code_object::CodeObject {
+        name: cco.name.clone(),
+        body,
+        required_argc: cco.required_argc,
+        default_argc: cco.default_argc,
+        frame_size: cco.frame_size,
+        loc: cco.loc.clone(),
+        line_table: Rc::from(cco.line_table.clone()),
+    })
+}
+
+/// Converts a compiled constant into the matching `RuntimeValue`; code-object
+/// constants are decoded via `compiled_code_object_to_runtime_code_object`.
+fn compiled_constant_to_runtime_value(
+    vm: &mut VirtualMachine,
+    cm: &CompiledModule,
+    value: aria_compiler::constant_value::ConstantValue,
+) -> Result<RuntimeValue, VmErrorReason> {
+    use aria_compiler::constant_value::ConstantValue::{
+        CompiledCodeObject, Float, Integer, String,
+    };
+    match value {
+        Integer(n) => Ok(RuntimeValue::Integer(From::from(n))),
+        String(s) => Ok(RuntimeValue::String(s.into())),
+        CompiledCodeObject(cco) => Ok(RuntimeValue::CodeObject(
+            compiled_code_object_to_runtime_code_object(vm, cm, cco)?,
+        )),
+        Float(f) => Ok(RuntimeValue::Float(f.raw_value().into())),
+    }
 }
 
 impl RuntimeModuleImpl {
-    fn new(cm: CompiledModule) -> Result {
+    fn new(vm: &mut VirtualMachine, cm: CompiledModule) -> Result<Self, VmErrorReason> {
+        let entry_co =
+            compiled_code_object_to_runtime_code_object(vm, &cm, cm.load_entry_code_object())?;
+
         let mut this = Self {
             compiled_module: cm,
             indexed_constants: Vec::new(),
             values: Default::default(),
+            entry_co,
         };
 
         let mut i = 0;
@@ -41,7 +139,7 @@ impl RuntimeModuleImpl {
                 .load_indexed_const(i as u16)
                 .expect("module has missing constant data");
 
-            let r = RuntimeValue::try_from(&c)?;
+            let r = compiled_constant_to_runtime_value(vm, &this.compiled_module, c)?;
             this.indexed_constants.push(r);
 
             i += 1;
@@ -128,12 +226,17 @@ pub struct RuntimeModule {
 }
 
 impl RuntimeModule {
-    pub fn new(cm: CompiledModule) -> Result {
+    pub fn new(vm: &mut VirtualMachine, cm: CompiledModule) -> Result<Self, VmErrorReason> {
         Ok(Self {
-            imp: Rc::new(RuntimeModuleImpl::new(cm)?),
+            imp: Rc::new(RuntimeModuleImpl::new(vm, cm)?),
         })
     }
 
+    /// Returns the module's entry code object, converted once when the module was built.
+    pub fn load_entry_code_object(&self) -> &crate::runtime_value::runtime_code_object::CodeObject {
+        &self.imp.entry_co
+    }
+
     pub(crate) fn named_values_of_this(&self) -> Vec<(String, NamedValue)> {
         self.imp.named_values_of_this()
     }
diff --git a/vm-lib/src/symbol.rs b/vm-lib/src/symbol.rs
new file mode 100644
index 00000000..d761ca0d
--- /dev/null
+++ b/vm-lib/src/symbol.rs
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: Apache-2.0
+use rustc_data_structures::fx::FxHashMap;
+use thiserror::Error;
+
+/// Numeric identifier assigned to an interned string by an [`Interner`].
+#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
+pub struct Symbol(pub u32);
+
+impl std::fmt::Display for Symbol {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "Symbol({})", self.0)
+    }
+}
+
+/// Maps strings to [`Symbol`]s and back; each distinct string gets one symbol.
+#[derive(Default)]
+pub struct Interner {
+    map: FxHashMap<String, Symbol>,
+    strings: Vec<String>,
+}
+
+/// Failure modes of [`Interner::intern`].
+#[derive(Clone, Error, PartialEq, Eq, Debug)]
+pub enum InternError {
+    #[error("too many symbols have been interned")]
+    TooManySymbols,
+}
+
+impl Interner {
+    /// Returns the symbol for `s`, allocating a new one the first time `s` is seen.
+    ///
+    /// # Errors
+    /// Returns [`InternError::TooManySymbols`] once `u32::MAX` strings have been interned.
+    pub fn intern(&mut self, s: &str) -> Result<Symbol, InternError> {
+        if let Some(&sym) = self.map.get(s) {
+            return Ok(sym);
+        }
+
+        let id = self.strings.len();
+        if id >= u32::MAX as usize {
+            return Err(InternError::TooManySymbols);
+        }
+
+        let s = s.to_string();
+
+        let sym = Symbol(id as u32);
+        self.strings.push(s.clone());
+        self.map.insert(s, sym);
+        Ok(sym)
+    }
+
+    /// Returns the string behind `sym`, or `None` if `sym` is out of range for this interner.
+    pub fn resolve(&self, sym: Symbol) -> Option<&str> {
+        self.strings.get(sym.0 as usize).map(|s| s.as_str())
+    }
+}
diff --git a/vm-lib/src/vm.rs b/vm-lib/src/vm.rs
index 74fb9c9f..ed1bcc89 100644
--- a/vm-lib/src/vm.rs
+++ b/vm-lib/src/vm.rs
@@ -31,7 +31,6 @@ use crate::{
         list::List,
         mixin::Mixin,
         object::Object,
-        runtime_code_object::CodeObject,
         structure::Struct,
     },
     stack::Stack,
@@ -442,13 +441,8 @@ impl VirtualMachine {
             self.modules.insert(name.to_owned(), r_mod.clone());
         }
 
-        let entry_cm = r_mod.get_compiled_module();
-        let entry_cco = entry_cm.load_entry_code_object();
-        let entry_co = match CodeObject::try_from(&entry_cco) {
-            Ok(co) => co,
-            Err(err) => return Err(VmErrorReason::from(err).into()),
-        };
-        let entry_f = Function::from_code_object(&entry_co, 0, &r_mod);
+        let entry_co = r_mod.load_entry_code_object();
+        let entry_f = Function::from_code_object(entry_co, 0, &r_mod);
         let mut entry_frame: Frame = Default::default();
         let entry_result = entry_f.eval(0, &mut entry_frame, self, &Default::default(), true);
 
@@ -466,7 +460,8 @@ impl VirtualMachine {
         name: &str,
         entry_cm: CompiledModule,
     ) -> ExecutionResult> {
-        self.load_into_module(name, RuntimeModule::new(entry_cm)?)
+        let r_mod = RuntimeModule::new(self, entry_cm)?;
+        self.load_into_module(name, r_mod)
     }
 
     pub fn get_module_by_name(&self, name: &str) -> Option {