diff --git a/.gitignore b/.gitignore index 74f23a8a..460ec05e 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,6 @@ CoqMakefile.conf *.vo *.vos *.css +Makefile +*.pdf +doc \ No newline at end of file diff --git a/coqdoc.css b/coqdoc.css index d5a0b8b4..4d887044 100644 --- a/coqdoc.css +++ b/coqdoc.css @@ -63,8 +63,9 @@ div:empty{ display: none;} #main .doc { margin: 0; text-align: left; - background-color: rgba(0,0,0,0.08); - max-width: 40em; + background-color: #d3d3d3; //rgba(0,0,0,0.08); + padding: 1em; + max-width: 50em; } .inlinecode, .code, #main pre { @@ -196,4 +197,8 @@ ul.doclist { display: inline; } - +ul,li { + /*margin:20pt;*/ + margin-left: 0.5em; + padding-left:0.5em; +} diff --git a/docseq b/docseq index e4d9181e..49b8804f 100644 --- a/docseq +++ b/docseq @@ -1,41 +1,46 @@ +doc/EraVM.Introduction.html doc/EraVM.Glossary.html -doc/EraVM.Common.html doc/EraVM.Core.html doc/EraVM.PrimitiveValue.html + doc/EraVM.State.html -doc/EraVM.CallStack.html -doc/EraVM.KernelMode.html +doc/EraVM.GPR.html +doc/EraVM.Flags.html doc/EraVM.Predication.html -doc/EraVM.Addressing.html -doc/EraVM.Resolution.html -doc/EraVM.Bootloader.html -doc/EraVM.Coder.html -doc/EraVM.Decommitter.html -doc/EraVM.Ergs.html +doc/EraVM.Memory.html +doc/EraVM.Pointer.html +doc/EraVM.Slice.html +doc/EraVM.CallStack.html +doc/EraVM.MemoryContext.html + doc/EraVM.History.html doc/EraVM.Event.html -doc/EraVM.Flags.html -doc/EraVM.GPR.html -doc/EraVM.Memory.html + doc/EraVM.MemoryBase.html doc/EraVM.MemoryOps.html -doc/EraVM.MemoryContext.html doc/EraVM.MemoryManagement.html -doc/EraVM.Slice.html -doc/EraVM.Pointer.html -doc/EraVM.StaticMode.html -doc/EraVM.VersionedHash.html + +doc/EraVM.Addressing.html +doc/EraVM.Resolution.html doc/EraVM.isa.Instructions.html +doc/EraVM.isa.Modifiers.html doc/EraVM.isa.Assembly.html doc/EraVM.isa.CoreSet.html +doc/EraVM.isa.GeneratedMachISA.html doc/EraVM.isa.AssemblyToCore.html -doc/EraVM.isa.Modifiers.html +doc/EraVM.isa.AssemblyToMach.html +doc/EraVM.Ergs.html +doc/EraVM.KernelMode.html +doc/EraVM.StaticMode.html +doc/EraVM.Steps.html doc/EraVM.Semantics.html doc/EraVM.VMPanic.html doc/EraVM.sem.SemanticCommon.html -doc/EraVM.sem.StepPanic.html + + +doc/EraVM.sem.Nop.html doc/EraVM.sem.SpAdd.html doc/EraVM.sem.SpSub.html doc/EraVM.sem.Jump.html @@ -54,14 +59,15 @@ doc/EraVM.sem.Ror.html doc/EraVM.sem.NearCall.html doc/EraVM.sem.NearRet.html -doc/EraVM.sem.NearRetTo.html doc/EraVM.sem.NearRevert.html +doc/EraVM.sem.NearRetTo.html doc/EraVM.sem.NearRevertTo.html +doc/EraVM.sem.StepPanic.html doc/EraVM.sem.Panic.html doc/EraVM.sem.NearPanicTo.html +doc/EraVM.sem.Farcall.html doc/EraVM.sem.FarRet.html doc/EraVM.sem.FarRevert.html -doc/EraVM.sem.Farcall.html doc/EraVM.sem.LoadPtr.html doc/EraVM.sem.LoadPtrInc.html @@ -80,19 +86,29 @@ doc/EraVM.sem.SLoad.html doc/EraVM.sem.OpEvent.html doc/EraVM.sem.ToL1.html -doc/EraVM.sem.Context.html doc/EraVM.sem.PrecompileCall.html +doc/EraVM.sem.Context.html -doc/EraVM.Steps.html + +doc/EraVM.Coder.html doc/EraVM.ABI.html +doc/EraVM.ABI.NearCallABI.html doc/EraVM.ABI.FatPointerABI.html doc/EraVM.ABI.ForwardPageTypesABI.html -doc/EraVM.ABI.FarRetABI.html doc/EraVM.ABI.FarCallABI.html +doc/EraVM.ABI.FarRetABI.html doc/EraVM.ABI.MetaParametersABI.html -doc/EraVM.ABI.NearCallABI.html doc/EraVM.ABI.PrecompileParametersABI.html + +doc/EraVM.encoding.EncodingUtils.html +doc/EraVM.encoding.GeneratedEncodeOpcode.html +doc/EraVM.encoding.Encoder.html + +doc/EraVM.Bootloader.html doc/EraVM.Precompiles.html +doc/EraVM.VersionedHash.html +doc/EraVM.Decommitter.html + doc/EraVM.Arith.html doc/index.html diff --git a/src/ABI.v b/src/ABI.v index be5d13cd..61c714f2 100644 --- a/src/ABI.v +++ b/src/ABI.v @@ -9,13 +9,10 @@ Import Core Common Coder Bool GPR Ergs Memory MemoryManagement Pointer. This section details the serialization and deserialization formats for compound instruction arguments. - -Currently, they are not described in details, but introduced axiomatically. - The description from Rust VM implementation is described here: https://github.com/matter-labs/zkevm_opcode_defs/blob/v1.4.1/src/definitions/abi -## Fat pointers *) +*) Require Export ABI.FatPointerABI ABI.MetaParametersABI diff --git a/src/ABI/FarCallABI.v b/src/ABI/FarCallABI.v index 332bbc09..6b10a3a8 100644 --- a/src/ABI/FarCallABI.v +++ b/src/ABI/FarCallABI.v @@ -3,6 +3,7 @@ Require Coder Ergs MemoryManagement Pointer lib.BitsExt ABI.FatPointerABI ABI.Fo Import ssreflect ssreflect.ssrfun ssreflect.eqtype ssreflect.tuple. Import Arith Core Common Coder Ergs Memory MemoryManagement Pointer lib.BitsExt FatPointerABI ForwardPageTypesABI. +Section FarCallABI. Record params := mk_params { fwd_memory: fwd_memory; @@ -177,3 +178,4 @@ fun params => End LayoutCoder. Definition coder := coder_compose coder_binary coder_layout. +End FarCallABI. diff --git a/src/ABI/FarRetABI.v b/src/ABI/FarRetABI.v index 6123c8b7..bb4c25c1 100644 --- a/src/ABI/FarRetABI.v +++ b/src/ABI/FarRetABI.v @@ -3,7 +3,9 @@ Require Coder Ergs MemoryManagement Pointer lib.BitsExt ABI.FatPointerABI ABI.Fo Import ssreflect ssreflect.ssrfun ssreflect.eqtype ssreflect.tuple. Import Arith Core Common Coder Ergs Memory MemoryManagement Pointer lib.BitsExt FatPointerABI ForwardPageTypesABI. -Record params := + +Section FarRetABI. + Record params := mk_params { fwd_memory: fwd_memory; }. @@ -147,3 +149,4 @@ fun params => End LayoutCoder. Definition coder := coder_compose coder_binary coder_layout. +End FarRetABI. diff --git a/src/ABI/FatPointerABI.v b/src/ABI/FatPointerABI.v index 88b2fa1d..0469a8e5 100644 --- a/src/ABI/FatPointerABI.v +++ b/src/ABI/FatPointerABI.v @@ -3,6 +3,7 @@ Require Coder Pointer lib.BitsExt. Import ssreflect ssreflect.ssrfun ssreflect.eqtype ssreflect.tuple. Import Core Common Coder Pointer lib.BitsExt. +Section FatPointerABI. (** Record [%fat_ptr_layout] displays the memory layout of a 128-bit fat pointer.*) Record fat_ptr_layout := mk_fat_ptr_layout { length: u32; @@ -11,7 +12,6 @@ Record fat_ptr_layout := mk_fat_ptr_layout { offset: u32; }. Definition null_fat_ptr_layout := mk_fat_ptr_layout zero32 zero32 zero32 zero32. - Section LayoutCoder. (** Functions [%encode_layout] and [%decode_layout] formalize the encoding of a [%fat_ptr_nullable] to [%fat_ptr_layout]. *) @@ -163,3 +163,4 @@ Section ComposedCoder. | _ => None end. End ComposedCoder. +End FatPointerABI. diff --git a/src/ABI/MetaParametersABI.v b/src/ABI/MetaParametersABI.v index 8228b926..fb620e0f 100644 --- a/src/ABI/MetaParametersABI.v +++ b/src/ABI/MetaParametersABI.v @@ -3,6 +3,7 @@ Require Coder Ergs MemoryManagement Pointer lib.BitsExt. Import ssreflect ssreflect.ssrfun ssreflect.eqtype ssreflect.tuple. Import Arith Core Common Coder Ergs Memory MemoryManagement Pointer lib.BitsExt. +Section MetaParametersABI. Record params := mk_params { ergs_per_pubdata_byte: ergs; @@ -134,3 +135,4 @@ Qed. End LayoutCoder. Definition coder := coder_compose binary_coder layout_coder. +End MetaParametersABI. diff --git a/src/ABI/NearCallABI.v b/src/ABI/NearCallABI.v index 1656bbe1..728649a7 100644 --- a/src/ABI/NearCallABI.v +++ b/src/ABI/NearCallABI.v @@ -3,6 +3,7 @@ Require Coder Ergs Memory. Import ssreflect. Import Types Core Coder Ergs Memory. +Section NearCallABI. Record params: Type := mk_params { ergs_passed: u32; @@ -22,3 +23,4 @@ refine (mk_coder decode encode _). unfold revertible, encode, decode. by move => []; inversion 1. Defined. +End NearCallABI. diff --git a/src/ABI/PrecompileParametersABI.v b/src/ABI/PrecompileParametersABI.v index 629b0b29..01131547 100644 --- a/src/ABI/PrecompileParametersABI.v +++ b/src/ABI/PrecompileParametersABI.v @@ -3,7 +3,9 @@ Require Coder Memory lib.BitsExt. Import ssreflect ssreflect.ssrfun ssreflect.eqtype ssreflect.tuple. Import Core Common Coder Memory lib.BitsExt. -Record params := +Section PrecompileParametersABI. + + Record params := mk_params { input_memory_offset: mem_address; @@ -49,5 +51,5 @@ Canonical params_eqType := Eval hnf in EqType _ params_eqMixin. (* end details *) - Axiom ABI: @coder word params. +End PrecompileParametersABI. diff --git a/src/Core.v b/src/Core.v index 23dbb07d..d0a0a6fe 100644 --- a/src/Core.v +++ b/src/Core.v @@ -24,6 +24,7 @@ EraVM is a 256-bit register-based language machine with two stacks and dedicated (* end details *) End Parameters. +Section ArchOverview. (** ![](img/arch-overview.png) @@ -175,6 +176,6 @@ There are three types of behaviors triggered by execution failures. *) - +End ArchOverview. Definition timestamp := nat. Definition tx_num := u16. diff --git a/src/Glossary.v b/src/Glossary.v index ebf9980b..d6a0b42b 100644 --- a/src/Glossary.v +++ b/src/Glossary.v @@ -1,3 +1,4 @@ +Section Glossary. (** # Notations - Definitional equality is denoted with $A:=B$. Its meaning is: $A$ can be substituted with $B$. @@ -26,7 +27,6 @@ - **ABI** -- application binary interface. See [%ABI]. - **Active external frame** -- the closest external frame to the top of call stack. For example, in a callstack [%(InternalCall _ (InternalCall _ (ExternalCall _ (InternalCall _ ...))))] the active external frame will be the third frame in stack. See [%active_extframe]. - **Auxheap** -- one of two heap variants (heap and auxheap), mostly used by system contracts. Executing one of far call instructions creates a new external frame and allocates pages for code, constants, data stack, heap and aux heap. -- **Batch** -- ?? - **Burning ergs** -- setting ergs to zero in topmost call stack frame, external or internal. See [%Ergs]. - **Callstack** -- a stack of context information. Executing one of near call instructions pushes a frame of type [%InternalCall] to the callstack, executing one of far call instructions pushes a frame of type [%ExternalCall]. See [%CallStack]. - **Checkpoint** -- see [%state_checkpoint]. Not to confuse with EraVM snapshot. @@ -64,11 +64,10 @@ - **Memory forwarding** -- a mechanism of sharing a read-only fragment ofmemory between contracts. The memory fragment is created from [%span] and described by [%fat_ptr]. This fragment can be narrowed or shrunk as a result of far call or executing [%OpPtrShrink]. - **Machine instruction** -- a low-level machine instruction with a fixed format. The high-level [%instruction_predicated] is encoded to machine instructions. - **Memory growth** -- a process, where an access to a heap variant beyond its bound leads to increasing the bound and payment. -- **Message** -- TODO - **Narrowing a fat pointer** -- subtract a given number from its length and add it to its start; it is guaranteed to not overflow. See [%fat_ptr_narrow]. Used when passing a fat pointer to a far call, or returning it from a contract. - **Near call** -- calling a function that belongs to the same contract. - **Operand** -- data or the address that is operated upon by the instruction. It represents the input or output values used by the instruction to perform a specific operation or computation. See [%InstructionArguments]. -- **Page** -- see [%page]. +- **Page** -- see [%page_type]. - **Pointer value** -- a [%primitive_value] with a set pointer tag. May contain a fat pointer. See [%PtrValue]. - **Precompile call** -- an invokation of [%OpPrecompileCall]. Precompiles are extensions of EraVM bound to one of the system contracts. When this contract @@ -83,33 +82,34 @@ algorithm specific to this contract. See [%Precompiles]. - **System contracts** -- contracts with addresses from 0 to [%KERNEL_MODE_MAXADDR_LIMIT]. They are executed in [%KernelMode]. - **Topmost callstack frame** -- the last frame pushed to call stack. - **Word** -- 256-bit unsigned untagged integer value. -- **address resolution** -- a matching between instruction operands and locations using the supported address modes. See [%resolve]. -- **base cost** -- the fixed cost of executing instruction, in ergs. Some instructions imply additional costs, e.g. far calls may require paying for code decommitment. -- **bootloader** -- a system contract written in YUL in charge of block construction. -- **cell** -- alias to "word". Often used to distinguish between values themselves and the memory locations holding them. -- **depot** -- all storages in all shards. See [%depot]. -- **ergs** -- resource spent on actions such as executing instructions. See [%Ergs]. -- **instruction predicate** -- see [%Predication]. -- **flag** -- see [%flag_state]. -- **fully qualified address** -- see [%fqa_key]. -- **instruction** -- low-level command or operation that is executed by a virtual machine to perform a specific task. Instructions supported by EraVm are described by the type [%instruction_predicated]. -- **kernel mode** -- a mode of execution for system contracts opening access to full instruction set, containing instructions potentially harmful to the global state e.g. [%OpContextIncrementTxNumber]. See [%KernelMode]. -- **key** -- a 256-bit address of a cell in storage. -- **location** -- see [%loc]. -- **panic** -- irrecoverable error. Handled by formally executing [%OpPanic]. -- **revert** -- execution of [%OpRevert], usually as a consequence of recoverable error. -- **malformed transaction** -- a transaction rejected by the bootloader. Handling it is the responsibility of the server that controls EraVM. -- **predicate** -- see [%Predication]. -- **predication** -- see [%Predication]. -- **program counter** -- the [%code_address] of the next instruction to be executed. See [%cf_pc]. -- **rollback** -- restoration of the [%gs_revertable] portion of [%state] as a result of revert or panic. The state is saved at every near or far call. See also [%state_checkpoint]. -- **shard** -- a collection of [%storage]s. See [%shard]. -- **snapshot** -- a copy of the full state of EraVM. Server makes a snapshot of VM state every time a transaction is successfully completed. When the bootloader encounters a malformed transaction, it fails, and the server restarts EraVM from the most recent snapshot, skipping this transaction. -- **server** -- a program that launches EraVM and controls it. Feeds the transactions to the bootloader, provides decommitter and other external modules, restarts EraVM from the latest snapshot in case of malformed transactions. -- **stack pointer** -- the [%stack_address] where the next element will be pushed. It is the address of the (top of the stack + 1). -- **static mode** -- see [%StaticMode]. -- **storage** -- see [%storage]. -- **total cost** -- a sum of [%base_cost] of an instruction and all its additional costs. -- **versioned hash** -- a key used to retrieve the contract code from decommitter. See [%versioned_hash] and [%Decommitter]. +- **Address resolution** -- a matching between instruction operands and locations using the supported address modes. See [%resolve]. +- **Base cost** -- the fixed cost of executing instruction, in ergs. Some instructions imply additional costs, e.g. far calls may require paying for code decommitment. +- **Bootloader** -- a system contract written in YUL in charge of block construction. +- **Cell** -- alias to "word". Often used to distinguish between values themselves and the memory locations holding them. +- **Depot** -- all storages in all shards. See [%depot]. +- **Ergs** -- resource spent on actions such as executing instructions. See [%Ergs]. +- **Instruction predicate** -- see [%Predication]. +- **Flag** -- see [%flag_state]. +- **Fully qualified address** -- see [%fqa_key]. +- **Instruction** -- low-level command or operation that is executed by a virtual machine to perform a specific task. Instructions supported by EraVm are described by the type [%instruction_predicated]. +- **Kernel mode** -- a mode of execution for system contracts opening access to full instruction set, containing instructions potentially harmful to the global state e.g. [%OpContextIncrementTxNumber]. See [%KernelMode]. +- **Key** -- a 256-bit address of a cell in storage. +- **Location** -- see [%loc]. +- **Panic** -- irrecoverable error. Handled by formally executing [%OpPanic]. +- **Revert** -- execution of [%OpRevert], usually as a consequence of recoverable error. +- **Malformed transaction** -- a transaction rejected by the bootloader. Handling it is the responsibility of the server that controls EraVM. +- **Predicate** -- see [%Predication]. +- **Predication** -- see [%Predication]. +- **Program counter** -- the [%code_address] of the next instruction to be executed. See [%cf_pc]. +- **Rollback** -- restoration of the [%gs_revertable] portion of [%state] as a result of revert or panic. The state is saved at every near or far call. See also [%state_checkpoint]. +- **Shard** -- a collection of [%storage]s. See [%shard]. +- **Snapshot** -- a copy of the full state of EraVM. Server makes a snapshot of VM state every time a transaction is successfully completed. When the bootloader encounters a malformed transaction, it fails, and the server restarts EraVM from the most recent snapshot, skipping this transaction. +- **Server** -- a program that launches EraVM and controls it. Feeds the transactions to the bootloader, provides decommitter and other external modules, restarts EraVM from the latest snapshot in case of malformed transactions. +- **Stack pointer** -- the [%stack_address] where the next element will be pushed. It is the address of the (top of the stack + 1). +- **Static mode** -- see [%StaticMode]. +- **Storage** -- see [%storage]. +- **Total cost** -- a sum of [%base_cost] of an instruction and all its additional costs. +- **Versioned hash** -- a key used to retrieve the contract code from decommitter. See [%versioned_hash] and [%Decommitter]. - **VM** -- the same as EraVM, the abstract virtual machine that this document specifies. *) +End Glossary. diff --git a/src/Introduction.v b/src/Introduction.v new file mode 100644 index 00000000..d32bf065 --- /dev/null +++ b/src/Introduction.v @@ -0,0 +1,139 @@ +(** This is the specification of the instruction set of EraVM 1.4.0, a language +virtual machine for zkSync Era. It describes the virtual machine's architecture, +instruction syntax and semantics, and some elements of system protocol. + +# Table of contents + +- **Overview** + + + [%Glossary] : a list of all important recurring terms and notations defined in this document. + + [%ArchOverview] : a bird's eye overview of EraVM architecture with links to more detailed descriptions of its parts. + + [%PrimitiveValue] : defines a tagged 256-bit word datatype; can be an integer or a pointer. + +- **EraVM Structure** + + [%StateDefinitions] collects all system state, persistent and transient. + + + [%Registers] : a group of isolated read/write memory cells available to all instructions. + + [%Flags] : special registers showing the status of the latest computations. + + [%Predication] : how setting flags makes EraVM skip of execute instructions. + + [%Memory] : transient memory (heap, stack) for supporting computations and persistent storages of contracts. + + [%PointerDefinitions] : definition of pointers to transient memory and operations on them. + + [%Slices] : how individual pointers are restricted to selected subranges of memory addresses. + + [%Callstack] : states of running functions and contracts. + + [%MemoryContext] : how new transient memory is allocated when a contract is called. + +- **Instruction Set** + + + [%Addressing] : how instructions can refer to their arguments. + + [%Resolution] : how instruction arguments are resolved to the operand values. + + [%InstructionSets] : overview of all layers of instruction set and their relations. + + * [%Modifiers] : recurring modifiers supported by many instructions: [%mod_set_flags] and [%mod_swap]. + * [%AssemblyInstructionSet] : **instruction set exposed to assembly programmers**. + * [%CoreInstructionSet] : simplified instruction set used to define instruction semantic. + * [%MachInstructionDefinition] : layout of encoded assembly instructions. + * Conversions between instruction sets: [%asm_to_core] and [%asm_to_mach]. + +- **EraVM operation** + + + [%Ergs] : resource/gas system and basic operation costs. + + [%MemoryOps] : precise formalization of memory writes and reads. + + [%MemoryForwarding] : a mechanism to pass memory slices between contracts. + + [%Events] : different types of events emitted when EraVM is operating. + + + [%KernelMode] : privileged mode of execution for system contracts allowing a restricted part of the instructions set. + + [%StaticMode] : mode of execution with limited effects on persistent state. + + [%SmallStep] : formal semantics of instruction execution cycle, in small-step operational style + + [%Panics] : a mechanism of signaling irrecoverable errors and a list of situations where they occur. + + +- **Instruction set semantic** + + + Stack manipulation + * **`SpAdd`** : [%SpAddDefinition] : forward stack pointer. + * **`SpSub`** : [%SpSubDefinition] : rewind stack pointer. + + + Arithmetic + * **`Add`** : [%AddDefinition] : + * **`Sub`** : [%SubDefinition] : + * **`Mul`** : [%MulDefinition] : + * **`Div`** : [%DivDefinition] : + + + Bitwise logical + * **`And`** : [%AndDefinition] : + * **`Or`** : [%OrDefinition] : + * **`Xor`** : [%XorDefinition] : + + + Bitwise shifts + * **`Shl`** : [%ShlDefinition] : left logical shift. + * **`Shr`** : [%ShrDefinition] : right logical shift. + * **`Rol`** : [%RolDefinition] : left circular shift. + * **`Ror`** : [%RorDefinition] : right circular shift. + + + Control flow + * **`Nop`** : [%NopDefinition] : do nothing. + * **`Jump`** : [%JumpDefinition] : jump to code (conditional jumps are implemented through [%Predication]). + * **`NearCall`** : [%NearCallDefinition] : call a function in the same contract. + * **`NearRet`** : [%NearRetDefinition] : normal return from near call to the call site, return unspend ergs. + * **`NearRevert`** : [%NearRevertDefinition] : return from near call due to a recoverable error, return unspend ergs, roll back storage/events, execute exception handler. + + * **`NearRetTo`** : [%NearRetToDefinition] : Like `NearRet` but returns to explicit label. + * **`NearRevertTo`** : [%NearRevertToDefinition] : like `NearRevert` but executes code at label instead of exception handler. + + * **`Panic`** : [%PanicDefinition] : trigger panic. + * **`NearPanicTo`** : [%NearPanicToDefinition] : trigger panic and return to label. + * **`Farcall`** : [%FarcallDefinition] : call a contract + * **`FarRet`** : [%FarRetDefinition] : return from farcall. + * **`FarRevert`** : [%FarRevertDefinition] : like `FarRet` but roll back storage/events and execute exception handler. + + + Operations with heaps + + * **`LoadPtr`** : [%LoadPtrDefinition] : load word by a fat pointer. + * **`LoadPtrInc`** : [%LoadPtrIncDefinition] : like `LoadPtr`, additionally return an incremented pointer. + * **`Load`** : [%LoadDefinition] : load word from heap by a fat pointer. + * **`LoadInc`** : [%LoadIncDefinition] : like `Load`, additionally return offset+32. + * **`Store`** : [%StoreDefinition] : store word to heap by an offset. + * **`StoreInc`** : [%StoreIncDefinition] : like `Store`, additionally return offset+32. + + + Operations with pointers + + * **`PtrAdd`** : [%PtrAddDefinition] : increment pointer. + * **`PtrSub`** : [%PtrSubDefinition] : decrement pointer. + * **`PtrShrink`** : [%PtrShrinkDefinition] : restrict the range of addresses that a pointer can reference. + * **`PtrPack`** : [%PtrPackDefinition] : put data in the unused high 128 bits of a pointer. + + + Operations with storage + * **`SStore`** : [%SStoreDefinition] : store value at a key in storage of the current contract. + * **`SLoad`** : [%SLoadDefinition] : load value by key from storage of the current contract. + + + Events and precompiles + * **`OpEvent`** : [%OpEventDefinition] : emit an event. + * **`ToL1`** : [%ToL1Definition] : emit a message to L1. + * **`PrecompileCall`** : [%PrecompileCallDefinition] : call an extension of VM specific to currently executing system contract. + + * **`Context`** : [%ContextDefinition] : access some parts of VM state such as currently executed contract or stack pointer. + +- **ABI** (memory layouts of compound data structures serialized to 256-bit words.) + + + [%Coder] : general definitions of encoding, decoding, composition and required properties. + + [%NearCallABI] + + [%FatPointerABI] + + [%FarCallABI] + + [%FarRetABI] + + [%MetaParametersABI] : layout of result of [%ContextMeta]. + + [%PrecompileParametersABI] : layout of result of [%ContextMeta]. + +- **Instruction encoding** + + + [%EncodingTools] : binary encoding of different parts of instruction layout. + + [%encode_opcode] : encoding source/destination, addressing modes, and all modifiers as a single 11-bit opcode value. + + [%MachEncoder] : the main assembly instruction encoder definition. + +- **Elements of protocol** + + + [%Bootloader] : a system contract in charge of block construction. + + [%Precompiles] : extensions of virtual machine. + + [%VersionedHash] : used to fetch the contract code. + + [%Decommitter] + *) diff --git a/src/Memory.v b/src/Memory.v index 7d576377..2adfc7a5 100644 --- a/src/Memory.v +++ b/src/Memory.v @@ -47,7 +47,7 @@ Contract code is global and shared between shards. ## Transient memory -Transient memory consists of [%page]s holding data or code. +Transient memory consists of **pages** ([%page_type]) holding data or code. Each page holds $2^{32}$ bytes; all bytes are initialized to zero at genesis. New pages are allocated implicitly when the contract execution starts; calling @@ -231,7 +231,7 @@ heaps, and constants. When the execution of a contract starts, new pages are allocated for: - contract code: [%code_page], fetched by decommitter; see [%Decommitter] and - [%FarCall]); + [%FarCallDefinitions]); - data stack: [%stack_page]; - heap: [%data_page]; - aux_heap: [%data_page]; @@ -272,8 +272,7 @@ and code pages. **) (** The set of identifiers has a complete linear order, ordering the pages by the time of creation. The ability to distinguish older pages from newer is necessary to prevent returning fat pointers to pages from older frames. - See e.g. [%step_retext]. *) - + See e.g. [% step_RetExt_ForwardFatPointer]. *) Section Order. Definition page_ordering := Nat.leb. Definition page_eq x y := page_ordering x y && page_ordering y x. @@ -437,8 +436,7 @@ They are not writable. On genesis, code pages are filled as follows: - The contract code is places starting from the address 0. -- The rest is filled with a value guaranteed to decode as invalid instruction - [%invalid_ins]. +- The rest is filled with a value guaranteed to decode as [%invalid] instruction. Const pages can coincide with code pages. *) diff --git a/src/Pointer.v b/src/Pointer.v index 3ef6c0b3..8b20b336 100644 --- a/src/Pointer.v +++ b/src/Pointer.v @@ -5,7 +5,7 @@ Require Memory. Import Arith Core Common MemoryBase Memory RecordSetNotations PMap_ext BinInt zmodp. Import ssreflect.tuple ssreflect.eqtype. -Section Definitions. +Section PointerDefinitions. Context (bytes_in_word := u32_of z_bytes_in_word). Open Scope ZMod_scope. @@ -38,8 +38,8 @@ to [%s_start + s_length] exclusive. It is not bound to a specific page. *) Passing a span in [%ForwardNewHeapPointer] as an argument to far calls or far returns creates a **fat pointer**. -The required encoding is described by [%ABI.FarRet.ABI] or [%ABI.FarCall.ABI]. -See [%FarCall] and [%FarRet]. +The required encoding is described by [%FarRetABI] or [%FarCallABI]. +See [%FarCallDefinitions] and [%FarRet]. See also: [%Slices]. @@ -98,7 +98,7 @@ Heap pointers are used by the following instructions: - [%OpLoad]/[%OpLoadInc] - [%OpStore]/[%OpStoreInc] -The layout of a heap pointer in a 256-bit word is described by [%ABI.FatPointer.decode_heap_ptr]. +The layout of a heap pointer in a 256-bit word is described by [%decode_heap_ptr]. ## Relation to fat pointers @@ -290,4 +290,4 @@ Shrinking may result in a pointer with $\mathit{offset}>\mathit{length}$, but su Definition fat_ptr_inc_OF := fat_ptr_opt_map ptr_inc_OF. End FatPointer. -End Definitions. +End PointerDefinitions. diff --git a/src/Resolution.v b/src/Resolution.v index 4bddcc69..fa911a84 100644 --- a/src/Resolution.v +++ b/src/Resolution.v @@ -67,7 +67,7 @@ Inductive loc : Type := more complicated and requires putting in registers specially formed pointers [%heap_ptr]. *) -Section Resolve. +Section Resolution. Import Addressing.Coercions. Open Scope ZMod_scope. @@ -226,4 +226,4 @@ In the current state of EraVM, only SP modifications are allowed, therefore the sp_map_spec (fun _ => new_sp) cs cs' -> resolve_apply arg (cs', loc). -End Resolve. +End Resolution. diff --git a/src/State.v b/src/State.v index a7df6b8c..5cbde9fb 100644 --- a/src/State.v +++ b/src/State.v @@ -6,7 +6,7 @@ Import RecordSetNotations. Import Core Flags ZArith ABI Common GPR Ergs Event CallStack History MemoryBase Memory Decommitter Predication VMPanic PrimitiveValue. -Section Definitions. +Section StateDefinitions. Definition exception_handler := code_address. @@ -33,19 +33,19 @@ Definition page_has_id := @page_has_id code_page const_page data_page stack_page EraVM employs a [%state] that comprises the following components: 1. The [%global_state] contains: -- current price of publishing one byte of **pubdata** to L1 [%gs_current_ergs_per_pubdata_byte]. -- transaction number in the current block [%gs_tx_number_in_block] -- decommitter [%gs_contracts] -- a revertable part [%state_checkpoint]. It houses the **depot** state, embodying - all contracts storages across all shards, as well as two queues for events - and L1 messages. - - Launching a contract (far call) or a function (near call) defines a - checkpoint. - If a contract or a function reverts or panics, the state rolls back - to the latest snapshot (see [%rollback]). - - The rollback may be implemented in any efficient way conforming to this behavior. + - current price of publishing one byte of **pubdata** to L1 [%gs_current_ergs_per_pubdata_byte]. + - transaction number in the current block [%gs_tx_number_in_block] + - decommitter [%gs_contracts] + - a revertable part [%state_checkpoint]. It houses the **depot** state, embodying + all contracts storages across all shards, as well as two queues for events + and L1 messages. + + Launching a contract (far call) or a function (near call) defines a + checkpoint. + If a contract or a function reverts or panics, the state rolls back + to the latest snapshot (see [%rollback]). + + The rollback may be implemented in any efficient way conforming to this behavior. *) Record state_checkpoint := { gs_depot: depot; @@ -66,11 +66,11 @@ Inductive rollback checkpoint: global_state -> global_state -> Prop := rollback checkpoint (mk_gstate e tx ccs _cp) (mk_gstate e tx ccs checkpoint). (** 2. The [%transient_state] contains: - - flags [%gs_flags]: boolean values representing some characteristics of the computation results. See [%Flags]. - - general purpose registers [%gs_regs]: 15 mutable tagged words (primitive values) [%r1]--[%r15], and a reserved read-only zero valued [%r0]. See [%Registers]. - - all memory [%gs_pages] allocated by VM, including code pages, data stack pages, data pages for heap variants etc. See [%memory]. - - [%gs_callstack], where each currently running contract and function has a stack frame. Note, that program counter, data stack pointer, and the remaining ergs allocated for the current function's run are parts of a stack frame. See [%CallStack]. - - 128-bit register [%gs_context_u128]. Its usage is detailed below. + - flags [%gs_flags]: boolean values representing some characteristics of the computation results. See [%Flags]. + - general purpose registers [%gs_regs]: 15 mutable tagged words (primitive values) [%r1]--[%r15], and a reserved read-only zero valued [%r0]. See [%Registers]. + - all memory [%gs_pages] allocated by VM, including code pages, data stack pages, data pages for heap variants etc. See [%memory]. + - [%gs_callstack], where each currently running contract and function has a stack frame. Note, that program counter, data stack pointer, and the remaining ergs allocated for the current function's run are parts of a stack frame. See [%Callstack]. + - 128-bit register [%gs_context_u128]. Its usage is detailed below. *) (* begin details: helpers *) Definition callstack := @callstack state_checkpoint. @@ -192,7 +192,7 @@ Inductive global_state_increment_tx tx_mod: global_state -> global_state -> Prop gs_revertable := rev; |}. -End Definitions. +End StateDefinitions. Definition heap_variant_page_id (page_type: data_page_type) : callstack -> page_id := diff --git a/src/VersionedHash.v b/src/VersionedHash.v index bb21d936..4e707532 100644 --- a/src/VersionedHash.v +++ b/src/VersionedHash.v @@ -1,6 +1,6 @@ Require Coder Memory. -Section Def. +Section VersionedHash. Import ZArith ssrbool eqtype ssreflect ssrfun ssrbool ssreflect.eqtype ssreflect.tuple zmodp. Import Memory Coder Common. @@ -81,4 +81,4 @@ modulo $2^{28 \times 8}$. Canonical vh_eqType := Eval hnf in EqType _ vh_eqMixin. (* end details *) -End Def. +End VersionedHash. diff --git a/src/encoding/Encoder.v b/src/encoding/Encoder.v index da73f7e0..7d5e8530 100644 --- a/src/encoding/Encoder.v +++ b/src/encoding/Encoder.v @@ -6,9 +6,26 @@ Require encoding.EncodingUtils encoding.GeneratedEncodeOpcode. +Section MachEncoder. Import ZArith. Import Assembly Common GeneratedMachISA GeneratedEncodeOpcode isa.AssemblyToMach Predication EncodingUtils. +(** # Encoding machine instructions + +For an overview of instruction sets and different layers of instructions +definitions, used in this specification, refer to [%InstructionSets]. + +The binary encoding is defined for [%mach_instruction], which is a +representation of an EraVM instruction aware of its encoding and layout. +Once the [%asm_instruction] has been transformed into [%mach_instruction] via +[%asm_to_mach], it is trivial to put all of [%mach_instruction]'s fields in +binary form via [%encode_mach_instruction]. + +Reminder: `##` notation stands for concatenating binary strings; `a ## b` +signifies that `b` holds less significant bits and `a` is prepended to it, +forming a more significant part. + +*) Definition encode_mach_instruction (i:@mach_instruction GPR.reg_name u16) : BITS 64 := match i with | mk_ins op_code op_predicate op_src0 op_src1 op_dst0 op_dst1 op_imm0 op_imm1 => @@ -28,3 +45,4 @@ Definition encode_mach_instruction (i:@mach_instruction GPR.reg_name u16) : BITS Definition encode_asm (i:predicated asm_instruction) : option (BITS 64) := option_map encode_mach_instruction (asm_to_mach i) . +End MachEncoder. diff --git a/src/encoding/EncodingUtils.v b/src/encoding/EncodingUtils.v index 3e48af93..3dabc4fd 100644 --- a/src/encoding/EncodingUtils.v +++ b/src/encoding/EncodingUtils.v @@ -3,6 +3,7 @@ Require Common Predication isa.Modifiers isa.GeneratedMachISA. Import ZBits ZArith. Import Common GPR GeneratedMachISA Modifiers Predication. +Section EncodingTools. (** # Encoding parts of instructions The encoding of [%asm_instruction] is described in two stages: @@ -108,4 +109,5 @@ Definition encode_reg_opt (name:option reg_name) : BITS 4 := end . -(** 4. Immediate values are encoded as is. *) +(** 4. Immediate 16-bit values are encoded as-is. *) +End EncodingTools. diff --git a/src/encoding/Tests.v b/src/encoding/Tests.v index 3cbf657c..08e86741 100644 --- a/src/encoding/Tests.v +++ b/src/encoding/Tests.v @@ -27,17 +27,16 @@ Check (equals: "0000000A02100079" = (Show.to_string (encode_asm (Ins _ (OpSub (A (* Require Import Extraction. *) (* Require Import ExtrOcamlBasic. *) -(* Require Import ExtrOcamlString. *) +(* Require Import ExtrOcamlString. *) (* Extract Inlined Constant Datatypes.fst => "fst". *) (* Extract Inlined Constant Datatypes.snd => "snd". *) (* Extraction Blacklist List String Int. *) -(* Separate Extraction encode_asm. *) -(* (*Recursive Extraction encode_asm. *) *) (* Recursive Extraction encode_asm. *) -(* Extraction insadd1. *) -(* Definition s := Show.to_string (encode_asm insadd1). *) -(* Recursive Extraction s. *) +(* Definition ins_encode (i:predicated asm_instruction) : string := *) +(* Show.to_string (encode_asm i). *) + +(* Recursive Extraction sub ins_encode. *) diff --git a/src/isa/Instructions.v b/src/isa/Instructions.v index 47e67184..327ca15f 100644 --- a/src/isa/Instructions.v +++ b/src/isa/Instructions.v @@ -1,3 +1,4 @@ +Section InstructionSets. (** # EraVM instruction sets *Note* If you are interested in the instruction set exposed to the assembly @@ -77,3 +78,4 @@ from lowest level to the highest level: [%bind_farret_params] describes how [%OpFarRet]'s ABI parameters are deserialized. *) +End InstructionSets. diff --git a/src/sem/Context.v b/src/sem/Context.v index 9b2743fa..51dd3c5a 100644 --- a/src/sem/Context.v +++ b/src/sem/Context.v @@ -361,7 +361,7 @@ Fetches ## Semantic -- Stores the encoded value of [%ABI.MetaParameters.params] in `out`. They follow the structure: +- Stores the encoded value of [%MetaParametersABI] in `out`. They follow the structure: ``` Record params := { diff --git a/src/sem/FarRet.v b/src/sem/FarRet.v index 3cf71da2..5f618cbf 100644 --- a/src/sem/FarRet.v +++ b/src/sem/FarRet.v @@ -195,7 +195,7 @@ Section FarRetDefinition. ## Panics -1. Attempt to forward an existing fat pointer, but the value holding [%ABI.Ret.params] is not tagged as a pointer. +1. Attempt to forward an existing fat pointer, but the value holding [%RetABI] is not tagged as a pointer. *) | step_RetExt_ForwardFatPointer_requires_ptrtag: diff --git a/src/sem/FarRevert.v b/src/sem/FarRevert.v index 28324bb6..7f9ad80a 100644 --- a/src/sem/FarRevert.v +++ b/src/sem/FarRevert.v @@ -185,7 +185,7 @@ Use `panic` for irrecoverable errors. ## Panics -1. Attempt to forward an existing fat pointer, but the value holding [%ABI.Ret.params] is not tagged as a pointer. +1. Attempt to forward an existing fat pointer, but the value holding [%RetABI] is not tagged as a pointer. *) | step_RevertExt_ForwardFatPointer_requires_ptrtag: diff --git a/src/sem/NearCall.v b/src/sem/NearCall.v index 9cae4146..8c65719a 100644 --- a/src/sem/NearCall.v +++ b/src/sem/NearCall.v @@ -39,7 +39,7 @@ caller calls the callee. Step-by-step explanation: -1. Read the value of `abi_reg` and decode the following structure [%ABI.NearCall.params] from it. +1. Read the value of `abi_reg` and decode the following structure from [%NearCallABI] from it. The `ergs_passed` field indicates the amount of ergs we intend to pass, but the actual amount of ergs passed gets decided at runtime (see step 2). diff --git a/src/sem/PtrPack.v b/src/sem/PtrPack.v index dc8ca498..bd7191bf 100644 --- a/src/sem/PtrPack.v +++ b/src/sem/PtrPack.v @@ -55,7 +55,7 @@ $$result := \mathit{op_1}\{255\dots128\} \#\# \mathit{op_2}\{128\dots 0\}$$ This is used by for memory forwarding to far calls when we need to forward an existing fat pointer. To pass a fat pointer `P` to far call, it is necessary to encode an instance of - [%ABI.FarCall.params] with [%fwd_memory := ForwardFatPointer P] into a + [%FarCallABI] with [%fwd_memory := ForwardFatPointer P] into a [%PtrValue]. ``` @@ -71,18 +71,18 @@ $$result := \mathit{op_1}\{255\dots128\} \#\# \mathit{op_2}\{128\dots 0\}$$ }. ``` - The compound type [%ABI.FarCall.params] is serialized to a [%word] in such a + The compound type of [%FarCallABI] is serialized to a [%word] in such a way that the pointer takes up the lower 128 bits of memory. This matches the layout of any fat pointer: serialized pointers occupy the lower 128 bit of a word. - Therefore, encoding an instance of [%ABI.FarCall.params] can be done as follows: + Therefore, encoding an instance of [%FarCallABI] can be done as follows: - take an existing [%PtrValue P] - form a value `A` encoding [%ergs_passed], [%shard_id] and other fields of - [%ABI.FarCall.params] in A{128...255}. + [%FarCallABI] in A{128...255}. - invoke [%OpPtrPack P A B]. Now `B` stores an encoded instance of - [%ABI.FarCall.params] and can be passed to one of far call instructions. + [%FarCallABI] and can be passed to one of far call instructions. ## Similar instructions