Skip to content

Commit

Permalink
Implement disassembler and instruction fetch cycle (#25)
Browse files Browse the repository at this point in the history
* Add kllvm and kllvm-runtime targets

* Implement disassemble function

* Implement basic instruction fetch cycle

* Fix simple test suite to update expected output properly. Use _halt instead of _end

* Implement ADDI instruction

* Implement LUI instruction

* Set Version: 0.1.17

* Move test-unit after kdist-build in test workflow so that kllvm-runtime is built

* Use a Map{Int, Int} instead of a RangeMap{Int, Bytes}

* Mark functions total

* Rename INVALIDINSTR to INVALID_INSTR

---------

Co-authored-by: devops <[email protected]>
  • Loading branch information
Scott-Guest and devops authored Jul 18, 2024
1 parent aad94bd commit f6c0156
Show file tree
Hide file tree
Showing 25 changed files with 1,009 additions and 55 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,12 +43,10 @@ jobs:
run: make check
- name: 'Run pyupgrade'
run: make pyupgrade
- name: 'Run unit tests'
run: make test-unit

integration-tests:
build-and-test:
needs: code-quality-checks
name: 'Integration Tests'
name: 'Build and Test'
runs-on: [self-hosted, linux, normal]
env:
CONTAINER: riscv-integration-${{ github.sha }}
Expand All @@ -66,6 +64,8 @@ jobs:
run: docker exec --user user ${CONTAINER} poetry install
- name: 'Build semantics'
run: docker exec --user user ${CONTAINER} make kdist-build
- name: 'Run unit tests'
run: docker exec --user user ${CONTAINER} make test-unit
- name: 'Run integration tests'
run: docker exec --user user ${CONTAINER} make test-integration
- name: 'Tear down Docker'
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ poetry-install:
# Semantics

kdist-build: poetry-install
$(POETRY) run kdist -v build riscv-semantics.llvm
$(POETRY) run kdist -v build riscv-semantics.llvm riscv-semantics.kllvm riscv-semantics.kllvm-runtime

kdist-clean: poetry-install
$(POETRY) run kdist clean
Expand Down
2 changes: 1 addition & 1 deletion package/version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.1.16
0.1.17
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "kriscv"
version = "0.1.16"
version = "0.1.17"
description = "K tooling for the RISC-V architecture"
authors = [
"Runtime Verification, Inc. <[email protected]>",
Expand Down
7 changes: 4 additions & 3 deletions src/kriscv/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ class KRISCVOpts: ...
@dataclass
class RunOpts(KRISCVOpts):
input_file: Path
end_symbol: str | None


def kriscv(args: Sequence[str]) -> None:
Expand All @@ -35,14 +36,14 @@ def _parse_args(args: Sequence[str]) -> KRISCVOpts:

match ns.command:
case 'run':
return RunOpts(input_file=Path(ns.input_file).resolve(strict=True))
return RunOpts(input_file=Path(ns.input_file).resolve(strict=True), end_symbol=ns.end_symbol)
case _:
raise AssertionError()


def _kriscv_run(opts: RunOpts) -> None:
tools = semantics()
final_conf = tools.run_elf(opts.input_file)
final_conf = tools.run_elf(opts.input_file, end_symbol=opts.end_symbol)
print(tools.kprint.pretty_print(final_conf))


Expand All @@ -53,7 +54,7 @@ def _arg_parser() -> ArgumentParser:

run_parser = command_parser.add_parser('run', help='execute RISC-V ELF files programs')
run_parser.add_argument('input_file', metavar='FILE', help='RISC-V ELF file to run')

run_parser.add_argument('--end-symbol', type=str, help='Symbol marking the address which terminates execution')
return parser


Expand Down
14 changes: 13 additions & 1 deletion src/kriscv/build.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
from __future__ import annotations

from typing import TYPE_CHECKING

from pyk.kdist import kdist
from pyk.kllvm import importer

from .tools import Tools

if TYPE_CHECKING:
from typing import Final

importer.import_kllvm(kdist.get('riscv-semantics.kllvm'))

runtime: Final = importer.import_runtime(kdist.get('riscv-semantics.kllvm-runtime'))


def semantics() -> Tools:
return Tools(definition_dir=kdist.get('riscv-semantics.llvm'))
return Tools(definition_dir=kdist.get('riscv-semantics.llvm'), runtime=runtime)
34 changes: 23 additions & 11 deletions src/kriscv/elf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,43 @@

from typing import TYPE_CHECKING

from pyk.prelude.bytes import bytesToken
from pyk.prelude.collections import rangemap_of
from pyk.prelude.collections import map_of
from pyk.prelude.kint import intToken

if TYPE_CHECKING:
from elftools.elf.elffile import ELFFile # type: ignore
from pyk.kast.inner import KInner


def _memory_rangemap(elf: ELFFile) -> KInner:
segments: dict[tuple[KInner, KInner], KInner] = {}
def _memory_segments(elf: ELFFile) -> dict[tuple[int, int], bytes]:
segments: dict[tuple[int, int], bytes] = {}
for seg in elf.iter_segments():
if seg['p_type'] == 'PT_LOAD':
start = seg['p_vaddr']
file_size = seg['p_filesz']
data = seg.data() + b'\0' * (seg['p_memsz'] - file_size)
segments[(intToken(start), intToken(start + file_size))] = bytesToken(data)
return rangemap_of(segments)
segments[(start, start + file_size)] = data
return segments


def _entry_point(elf: ELFFile) -> KInner:
def memory_map(elf: ELFFile) -> KInner:
mem_map: dict[KInner, KInner] = {}
for addr_range, data in _memory_segments(elf).items():
start, end = addr_range
for addr in range(start, end):
mem_map[intToken(addr)] = intToken(data[addr - start])
return map_of(mem_map)


def entry_point(elf: ELFFile) -> KInner:
return intToken(elf.header['e_entry'])


def config_vars(elf: ELFFile) -> dict[str, KInner]:
memory = _memory_rangemap(elf)
pc = _entry_point(elf)
return {'$MEM': memory, '$PC': pc}
def read_symbol(elf: ELFFile, symbol: str) -> list[int]:
symtab = elf.get_section_by_name('.symtab')
if symtab is None:
return []
syms = symtab.get_symbol_by_name(symbol)
if syms is None:
return []
return [sym['st_value'] for sym in syms]
2 changes: 0 additions & 2 deletions src/kriscv/hello.py

This file was deleted.

35 changes: 35 additions & 0 deletions src/kriscv/kdist/plugin.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
from __future__ import annotations

import shutil
import sys
from pathlib import Path
from typing import TYPE_CHECKING

from pyk.kbuild.utils import k_version
from pyk.kdist.api import Target
from pyk.kllvm.compiler import compile_kllvm, compile_runtime
from pyk.ktool.kompile import kompile

if TYPE_CHECKING:
Expand Down Expand Up @@ -40,6 +42,37 @@ def deps(self) -> tuple[str]:
return ('riscv-semantics.source',)


class KLLVMTarget(Target):
def build(self, output_dir: Path, deps: dict[str, Path], args: dict[str, Any], verbose: bool) -> None:
compile_kllvm(output_dir, verbose=verbose)

def context(self) -> dict[str, str]:
return {
'k-version': k_version().text,
'python-path': sys.executable,
'python-version': sys.version,
}


class KLLVMRuntimeTarget(Target):
def build(self, output_dir: Path, deps: dict[str, Path], args: dict[str, Any], verbose: bool) -> None:
compile_runtime(
definition_dir=deps['riscv-semantics.llvm'],
target_dir=output_dir,
verbose=verbose,
)

def deps(self) -> tuple[str, ...]:
return ('riscv-semantics.llvm',)

def context(self) -> dict[str, str]:
return {
'k-version': k_version().text,
'python-path': sys.executable,
'python-version': sys.version,
}


__TARGETS__: Final = {
'source': SourceTarget(),
'llvm': KompileTarget(
Expand All @@ -51,4 +84,6 @@ def deps(self) -> tuple[str]:
'warnings_to_errors': True,
},
),
'kllvm': KLLVMTarget(),
'kllvm-runtime': KLLVMRuntimeTarget(),
}
141 changes: 141 additions & 0 deletions src/kriscv/kdist/riscv-semantics/riscv-disassemble.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
```k
requires "riscv-instructions.md"
module RISCV-DISASSEMBLE
imports RISCV-INSTRUCTIONS
imports INT
imports STRING
syntax OpCode ::=
RTypeOpCode
| ITypeOpCode
| STypeOpCode
| BTypeOpCode
| UTypeOpCode
| JTypeOpCode
| UnrecognizedOpCode
syntax RTypeOpCode ::=
"OP"
syntax ITypeOpCode ::=
"JALR"
| "LOAD"
| "OP-IMM"
| "MISC-MEM"
| "SYSTEM"
syntax STypeOpCode ::=
"STORE"
syntax BTypeOpCode ::=
"BRANCH"
syntax UTypeOpCode ::=
"LUI"
| "AUIPC"
syntax JTypeOpCode ::=
"JAL"
syntax UnrecognizedOpCode ::=
"UNRECOGNIZED"
syntax OpCode ::= decodeOpCode(Int) [function, total]
rule decodeOpCode(55 ) => LUI
rule decodeOpCode(23 ) => AUIPC
rule decodeOpCode(111) => JAL
rule decodeOpCode(103) => JALR
rule decodeOpCode(99 ) => BRANCH
rule decodeOpCode(3 ) => LOAD
rule decodeOpCode(35 ) => STORE
rule decodeOpCode(19 ) => OP-IMM
rule decodeOpCode(51 ) => OP
rule decodeOpCode(15 ) => MISC-MEM
rule decodeOpCode(115) => SYSTEM
rule decodeOpCode(_ ) => UNRECOGNIZED [owise]
syntax EncodingType ::=
RType(opcode: RTypeOpCode, funct3: Int, funct7: Int, rd: Register, rs1: Register, rs2: Register)
| IType(opcode: ITypeOpCode, funct3: Int, rd: Register, rs1: Register, imm: Int)
| SType(opcode: STypeOpCode, funct3: Int, rs1: Register, rs2: Register, imm: Int)
| BType(opcode: BTypeOpCode, funct3: Int, rs1: Register, rs2: Register, imm: Int)
| UType(opcode: UTypeOpCode, rd: Register, imm: Int)
| JType(opcode: JTypeOpCode, rd: Register, imm: Int)
| UnrecognizedEncodingType(Int)
syntax EncodingType ::=
decode(Int) [function, total]
| decodeWithOp(OpCode, Int) [function]
rule decode(I) => decodeWithOp(decodeOpCode(I &Int 127), I >>Int 7)
rule decodeWithOp(OPCODE:RTypeOpCode, I) =>
RType(OPCODE, (I >>Int 5) &Int 7, (I >>Int 18) &Int 127, I &Int 31, (I >>Int 8) &Int 31, (I >>Int 13) &Int 31)
rule decodeWithOp(OPCODE:ITypeOpCode, I) =>
IType(OPCODE, (I >>Int 5) &Int 7, I &Int 31, (I >>Int 8) &Int 31, (I >>Int 13) &Int 4095)
rule decodeWithOp(OPCODE:STypeOpCode, I) =>
SType(OPCODE, (I >>Int 5) &Int 7, (I >>Int 8) &Int 31, (I >>Int 13) &Int 31, (((I >>Int 18) &Int 127) <<Int 5) |Int (I &Int 31))
rule decodeWithOp(OPCODE:BTypeOpCode, I) =>
BType(OPCODE, (I >>Int 5) &Int 7, (I >>Int 8) &Int 31, (I >>Int 13) &Int 31, (((I >>Int 24) &Int 1) <<Int 11) |Int ((I &Int 1) <<Int 10) |Int (((I >>Int 18) &Int 63) <<Int 4) |Int ((I >>Int 1) &Int 15))
rule decodeWithOp(OPCODE:UTypeOpCode, I) =>
UType(OPCODE, I &Int 31, (I >>Int 5) &Int 1048575)
rule decodeWithOp(OPCODE:JTypeOpCode, I) =>
JType(OPCODE, I &Int 31, (((I >>Int 24) &Int 1) <<Int 19) |Int (((I >>Int 5) &Int 255) <<Int 11) |Int (((I >>Int 13) &Int 1) <<Int 10) |Int ((I >>Int 14) &Int 1023))
rule decodeWithOp(_:UnrecognizedOpCode, I) =>
UnrecognizedEncodingType(I)
syntax Instruction ::= disassemble(Int) [symbol(disassemble), function, total, memo]
| disassemble(EncodingType) [function, total]
rule disassemble(I:Int) => disassemble(decode(I))
rule disassemble(RType(OP, 0, 0, RD, RS1, RS2)) => ADD RD , RS1 , RS2
rule disassemble(RType(OP, 0, 32, RD, RS1, RS2)) => SUB RD , RS1 , RS2
rule disassemble(RType(OP, 1, 0, RD, RS1, RS2)) => SLL RD , RS1 , RS2
rule disassemble(RType(OP, 2, 0, RD, RS1, RS2)) => SLT RD , RS1 , RS2
rule disassemble(RType(OP, 3, 0, RD, RS1, RS2)) => SLTU RD , RS1 , RS2
rule disassemble(RType(OP, 4, 0, RD, RS1, RS2)) => XOR RD , RS1 , RS2
rule disassemble(RType(OP, 5, 0, RD, RS1, RS2)) => SRL RD , RS1 , RS2
rule disassemble(RType(OP, 5, 32, RD, RS1, RS2)) => SRA RD , RS1 , RS2
rule disassemble(RType(OP, 6, 0, RD, RS1, RS2)) => OR RD , RS1 , RS2
rule disassemble(RType(OP, 7, 0, RD, RS1, RS2)) => AND RD , RS1 , RS2
rule disassemble(IType(OP-IMM, 0, RD, RS1, IMM)) => ADDI RD , RS1 , chopAndExtend(IMM, 12)
rule disassemble(IType(OP-IMM, 1, RD, RS1, IMM)) => SLLI RD , RS1 , IMM &Int 31 requires (IMM >>Int 5) &Int 127 ==Int 0
rule disassemble(IType(OP-IMM, 2, RD, RS1, IMM)) => SLTI RD , RS1 , chopAndExtend(IMM, 12)
rule disassemble(IType(OP-IMM, 3, RD, RS1, IMM)) => SLTIU RD , RS1 , chopAndExtend(IMM, 12)
rule disassemble(IType(OP-IMM, 4, RD, RS1, IMM)) => XORI RD , RS1 , chopAndExtend(IMM, 12)
rule disassemble(IType(OP-IMM, 5, RD, RS1, IMM)) => SRLI RD , RS1 , IMM &Int 31 requires (IMM >>Int 5) &Int 127 ==Int 0
rule disassemble(IType(OP-IMM, 5, RD, RS1, IMM)) => SRAI RD , RS1 , IMM &Int 31 requires (IMM >>Int 5) &Int 127 ==Int 32
rule disassemble(IType(OP-IMM, 6, RD, RS1, IMM)) => ORI RD , RS1 , chopAndExtend(IMM, 12)
rule disassemble(IType(OP-IMM, 7, RD, RS1, IMM)) => ANDI RD , RS1 , chopAndExtend(IMM, 12)
rule disassemble(IType(JALR, 0, RD, RS1, IMM)) => JALR RD , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(IType(LOAD, 0, RD, RS1, IMM)) => LB RD , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(IType(LOAD, 1, RD, RS1, IMM)) => LH RD , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(IType(LOAD, 2, RD, RS1, IMM)) => LW RD , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(IType(LOAD, 4, RD, RS1, IMM)) => LBU RD , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(IType(LOAD, 5, RD, RS1, IMM)) => LHU RD , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(IType(MISC-MEM, 0, 0, 0, 2099)) => FENCE.TSO
rule disassemble(IType(MISC-MEM, 0, 0, 0, IMM)) => FENCE (IMM >>Int 4) &Int 15 , IMM &Int 15 requires (IMM >>Int 8) &Int 15 ==Int 0
rule disassemble(IType(SYSTEM, 0, 0, 0, 0)) => ECALL
rule disassemble(IType(SYSTEM, 0, 0, 0, 1)) => EBREAK
rule disassemble(SType(STORE, 0, RS1, RS2, IMM)) => SB RS2 , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(SType(STORE, 1, RS1, RS2, IMM)) => SH RS2 , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(SType(STORE, 2, RS1, RS2, IMM)) => SW RS2 , chopAndExtend(IMM, 12) ( RS1 )
rule disassemble(BType(BRANCH, 0, RS1, RS2, IMM)) => BEQ RS1 , RS2 , chopAndExtend(IMM, 12) *Int 2
rule disassemble(BType(BRANCH, 1, RS1, RS2, IMM)) => BNE RS1 , RS2 , chopAndExtend(IMM, 12) *Int 2
rule disassemble(BType(BRANCH, 4, RS1, RS2, IMM)) => BLT RS1 , RS2 , chopAndExtend(IMM, 12) *Int 2
rule disassemble(BType(BRANCH, 5, RS1, RS2, IMM)) => BGE RS1 , RS2 , chopAndExtend(IMM, 12) *Int 2
rule disassemble(BType(BRANCH, 6, RS1, RS2, IMM)) => BLTU RS1 , RS2 , chopAndExtend(IMM, 12) *Int 2
rule disassemble(BType(BRANCH, 7, RS1, RS2, IMM)) => BGEU RS1 , RS2 , chopAndExtend(IMM, 12) *Int 2
rule disassemble(UType(LUI, RD, IMM)) => LUI RD , IMM
rule disassemble(UType(AUIPC, RD, IMM)) => AUIPC RD , IMM
rule disassemble(JType(JAL, RD, IMM)) => JAL RD , chopAndExtend(IMM, 20) *Int 2
rule disassemble(_:EncodingType) => INVALID_INSTR [owise]
endmodule
```
Loading

0 comments on commit f6c0156

Please sign in to comment.