Skip to content

Commit 5ae6947

Browse files
disassembly working (to a degree)
1 parent eb150e5 commit 5ae6947

7 files changed

+375
-136
lines changed

README.md

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# `asha` - a RISC-V decompiler written in Rust
2+
// technically it's just a disassembler for now, still a work in progress
3+
4+
`asha` is a decompiler that I'm writing as a final-year project.
5+
6+
## Current RISC-V extension support
7+
- RV32I
8+
- RV64I
9+
10+
## Current comparison to `llvm-objdump`
11+
|![](https://github.com/redraincatching/asha/raw/main/images/objdump_comparison.png)|![](https://github.com/redraincatching/asha/raw/main/images/asha_comparison.png)|
12+
|:---------------------:|:-------------:|
13+
| `llvm-objdump` output | `asha` output |

images/asha_comparison.png

25.9 KB
Loading

images/objdump_comparison.png

30 KB
Loading

src/disassembly.rs

+109-35
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,14 @@ use crate::instructions::*;
22

33
/// # instruction decoding
44
/// TODO: explain this
5-
fn disassemble(instruction: Instruction) -> Option<InstructionType> {
5+
pub fn disassemble(instruction: Instruction) -> Option<InstructionType> {
66
// check if the instruction is valid
77
if (instruction == 0) || (instruction == 0xFFFFFFFF) || (instruction & 0b11 != 0b11) {
88
return None
99
}
1010

1111
// determine which function it is
1212
if let Some((name, i_type)) = determine_name(&instruction) {
13-
// TODO: match on instruction type and change register operands using the enum
14-
1513
match i_type {
1614
IT::R => Some(InstructionType::R {
1715
name,
@@ -47,6 +45,7 @@ fn disassemble(instruction: Instruction) -> Option<InstructionType> {
4745
rd: ABIRegister::from(retrieve!(rd instruction) as u8),
4846
imm: retrieve!(jimm instruction)
4947
})
48+
// TODO: extend for R4
5049
}
5150
} else {
5251
None
@@ -55,35 +54,64 @@ fn disassemble(instruction: Instruction) -> Option<InstructionType> {
5554

5655
/// # Determine the function name
5756
/// TODO: better documentation for all of these, honestly
58-
/// TODO: account for R4 instructions here too, could just do it in the table with correct masks?
59-
fn determine_name(instruction: &Instruction) -> Option<(String, IT)> {
57+
/// TODO: account for R4 instructions here too
58+
fn determine_name(instruction: &Instruction) -> Option<(&'static str, IT)> {
6059
// this function was revealed to me in a dream
6160

62-
// what i actually want to do here is combine the three fields into a single binary number
63-
// and get which one it is with a jump table
64-
// something like (funct7.unwrap_or(0) << 8 | funct3.unwrap_or(0) << 5 | opcode as u32)
6561
let opcode: u8 = retrieve!(opcode instruction).try_into().unwrap();
66-
let funct3: u8 = retrieve!(funct3 instruction).try_into().unwrap();
67-
let funct7: u8 = retrieve!(funct7 instruction).try_into().unwrap();
62+
let i_type = determine_type(opcode)?;
63+
64+
let funct3: u8;
65+
let funct7: u8;
66+
67+
// U and J-type only use opcode
68+
if !(i_type == IT::U || i_type == IT::J) {
69+
funct3 = retrieve!(funct3 instruction).try_into().unwrap();
70+
71+
// only R-type uses funct7 (plus the single I-type opcode 0b00100, checked below)
72+
if i_type == IT::R || opcode == 0b00100 {
73+
funct7 = retrieve!(funct7 instruction).try_into().unwrap();
74+
} else {
75+
funct7 = 0;
76+
}
77+
78+
} else {
79+
funct3 = 0;
80+
funct7 = 0;
81+
}
6882

6983
// use from_bits to look up the instruction name
70-
if let Some((name, i_type)) = from_bits(opcode, funct3, funct7) {
71-
Some((name, i_type))
84+
from_bits(opcode, funct3, funct7).map(|name| (name, i_type))
85+
}
86+
87+
/// Determine the type of instruction, and therefore which fields to match on
88+
fn determine_type(opcode: u8) -> Option<IT> {
89+
let bf = OpcodeBitfield::from_opcode(opcode);
90+
91+
if (bf.op4 && bf.op2) || (bf.op3 && bf.op2 && !bf.op0) {
92+
Some(IT::R)
93+
} else if (!bf.op4 && !bf.op3 && !bf.op2) || (!bf.op4 && !bf.op3 && !bf.op0) || (bf.op4 && bf.op3 && !bf.op1 && bf.op0) {
94+
Some(IT::I)
95+
} else if !bf.op4 && bf.op3 && !bf.op2 {
96+
Some(IT::S)
97+
} else if bf.op4 && bf.op3 && !bf.op0 {
98+
Some(IT::B)
99+
} else if bf.op2 && bf.op0 {
100+
Some(IT::U)
101+
} else if bf.op3 && !bf.op2 && bf.op1 {
102+
Some(IT::J)
103+
//} else if bf.op4 && !bf.op3 && !bf.op2 {
104+
// Some(IT::R4)
72105
} else {
73106
None
74-
}
107+
}
75108
}
76109

77110
/// Convert from bit fields to instruction
78-
/// TODO: swap this from a match statement to a phf, see https://github.com/rust-phf/rust-phf
79-
/// also of note, https://github.com/abonander/mime_guess/pull/14/files
80-
fn from_bits(opcode: u8, funct3: u8, funct7: u8) -> Option<(String, IT)> {
81-
// convert to single value
82-
match (opcode, funct3, funct7) {
83-
// RV32I
84-
(0b01100, 0b000, 0b0000000) => Some(("add".to_string(), IT::R)),
85-
_ => None
86-
}
111+
fn from_bits(opcode: u8, funct3: u8, funct7: u8) -> Option<&'static str> {
112+
// convert to array so that the phf map can use it as a key
113+
let key: [u8; 3] = [opcode, funct3, funct7];
114+
INSTRUCTIONS.get(&key).cloned()
87115
}
88116

89117
#[cfg(test)]
@@ -92,23 +120,69 @@ mod test {
92120

93121
#[test]
94122
fn test_decoding() {
95-
// TODO: replace these with ones with nontrivial operands
96-
let r_type = 0x4000503b; // sraw
97-
let u_type = 0x00000037; // lui
98-
let i_type = 0x00002003; // lw
99-
let b_type = 0x00000063; // beq
100-
let s_type = 0x4000503b; // sraw
101-
let j_type = 0x00003023; // sd
123+
let r_type = 0x40c5d53b; // sraw a0, a1, a2
124+
let u_type = 0x076192b7; // lui t0, 30233
125+
let i_type = 0x05002083; // lw ra, 80
126+
let b_type = 0x00928263; // beq t0, s1, 4
127+
let s_type = 0x01103523; // sd a7, 10
128+
let j_type = 0x04c0016f; // jal sp, 76
102129

103130
assert_eq!(
104131
disassemble(r_type),
105-
// TODO: test versions with ABI and raw identifiers?
106-
// or maybe not
107132
Some(InstructionType::R {
108-
name: "sraw".to_string(),
109-
rd: ABIRegister::zero,
133+
name: "sraw",
134+
rd: ABIRegister::a0,
135+
rs1: ABIRegister::a1,
136+
rs2: ABIRegister::a2
137+
})
138+
);
139+
140+
assert_eq!(
141+
disassemble(u_type),
142+
Some(InstructionType::U {
143+
name: "lui",
144+
rd: ABIRegister::t0,
145+
imm: 30233
146+
})
147+
);
148+
149+
assert_eq!(
150+
disassemble(i_type),
151+
Some(InstructionType::I {
152+
name: "lw",
153+
rd: ABIRegister::ra,
154+
rs1: ABIRegister::zero,
155+
imm: 80
156+
})
157+
);
158+
159+
assert_eq!(
160+
disassemble(b_type),
161+
Some(InstructionType::B {
162+
name: "beq",
163+
rs1: ABIRegister::t0,
164+
rs2: ABIRegister::s1,
165+
imm: 4
166+
})
167+
);
168+
169+
assert_eq!(
170+
disassemble(s_type),
171+
Some(InstructionType::S {
172+
name: "sd",
110173
rs1: ABIRegister::zero,
111-
rs2: ABIRegister::zero
112-
}));
174+
rs2: ABIRegister::a7,
175+
imm: 10
176+
})
177+
);
178+
179+
assert_eq!(
180+
disassemble(j_type),
181+
Some(InstructionType::J {
182+
name: "jal",
183+
rd: ABIRegister::sp,
184+
imm: 76
185+
})
186+
);
113187
}
114188
}

0 commit comments

Comments
 (0)