Skip to content

Commit 642a3f5

Browse files
committed
Initialise the raspi pagetables statically in rust instead of asm.
This is another step towards dynamic generation of aarch64 pagetables as early as possible, avoiding a 2 stage init. Signed-off-by: Graham MacDonald <grahamamacdonald@gmail.com>
1 parent 5c10acc commit 642a3f5

9 files changed

Lines changed: 351 additions & 221 deletions

File tree

Cargo.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

aarch64/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ edition = "2024"
77
default-target = "aarch64-unknown-none"
88

99
[dependencies]
10-
aarch64-cpu = "10.0"
10+
aarch64-cpu = "11.2"
1111
bitstruct = "0.1"
1212
port = { path = "../port" }
1313
num_enum = { version = "0.7", default-features = false }

aarch64/lib/kernel.ld

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ SECTIONS {
5959

6060
/* Reserve section for early pagetables. */
6161
. = ALIGN(4096);
62+
earlyvm_pagetables = .;
63+
. += 32 * 4096;
64+
eearlyvm_pagetables = .;
65+
/* TODO Remove after new vm work */
6266
early_pagetables = .;
6367
. += 32 * 4096;
6468
eearly_pagetables = .;

aarch64/src/l.S

Lines changed: 3 additions & 211 deletions
Original file line numberDiff line numberDiff line change
@@ -25,82 +25,15 @@ SPSR_EL2_D = (1<<9) // Debug exception
2525

2626
CPACR_EL1_FPEN = (1<<21) | (1<<20) // Don't trap FPU instr at EL1,0
2727

28-
TCR_EL1_IPS = (4 << 32) // 44-bit physical addresses (IPS = 0b100)
29-
TCR_EL1_TG1 = (2 << 30) // TTBR1_EL1 4KiB granule
30-
TCR_EL1_SH1 = (3 << 28) //
31-
TCR_EL1_ORGN1 = (1 << 26) //
32-
TCR_EL1_IRGN1 = (1 << 24) //
33-
TCR_EL1_T1SZ = (16 << 16) // Size offset N of the region addressed by TTBR1_EL1; region size is 2^(64-N)
34-
TCR_EL1_TG0 = (0 << 14) // TTBR0_EL1 4KiB granule
35-
TCR_EL1_SH0 = (3 << 12) //
36-
TCR_EL1_ORGN0 = (1 << 10) //
37-
TCR_EL1_IRGN0 = (1 << 8) //
38-
TCR_EL1_T0SZ = (16 << 0) // Size offset N of the region addressed by TTBR0_EL1; region size is 2^(64-N)
39-
TCR_EL1 = (TCR_EL1_IPS|TCR_EL1_TG1|TCR_EL1_SH1|TCR_EL1_ORGN1|TCR_EL1_IRGN1|TCR_EL1_T1SZ|TCR_EL1_TG0|TCR_EL1_SH0|TCR_EL1_ORGN0|TCR_EL1_IRGN0|TCR_EL1_T0SZ)
40-
4128
SCTLR_EL1_I = (1 << 12) // Instruction access cacheability
4229
SCTLR_EL1_C = (1 << 2) // Data cacheability
4330
SCTLR_EL1_M = (1 << 0) // Enable MMU
4431
SCTLR_EL1 = (SCTLR_EL1_I|SCTLR_EL1_C|SCTLR_EL1_M)
4532

46-
// Preset memory attributes. This register stores 8 8-bit presets that are
47-
// referenced by index in the page table entries:
48-
// [0] 0xff - Normal
49-
// [1] 0x00 - Device (Non-gathering, non-reordering, no early write acknowledgement (most restrictive))
50-
MAIR_EL1 = 0x00ff
51-
PT_MAIR_NORMAL = (0<<2) // Use normal memory attributes
52-
PT_MAIR_DEVICE = (1<<2) // Use device memory attributes
53-
54-
PT_PAGE = 3 // 4KiB granule
55-
PT_BLOCK = 1 // 2MiB granule
56-
57-
// Page table entry AP Flag
58-
PT_AP_KERNEL_RW = (0<<6) // Kernel: rw
59-
PT_AP_KERNEL_RW_USER_RW = (1<<6) // Kernel: rw, User: rw
60-
PT_AP_KERNEL_RO = (2<<6) // Kernel: r
61-
PT_AP_KERNEL_RO_USER_RO = (3<<6) // Kernel: r, User: r
62-
63-
PT_AF = (1<<10) // Access Flag
64-
65-
PT_UXN = (1<<54) // User execute never
66-
PT_PXN = (1<<53) // Privileged execute never
67-
68-
// Cache shareability
69-
PT_NOSH = (0<<8) // Non-shareable (single core)
70-
PT_OSH = (2<<8) // Outer shareable (shared across CPUs, GPU)
71-
PT_ISH = (3<<8) // Inner shareable (shared across CPUs)
72-
7333
// This defines the kernel's virtual address location.
7434
// This value splits a 48 bit address space exactly in half, with the half
7535
// beginning with 1 going to the kernel.
7636
KZERO = 0xffff800000000000
77-
MiB = (1<<20)
78-
GiB = (1<<30)
79-
80-
// Constants for early uart setup
81-
MMIO_BASE_RPI4 = 0xfe000000
82-
GPIO = 0x00200000 // Offset from MMIO base
83-
84-
// Exception vector IDs
85-
SYNC_INVALID_EL1t = 0
86-
IRQ_INVALID_EL1t = 1
87-
FIQ_INVALID_EL1t = 2
88-
ERROR_INVALID_EL1t = 3
89-
90-
SYNC_INVALID_EL1h = 4
91-
IRQ_INVALID_EL1h = 5
92-
FIQ_INVALID_EL1h = 6
93-
ERROR_INVALID_EL1h = 7
94-
95-
SYNC_INVALID_EL0_64 = 8
96-
IRQ_INVALID_EL0_64 = 9
97-
FIQ_INVALID_EL0_64 = 10
98-
ERROR_INVALID_EL0_64 = 11
99-
100-
SYNC_INVALID_EL0_32 = 12
101-
IRQ_INVALID_EL0_32 = 13
102-
FIQ_INVALID_EL0_32 = 14
103-
ERROR_INVALID_EL0_32 = 15
10437

10538
.section .boottext, "awx"
10639
.globl start
@@ -110,7 +43,7 @@ start:
11043
// used again. There's also a couple that are best avoided out of
11144
// principle.
11245

113-
// x27: DTB address
46+
// x27: DTB address (physical address)
11447
// x28: Entrypoint address
11548
// x29: Frame pointer
11649
// x30: Link register
@@ -188,92 +121,8 @@ el1: // In EL1
188121
mov x0, #'.'
189122
bl init_early_uart_putc
190123

191-
// AArch64 memory management examples
192-
// https://developer.arm.com/documentation/102416/0100
193-
194-
// AArch64 Address Translation
195-
// https://developer.arm.com/documentation/100940/0101
196-
197-
// The kernel has been loaded at the entrypoint, but the
198-
// addresses used in the elf are virtual addresses in the higher half.
199-
// If we try to access them, the CPU will trap, so the next step is to
200-
// enable the MMU and identity map the kernel virtual addresses to the
201-
// physical addresses that the kernel was loaded into.
202-
203-
// The Aarch64 is super flexible. We can have page tables (granules)
204-
// of 4, 16, or 64KiB. If we assume 4KiB granules, we would have:
205-
// [47-39] Index into L4 table, used to get address of the L3 table
206-
// [38-30] Index into L3 table, used to get address of the L2 table
207-
// [29-21] Index into L2 table, used to get address of the L1 table
208-
// [20-12] Index into L1 table, used to get address of physical page
209-
// [11-0] Offset into physical page corresponding to virtual address
210-
// L4-L1 simply refers to the page table with L1 always being the last
211-
// to be translated, giving the address of the physical page.
212-
// With a 4KiB granule, each index is 9 bits, so there are 512 (2^9)
213-
// entries in each table. In this example the physical page would
214-
// also be 4KiB.
215-
216-
// If we reduce the number of page tables from 4 to 3 (L3 to L1),
217-
// we have 21 bits [20-0] for the physical page offset, giving 2MiB
218-
// pages. If we reduce to 2 tables, we have 30 bits [29-0], giving
219-
// 1GiB pages.
220-
221-
// If we use 16KiB granules, the virtual address is split as follows:
222-
// [46-36] Index into L3 table, used to get address of the L2 table
223-
// [35-25] Index into L2 table, used to get address of the L1 table
224-
// [24-14] Index into L1 table, used to get address of physical page
225-
// [13-0] Offset into physical page corresponding to virtual address
226-
// The 14 bits in the offset results in 16KiB pages. Each table is
227-
// 16KiB, consisting of 2048 entries, so requiring 11 bits per index.
228-
// If we instead use only 2 levels, that gives us bits [24-0] for the
229-
// offset into the physical page, which gives us 32MiB page size.
230-
231-
// Finally, if we use 64KiB granules, the virtual address is split as
232-
// follows:
233-
// [41-29] Index into L2 table, used to get address of the L1 table
234-
// [28-16] Index into L1 table, used to get address of physical page
235-
// [15-0] Offset into physical page corresponding to virtual address
236-
// The 16 bits in the offset results in 64KiB pages. Each table is
237-
// 64KiB, consisting of 8192 entries, so requiring 13 bits per index.
238-
// If we instead use only 1 level, that gives us bits [28-0] for the
239-
// offset into the physical page, which gives us 512MiB page size.
240-
241-
// The address of the top level table is stored in the translation table
242-
// base registers. ttbr0_el1 stores the address for the user space,
243-
// ttbr1_el1 stores the address for the kernel, both for EL1.
244-
// By default, ttbr1_el1 is used when the virtual address bit 55 is 1
245-
// otherwise ttbr0_el1 is used.
246-
247-
// Memory attributes are set per page table entry, and are hierarchical,
248-
// so settings at a higher page affect those they reference.
249-
250-
// Set up root tables for lower (ttbr0_el1) and higher (ttbr1_el1)
251-
// addresses. kernelpt4 is the root of the page hierarchy for addresses
252-
// of the form 0xffff800000000000 (KZERO and above), while physicalpt4
253-
// handles 0x0000000000000000 until KZERO. Although what we really
254-
// want is to move to virtual higher half addresses, we need to have
255-
// ttbr0_el1 identity mapped during the transition until the PC is also
256-
// in the higher half. This is because the PC is still in the lower
257-
// half immediately after the MMU is enabled. Once we enter rust-land,
258-
// we can define a new set of tables.
259-
adrp x0, kernelpt4
260-
msr ttbr1_el1, x0
261-
adrp x0, physicalpt4
262-
msr ttbr0_el1, x0
263-
264-
// Set up the translation control register tcr_el1 as so:
265-
// TCR_EL1_T0SZ: Size offset of region addressed by TTBR0_EL1 (region size 2^48)
266-
// TCR_EL1_T1SZ: Size offset of region addressed by TTBR1_EL1 (region size 2^48)
267-
// TCR_EL1_TG0: 4KiB granule
268-
// TCR_EL1_TG1: 4KiB granule
269-
// TCR_EL1_IPS: 44 bit physical addresses
270-
ldr x0, =(TCR_EL1)
271-
msr tcr_el1, x0
272-
273-
// The mair_el1 register contains 8 different cache settings, to be
274-
// referenced by index by any page table entry.
275-
ldr x0, =(MAIR_EL1)
276-
msr mair_el1, x0
124+
mov x0, x27 // DTB pointer
125+
bl init_vm
277126

278127
// Force changes to be seen before the MMU is enabled, then enable the MMU
279128
isb
@@ -304,63 +153,6 @@ higher_half:
304153
dnr: wfe
305154
b dnr
306155

307-
// Early page tables for mapping the kernel to the higher half.
308-
// It's assumed that the kernelpt* page tables will only be used until the
309-
// full VM code is running.
310-
311-
// Here we've set up a 2GiB page from the start of the kernel address space.
312-
// This covers 0xffff_8000_0000_0000 - 0xffff_8000_8000_0000, and should be more
313-
// than enough at this stage.
314-
315-
// We also want to map the MMIO section, which for the part of MMIO that we care
316-
// about for Raspberry Pi 4 (to allow us to use the miniuart), is basically
317-
// 2x2MiB sections starting from 0xfe00_0000. This is all in the lower half,
318-
// so to allow us to abandon the physicalpt4 temp page table quickly, we'll map
319-
// it into the higher half, starting at 0xffff_8000_fe00_0000. Note that this
320-
// is temporary - once we have a rust VM, the MMIO will be mapped somewhere
321-
// else.
322-
323-
// Unfortunately, this is very specific to Raspberry Pi 4. Once we're confident
324-
// that the aarch64 setup code in l.S is solid, we should disable the uart code
325-
// and perhaps have something that can be enabled manually for dev purposes only
326-
// in the future.
327-
328-
// One final note is that we've set up recursive page tables here. This is to
329-
// allow us to use the vm code, which assumes recursive pagetables, e.g. for
330-
// dumping out the page tables.
331-
.balign 4096
332-
kernelpt4:
333-
.space (256*8)
334-
.quad (kernelpt3 - KZERO) + (PT_AF|PT_PAGE) // [256] (for kernel + mmio)
335-
.space (254*8)
336-
.quad (kernelpt4 - KZERO) + (PT_AF|PT_PAGE) // [511] (recursive entry)
337-
338-
.balign 4096
339-
kernelpt3:
340-
.quad (0*2*GiB) + (PT_BLOCK|PT_AF|PT_AP_KERNEL_RW|PT_ISH|PT_UXN|PT_MAIR_NORMAL) // [0] (for kernel)
341-
.space (2*8)
342-
.quad (kernelpt2 - KZERO) + (PT_AF|PT_PAGE) // [3] (for mmio)
343-
.space (508*8)
344-
345-
.balign 4096
346-
kernelpt2:
347-
.space (496*8)
348-
.quad (MMIO_BASE_RPI4) + (PT_BLOCK|PT_AF|PT_AP_KERNEL_RW|PT_ISH|PT_UXN|PT_PXN|PT_MAIR_DEVICE) // [496] (for mmio)
349-
.quad (MMIO_BASE_RPI4 + GPIO) + (PT_BLOCK|PT_AF|PT_AP_KERNEL_RW|PT_ISH|PT_UXN|PT_PXN|PT_MAIR_DEVICE) // [497] (for mmio)
350-
.space (14*8)
351-
352-
// Early page tables for identity mapping the kernel physical addresses.
353-
// Once we've jumped to the higher half, this will no longer be used.
354-
.balign 4096
355-
physicalpt4:
356-
.quad (physicalpt3 - KZERO) + (PT_AF|PT_PAGE) // [0] (for kernel)
357-
.space (511*8)
358-
359-
.balign 4096
360-
physicalpt3:
361-
.quad (0*2*GiB) + (PT_BLOCK|PT_AF|PT_AP_KERNEL_RW|PT_ISH|PT_UXN|PT_MAIR_NORMAL) // [0] (for kernel)
362-
.space (511*8)
363-
364156
.bss
365157
.balign 4096
366158
stack: .space STACKSZ

aarch64/src/main.rs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@ mod devcons;
1212
mod deviceutil;
1313
mod io;
1414
mod kmem;
15-
mod l;
1615
mod mailbox;
1716
mod pagealloc;
1817
mod param;
18+
mod pre_mmu;
1919
mod registers;
2020
mod swtch;
2121
mod trap;
@@ -128,9 +128,6 @@ pub extern "C" fn main9(dtb_va: usize) {
128128
unsafe {
129129
vm::init_kernel_page_tables(&dt, dtb_physrange);
130130
vm::switch(vm::kernel_pagetable(), RootPageTableType::Kernel);
131-
132-
vm::init_user_page_tables();
133-
vm::switch(vm::user_pagetable(), RootPageTableType::User);
134131
}
135132

136133
// From this point we can use the global allocator
@@ -170,6 +167,11 @@ pub extern "C" fn main9(dtb_va: usize) {
170167

171168
println!("Set up a user process");
172169

170+
unsafe {
171+
vm::init_user_page_tables();
172+
vm::switch(vm::user_pagetable(), RootPageTableType::User);
173+
}
174+
173175
test_sysexit();
174176

175177
vmdebug::print_recursive_tables(RootPageTableType::Kernel);

aarch64/src/pre_mmu/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Code in this module should only be called before the MMU is enabled.
2+
3+
mod util;
4+
mod vminit;
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#![allow(dead_code)]
2+
13
// Functions to be called in early bootup phase, typically before the MMU has been enabled.
24
// This shouldn't normally be used for anything other than debugging at very early init.
35

@@ -74,6 +76,7 @@ pub extern "C" fn init_early_uart_rpi4() {
7476
}
7577
}
7678

79+
// Extern no mangle so we can call from asm if necessary
7780
#[unsafe(no_mangle)]
7881
pub extern "C" fn init_early_uart_putc(b: u8) {
7982
unsafe {
@@ -84,6 +87,24 @@ pub extern "C" fn init_early_uart_putc(b: u8) {
8487
}
8588
}
8689

90+
pub fn putstr(s: &str) {
91+
for b in s.bytes() {
92+
init_early_uart_putc(b);
93+
}
94+
}
95+
96+
pub fn putu64h(v: u64) {
97+
putstr("0x");
98+
for i in 0..16 {
99+
let a = ((v >> ((15 - i) * 4)) & 0xf) as u8;
100+
if a < 10 {
101+
init_early_uart_putc(('0' as u8 + a) as u8);
102+
} else {
103+
init_early_uart_putc(('a' as u8 + a - 10) as u8);
104+
}
105+
}
106+
}
107+
87108
unsafe fn write_or_volatile<T: BitOrAssign>(dst: *mut T, val: T) {
88109
unsafe {
89110
let mut new_val = read_volatile(dst);

0 commit comments

Comments
 (0)