@@ -25,82 +25,15 @@ SPSR_EL2_D = (1<<9) // Debug exception
2525
2626CPACR_EL1_FPEN = ( 1 << 21 ) | ( 1 << 20 ) // Don't trap FPU instr at EL1 , 0
2727
28- TCR_EL1_IPS = ( 4 << 32 ) // IPS = 0b100: 44-bit physical addresses (0b101 would select 48-bit)
29- TCR_EL1_TG1 = ( 2 << 30 ) // TTBR1_EL1 4KiB granule
30- TCR_EL1_SH1 = ( 3 << 28 ) //
31- TCR_EL1_ORGN1 = ( 1 << 26 ) //
32- TCR_EL1_IRGN1 = ( 1 << 24 ) //
33- TCR_EL1_T1SZ = ( 16 << 16 ) // T1SZ = N: region addressed by TTBR1_EL1 spans 2^(64 - N) bytes; N = 16 gives 2^48
34- TCR_EL1_TG0 = ( 0 << 14 ) // TTBR0_EL1 4KiB granule
35- TCR_EL1_SH0 = ( 3 << 12 ) //
36- TCR_EL1_ORGN0 = ( 1 << 10 ) //
37- TCR_EL1_IRGN0 = ( 1 << 8 ) //
38- TCR_EL1_T0SZ = ( 16 << 0 ) // T0SZ = N: region addressed by TTBR0_EL1 spans 2^(64 - N) bytes; N = 16 gives 2^48
39- TCR_EL1 = (TCR_EL1_IPS|TCR_EL1_TG1|TCR_EL1_SH1|TCR_EL1_ORGN1|TCR_EL1_IRGN1|TCR_EL1_T1SZ|TCR_EL1_TG0|TCR_EL1_SH0|TCR_EL1_ORGN0|TCR_EL1_IRGN0|TCR_EL1_T0SZ)
40-
4128SCTLR_EL1_I = ( 1 << 12 ) // Instruction access cacheability
4229SCTLR_EL1_C = ( 1 << 2 ) // Data cacheability
4330SCTLR_EL1_M = ( 1 << 0 ) // Enable MMU
4431SCTLR_EL1 = (SCTLR_EL1_I|SCTLR_EL1_C|SCTLR_EL1_M)
4532
46- // Preset memory attributes. This register stores eight 8-bit presets that are
47- // referenced by index in the page table entries:
48- // [ 0 ] 0xff - Normal
49- // [ 1 ] 0x00 - Device (non-gathering, non-reordering, no early write acknowledgement (most restrictive))
50- MAIR_EL1 = 0x00ff
51- PT_MAIR_NORMAL = ( 0 << 2 ) // Use normal memory attributes
52- PT_MAIR_DEVICE = ( 1 << 2 ) // Use device memory attributes
53-
54- PT_PAGE = 3 // 4KiB granule
55- PT_BLOCK = 1 // 2MiB granule
56-
57- // Page table entry AP Flag
58- PT_AP_KERNEL_RW = ( 0 << 6 ) // Kernel: rw
59- PT_AP_KERNEL_RW_USER_RW = ( 1 << 6 ) // Kernel: rw , User: rw
60- PT_AP_KERNEL_RO = ( 2 << 6 ) // Kernel: r
61- PT_AP_KERNEL_RO_USER_RO = ( 3 << 6 ) // Kernel: r , User: r
62-
63- PT_AF = ( 1 << 10 ) // Access Flag
64-
65- PT_UXN = ( 1 << 54 ) // User execute never
66- PT_PXN = ( 1 << 53 ) // Privileged execute never
67-
68- // Cache shareability
69- PT_NOSH = ( 0 << 8 ) // Non - shareable (single core)
70- PT_OSH = ( 2 << 8 ) // Outer shareable (shared across CPUs , GPU)
71- PT_ISH = ( 3 << 8 ) // Inner shareable (shared across CPUs)
72-
7333// This defines the kernel's virtual address location.
7434// This value splits a 48 bit address space exactly in half , with the half
7535// beginning with 1 going to the kernel.
7636KZERO = 0xffff800000000000
77- MiB = ( 1 << 20 )
78- GiB = ( 1 << 30 )
79-
80- // Constants for early uart setup
81- MMIO_BASE_RPI4 = 0xfe000000
82- GPIO = 0x00200000 // Offset from MMIO base
83-
84- // Exception vector IDs
85- SYNC_INVALID_EL1t = 0
86- IRQ_INVALID_EL1t = 1
87- FIQ_INVALID_EL1t = 2
88- ERROR_INVALID_EL1t = 3
89-
90- SYNC_INVALID_EL1h = 4
91- IRQ_INVALID_EL1h = 5
92- FIQ_INVALID_EL1h = 6
93- ERROR_INVALID_EL1h = 7
94-
95- SYNC_INVALID_EL0_64 = 8
96- IRQ_INVALID_EL0_64 = 9
97- FIQ_INVALID_EL0_64 = 10
98- ERROR_INVALID_EL0_64 = 11
99-
100- SYNC_INVALID_EL0_32 = 12
101- IRQ_INVALID_EL0_32 = 13
102- FIQ_INVALID_EL0_32 = 14
103- ERROR_INVALID_EL0_32 = 15
10437
10538. section .boottext , "awx"
10639.globl start
@@ -110,7 +43,7 @@ start:
11043 // used again. There's also a couple that are best avoided out of
11144 // principle.
11245
113- // x27: DTB address
46+ // x27: DTB address (physical address)
11447 // x28: Entrypoint address
11548 // x29: Frame pointer
11649 // x30: Link register
@@ -188,92 +121,8 @@ el1: // In EL1
188121 mov x0 , # '.'
189122 bl init_early_uart_putc
190123
191- // AArch64 memory management examples
192- // https://developer.arm.com/documentation/ 102416 / 0100
193-
194- // AArch64 Address Translation
195- // https://developer.arm.com/documentation/ 100940 / 0101
196-
197- // The kernel has been loaded at the entrypoint , but the
198- // addresses used in the elf are virtual addresses in the higher half.
199- // If we try to access them , the CPU will trap , so the next step is to
200- // enable the MMU and identity map the kernel virtual addresses to the
201- // physical addresses that the kernel was loaded into.
202-
203- // The Aarch64 is super flexible. We can have page tables (granules)
204- // of 4 , 16 , or 64KiB. If we assume 4KiB granules , we would have:
205- // [ 47 - 39 ] Index into L4 table , used to get address of the L3 table
206- // [ 38 - 30 ] Index into L3 table , used to get address of the L2 table
207- // [ 29 - 21 ] Index into L2 table , used to get address of the L1 table
208- // [ 20 - 12 ] Index into L1 table , used to get address of physical page
209- // [ 11 - 0 ] Offset into physical page corresponding to virtual address
210- // L4 - L1 simply refers to the page table with L1 always being the last
211- // to be translated , giving the address of the physical page.
212- // With a 4KiB granule , each index is 9 bits , so there are 512 ( 2 ^ 9 )
213- // entries in each table. In this example the physical page would
214- // also be 4KiB.
215-
216- // If we reduce the number of page tables from 4 to 3 (L3 to L1) ,
217- // we have 21 bits [ 20 - 0 ] for the physical page offset , giving 2MiB
218- // pages. If we reduce to 2 tables , we have 30 bits [ 29 - 0 ], giving
219- // 1GiB pages.
220-
221- // If we use 16KiB granules , the virtual address is split as follows:
222- // [ 46 - 36 ] Index into L3 table , used to get address of the L2 table
223- // [ 35 - 25 ] Index into L2 table , used to get address of the L1 table
224- // [ 24 - 14 ] Index into L1 table , used to get address of physical page
225- // [ 13 - 0 ] Offset into physical page corresponding to virtual address
226- // The 14 bits in the offset results in 16KiB pages. Each table is
227- // 16KiB , consisting of 2048 entries , so requiring 11 bits per index.
228- // If we instead use only 2 levels , that gives us bits [ 24 - 0 ] for the
229- // offset into the physical page , which gives us 32MiB page size.
230-
231- // Finally , if we use 64KiB granules , the virtual address is split as
232- // follows:
233- // [ 41 - 29 ] Index into L2 table , used to get address of the L1 table
234- // [ 28 - 16 ] Index into L1 table , used to get address of physical page
235- // [ 15 - 0 ] Offset into physical page corresponding to virtual address
236- // The 16 bits in the offset results in 64KiB pages. Each table is
237- // 64KiB , consisting of 8192 entries , so requiring 13 bits per index.
238- // If we instead use only 1 level , that gives us bits [ 28 - 0 ] for the
239- // offset into the physical page , which gives us 512MiB page size.
240-
241- // The address of the top level table is stored in the translation table
242- // base registers. ttbr0_el1 stores the address for the user space ,
243- // ttbr1_el1 stores the address for the kernel , both for EL1.
244- // By default , ttbr1_el1 is used when the virtual address bit 55 is 1
245- // otherwise ttbr0_el1 is used.
246-
247- // Memory attributes are set per page table entry , and are hierarchical ,
248- // so settings at a higher page affect those they reference.
249-
250- // Set up root tables for lower (ttbr0_el1) and higher (ttbr1_el1)
251- // addresses. kernelpt4 is the root of the page hierarchy for addresses
252- // of the form 0xffff800000000000 (KZERO and above) , while physicalpt4
253- // handles 0x0000000000000000 until KZERO. Although what we really
254- // want is to move to virtual higher half addresses , we need to have
255- // ttbr0_el1 identity mapped during the transition until the PC is also
256- // in the higher half. This is because the PC is still in the lower
257- // half immediately after the MMU is enabled. Once we enter rust - land ,
258- // we can define a new set of tables.
259- adrp x0 , kernelpt4
260- msr ttbr1_el1 , x0
261- adrp x0 , physicalpt4
262- msr ttbr0_el1 , x0
263-
264- // Set up the translation control register tcr_el1 as so:
265- // TCR_EL1_T0SZ: Size offset of region addressed by TTBR0_EL1: T0SZ = 16 , i.e. a 2^48 byte region
266- // TCR_EL1_T1SZ: Size offset of region addressed by TTBR1_EL1: T1SZ = 16 , i.e. a 2^48 byte region
267- // TCR_EL1_TG0: 4KiB granule
268- // TCR_EL1_TG1: 4KiB granule
269- // TCR_EL1_IPS: 44 bit physical addresses (IPS = 0b100)
270- ldr x0 , =(TCR_EL1)
271- msr tcr_el1 , x0
272-
273- // The mair_el1 register contains 8 different cache settings , to be
274- // referenced by index by any page table entry.
275- ldr x0 , =(MAIR_EL1)
276- msr mair_el1 , x0
124+ mov x0 , x27 // DTB pointer
125+ bl init_vm
277126
278127 // Force changes to be seen before MMU enabled , then enable MMU
279128 isb
@@ -304,63 +153,6 @@ higher_half:
304153dnr: wfe
305154 b dnr
306155
307- // Early page tables for mapping the kernel to the higher half.
308- // It's assumed that the kernelpt * page tables will only be used until the
309- // full VM code is running.
310-
311- // Here we've set up a 2GiB page from the start of the kernel address space.
312- // This covers 0xffff_8000_0000_0000 - 0xffff_8000_8000_0000 , and should be more
313- // than enough at this stage.
314-
315- // We also want to map the MMIO section , which for the part of MMIO that we care
316- // about for Raspberry Pi 4 (to allow us to use the miniuart) , is basically
317- // 2x2MiB sections starting at 0xfe00_0000. This is all in the lower half ,
318- // so to allow us to abandon the physicalpt4 temp page table quickly , we'll map
319- // it into the higher half , starting at 0xffff_8000_fe00_0000. Note that this
320- // is temporary - once we have a rust VM , the MMIO will be mapped somewhere
321- // else.
322-
323- // Unfortunately , this is very specific to Raspberry Pi 4. Once we're confident
324- // that the aarch64 setup code in l.S is solid , we should disable the uart code
325- // and perhaps have something that can be enabled manually for dev purposes only
326- // in the future.
327-
328- // One final note is that we've set up recursive page tables here. This is to
329- // allow us to use the vm code , which assumes recursive pagetables , e.g. for
330- // dumping out the page tables.
331- .balign 4096
332- kernelpt4:
333- .space ( 256 * 8 )
334- .quad (kernelpt3 - KZERO) + (PT_AF|PT_PAGE) // [ 256 ] (for kernel + mmio)
335- .space ( 254 * 8 )
336- .quad (kernelpt4 - KZERO) + (PT_AF|PT_PAGE) // [ 511 ] (recursive entry)
337-
338- .balign 4096
339- kernelpt3:
340- .quad ( 0 * 2 * GiB) + (PT_BLOCK|PT_AF|PT_AP_KERNEL_RW|PT_ISH|PT_UXN|PT_MAIR_NORMAL) // [ 0 ] (for kernel)
341- .space ( 2 * 8 )
342- .quad (kernelpt2 - KZERO) + (PT_AF|PT_PAGE) // [ 3 ] (for mmio)
343- .space ( 508 * 8 )
344-
345- .balign 4096
346- kernelpt2:
347- .space ( 496 * 8 )
348- .quad (MMIO_BASE_RPI4) + (PT_BLOCK|PT_AF|PT_AP_KERNEL_RW|PT_ISH|PT_UXN|PT_PXN|PT_MAIR_DEVICE) // [ 496 ] (for mmio)
349- .quad (MMIO_BASE_RPI4 + GPIO) + (PT_BLOCK|PT_AF|PT_AP_KERNEL_RW|PT_ISH|PT_UXN|PT_PXN|PT_MAIR_DEVICE) // [ 497 ] (for mmio)
350- .space ( 14 * 8 )
351-
352- // Early page tables for identity mapping the kernel physical addresses.
353- // Once we've jumped to the higher half , this will no longer be used.
354- .balign 4096
355- physicalpt4:
356- .quad (physicalpt3 - KZERO) + (PT_AF|PT_PAGE) // [ 0 ] (for kernel)
357- .space ( 511 * 8 )
358-
359- .balign 4096
360- physicalpt3:
361- .quad ( 0 * 2 * GiB) + (PT_BLOCK|PT_AF|PT_AP_KERNEL_RW|PT_ISH|PT_UXN|PT_MAIR_NORMAL) // [ 0 ] (for kernel)
362- .space ( 511 * 8 )
363-
364156.bss
365157.balign 4096
366158stack: .space STACKSZ
0 commit comments