Skip to content

Commit d7f75c5

Browse files
committed
Fix inline assembly in read_asm_loop_raw
It turns out that llvm_asm does not allow input registers to be modified, so all registers are added as outputs as well.
1 parent 5065cb3 commit d7f75c5

File tree

1 file changed

+30
-25
lines changed

1 file changed

+30
-25
lines changed

src/lib.rs

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ where
631631
/// done byte-wise, but the non-AVR fallback does actually use
632632
/// `core::ptr::copy` and therefore the pointers must be aligned.
633633
///
634+
#[cfg_attr(feature = "dev", inline(never))]
634635
unsafe fn read_asm_loop_raw<T>(p_addr: *const T, out: *mut T, len: u8) {
635636
// Here are the general requirements essentially required by the AVR-impl
636637
// However, assume, the non-AVR version is only used in tests, it makes a
@@ -662,29 +663,40 @@ unsafe fn read_asm_loop_raw<T>(p_addr: *const T, out: *mut T, len: u8) {
662663
// TODO: switch to use the extended lpm instruction if >64k
663664
assert!(p_addr as usize <= u16::MAX as usize);
664665

666+
// Some dummy variables so we can define "output" for our assembly.
667+
// In fact, we do not have outputs, but need to modify the
668+
// registers, thus we just mark them as "outputs".
669+
let mut _a: u8;
670+
let mut _b: *const ();
671+
let mut _c: *mut ();
672+
let mut _d: u8;
673+
665674
// A loop to read a slice of T from prog memory
666675
// The prog memory address (addr) is stored in the 16-bit address
667676
// register Z (since this is the default register for the `lpm`
668677
// instruction).
669678
// The output data memory address (out) is stored in the 16-bit
670679
// address register X, because Z is already used and Y seems to be
671-
// used other wise or is callee-save, whatever, it emits more
680+
// used otherwise or is callee-save, whatever, it emits more
672681
// instructions by llvm.
673682
//
674683
// This loop appears in the assembly, because it allows to exploit
675684
// `lpm 0, Z+` instruction that simultaneously increments the
676-
// pointer.
685+
// pointer, and allows to write a very compact loop.
677686
llvm_asm!(
678687
"
679-
// load value from program memory at indirect Z into register 0
680-
// and increment Z by one
681-
lpm 0, Z+
682-
// write register 0 to data memory at indirect X
683-
// and increment X by one
684-
st X+, 0
688+
// load value from program memory at indirect Z into temp
689+
// register $3 and post-increment Z by one
690+
lpm $3, Z+
691+
692+
// write register $3 to data memory at indirect X
693+
// and post-increment X by one
694+
st X+, $3
695+
685696
// Decrement the loop counter in register $0 (size_bytes).
686697
// If zero has been reached the equality flag is set.
687698
subi $0, 1
699+
688700
// Check whether the end has not been reached and if so jump back.
689701
// The end is reached if $0 (size_bytes) == 0, i.e. equality flag
690702
// is set.
@@ -693,23 +705,20 @@ unsafe fn read_asm_loop_raw<T>(p_addr: *const T, out: *mut T, len: u8) {
693705
// Notice: 4 instructions = 8 Byte
694706
brne -8
695707
"
696-
// No direct outputs
697-
:
708+
// Define all registers as outputs, so we may modify them
709+
: "=r"(_a), "=z"(_b), "=x"(_c), "=r"(_d)
698710
// Input the iteration count, input program memory address,
699-
// and output data memory address
700-
: "r"(size_bytes), "z"(p_addr), "x"(out)
701-
// The register 0 is clobbered
702-
: "0"
711+
// and output data memory address (tied to the respective
712+
// "output" registers)
713+
: "0"(size_bytes), "1"(p_addr), "2"(out)
714+
// Mark condition-codes and memory as clobbered
715+
: "cc", "memory"
703716
);
704717

705718
} else {
706-
// This is a non-AVR dummy.
707-
// We have to assume that otherwise a normal data or text segment
708-
// would be used, and thus that it is actually save to access it
709-
// directly!
710-
711-
// Notice the above assumption fails and results in UB for any other
712-
// Harvard architecture other than AVR.
719+
// Here, we are on a non-AVR platform.
720+
// We just use the normal data or text segment, and thus it is
721+
// actually safe to just access the data.
713722

714723
// Now, just copy the bytes from p_addr to out
715724
// It is safe, by the way, because we require the user to give us
@@ -922,10 +931,6 @@ pub unsafe fn read_slice(p: &[u8], out: &mut [u8]) {
922931
/// might be done actually use `core::ptr::copy` and therefore the pointers
923932
/// must be aligned.
924933
///
925-
/// Also notice, that the output slice must be correctly initialized, it would
926-
/// be UB if not. If you don't want to initialize the data upfront, the
927-
/// `read_value` might be a good alternative.
928-
///
929934
/// [`read_byte`]: fn.read_byte.html
930935
/// [`read_slice`]: fn.read_slice.html
931936
///

0 commit comments

Comments
 (0)