@@ -631,6 +631,7 @@ where
631
631
/// done byte-wise, but the non-AVR fallback does actually use
632
632
/// `core::ptr::copy` and therefore the pointers must be aligned.
633
633
///
634
+ #[ cfg_attr( feature = "dev" , inline( never) ) ]
634
635
unsafe fn read_asm_loop_raw < T > ( p_addr : * const T , out : * mut T , len : u8 ) {
635
636
// Here are the general requirements essentially required by the AVR-impl
636
637
// However, assume, the non-AVR version is only used in tests, it makes a
@@ -662,29 +663,40 @@ unsafe fn read_asm_loop_raw<T>(p_addr: *const T, out: *mut T, len: u8) {
662
663
// TODO: switch to use the extended lpm instruction if >64k
663
664
assert!( p_addr as usize <= u16 :: MAX as usize ) ;
664
665
666
+ // Some dummy variables so we can define "output" for our assembly.
667
+ // In fact, we do not have outputs, but need to modify the
668
+ // registers, thus we just mark them as "outputs".
669
+ let mut _a: u8 ;
670
+ let mut _b: * const ( ) ;
671
+ let mut _c: * mut ( ) ;
672
+ let mut _d: u8 ;
673
+
665
674
// A loop to read a slice of T from prog memory
666
675
// The prog memory address (addr) is stored in the 16-bit address
667
676
// register Z (since this is the default register for the `lpm`
668
677
// instruction).
669
678
// The output data memory address (out) is stored in the 16-bit
670
679
// address register X, because Z is already used and Y seems to be
671
- // used other wise or is callee-save, whatever, it emits more
680
+ // used otherwise or is callee-save, whatever, it emits more
672
681
// instructions by llvm.
673
682
//
674
683
// This loop appears in the assembly, because it allows to exploit
675
684
// `lpm 0, Z+` instruction that simultaneously increments the
676
- // pointer.
685
+ // pointer, and allows writing a very compact loop.
677
686
llvm_asm!(
678
687
"
679
- // load value from program memory at indirect Z into register 0
680
- // and increment Z by one
681
- lpm 0, Z+
682
- // write register 0 to data memory at indirect X
683
- // and increment X by one
684
- st X+, 0
688
+ // load value from program memory at indirect Z into temp
689
+ // register $3 and post-increment Z by one
690
+ lpm $3, Z+
691
+
692
+ // write register $3 to data memory at indirect X
693
+ // and post-increment X by one
694
+ st X+, $3
695
+
685
696
// Decrement the loop counter in register $0 (size_bytes).
686
697
// If zero has been reached the equality flag is set.
687
698
subi $0, 1
699
+
688
700
// Check whether the end has not been reached and if so jump back.
689
701
// The end is reached if $0 (size_bytes) == 0, i.e. equality flag
690
702
// is set.
@@ -693,23 +705,20 @@ unsafe fn read_asm_loop_raw<T>(p_addr: *const T, out: *mut T, len: u8) {
693
705
// Notice: 4 instructions = 8 Byte
694
706
brne -8
695
707
"
696
- // No direct outputs
697
- :
708
+ // Define all registers as outputs, so we may modify them
709
+ : "=r" ( _a ) , "=z" ( _b ) , "=x" ( _c ) , "=r" ( _d )
698
710
// Input the iteration count, input program memory address,
699
- // and output data memory address
700
- : "r" ( size_bytes) , "z" ( p_addr) , "x" ( out)
701
- // The register 0 is clobbered
702
- : "0"
711
+ // and output data memory address (tied to the respective
712
+ // "output" registers)
713
+ : "0" ( size_bytes) , "1" ( p_addr) , "2" ( out)
714
+ // Mark condition-codes and memory as clobbered
715
+ : "cc" , "memory"
703
716
) ;
704
717
705
718
} else {
706
- // This is a non-AVR dummy.
707
- // We have to assume that otherwise a normal data or text segment
708
- // would be used, and thus that it is actually save to access it
709
- // directly!
710
-
711
- // Notice the above assumption fails and results in UB for any other
712
- // Harvard architecture other than AVR.
719
+ // Here, we are on a non-AVR platform.
720
+ // We just use the normal data or text segment, and thus it is
721
+ // actually safe to just access the data.
713
722
714
723
// Now, just copy the bytes from p_addr to out
715
724
// It is safe, by the way, because we require the user to give us
@@ -922,10 +931,6 @@ pub unsafe fn read_slice(p: &[u8], out: &mut [u8]) {
922
931
/// might be done actually use `core::ptr::copy` and therefore the pointers
923
932
/// must be aligned.
924
933
///
925
- /// Also notice, that the output slice must be correctly initialized, it would
926
- /// be UB if not. If you don't want to initialize the data upfront, the
927
- /// `read_value` might be a good alternative.
928
- ///
929
934
/// [`read_byte`]: fn.read_byte.html
930
935
/// [`read_slice`]: fn.read_slice.html
931
936
///
0 commit comments