Skip to content

Commit 98f8b9c

Browse files
committed
Merge branch 'v0.1' into v0.2
2 parents 3976ba2 + 4d28750 commit 98f8b9c

File tree

1 file changed

+31
-25
lines changed

1 file changed

+31
-25
lines changed

src/raw.rs

+31-25
Original file line numberDiff line numberDiff line change
@@ -141,6 +141,7 @@ where
141141
}
142142
}
143143

144+
144145
/// Read an array of type `T` from progmem into data array.
145146
///
146147
/// This function uses the optimized `read_asm_loop_raw` with a looped
@@ -163,6 +164,7 @@ where
163164
/// done byte-wise, but the non-AVR fallback does actually use
164165
/// `core::ptr::copy` and therefore the pointers must be aligned.
165166
///
167+
#[cfg_attr(feature = "dev", inline(never))]
166168
unsafe fn read_asm_loop_raw<T>(p_addr: *const T, out: *mut T, len: u8) {
167169
// Here are the general requirements essentially required by the AVR-impl
168170
// However, assume, the non-AVR version is only used in tests, it makes a
@@ -194,29 +196,40 @@ unsafe fn read_asm_loop_raw<T>(p_addr: *const T, out: *mut T, len: u8) {
194196
// TODO: switch to use the extended lpm instruction if >64k
195197
assert!(p_addr as usize <= u16::MAX as usize);
196198

199+
// Some dummy variables so we can define "output" for our assembly.
200+
// In fact, we do not have outputs, but need to modify the
201+
// registers, thus we just mark them as "outputs".
202+
let mut _a: u8;
203+
let mut _b: *const ();
204+
let mut _c: *mut ();
205+
let mut _d: u8;
206+
197207
// A loop to read a slice of T from prog memory
198208
// The prog memory address (addr) is stored in the 16-bit address
199209
// register Z (since this is the default register for the `lpm`
200210
// instruction).
201211
// The output data memory address (out) is stored in the 16-bit
202212
// address register X, because Z is already used and Y seems to be
203-
// used other wise or is callee-save, whatever, it emits more
213+
// used otherwise or is callee-save, whatever, it emits more
204214
// instructions by llvm.
205215
//
206216
// This loop appears in the assembly, because it allows to exploit
207217
// `lpm 0, Z+` instruction that simultaneously increments the
208-
// pointer.
218+
// pointer, and allows to write a very compact loop.
209219
llvm_asm!(
210220
"
211-
// load value from program memory at indirect Z into register 0
212-
// and increment Z by one
213-
lpm 0, Z+
214-
// write register 0 to data memory at indirect X
215-
// and increment X by one
216-
st X+, 0
221+
// load value from program memory at indirect Z into temp
222+
// register $3 and post-increment Z by one
223+
lpm $3, Z+
224+
225+
// write register $3 to data memory at indirect X
226+
// and post-increment X by one
227+
st X+, $3
228+
217229
// Decrement the loop counter in register $0 (size_bytes).
218230
// If zero has been reached the equality flag is set.
219231
subi $0, 1
232+
220233
// Check whether the end has not been reached and if so jump back.
221234
// The end is reached if $0 (size_bytes) == 0, i.e. equality flag
222235
// is set.
@@ -225,23 +238,20 @@ unsafe fn read_asm_loop_raw<T>(p_addr: *const T, out: *mut T, len: u8) {
225238
// Notice: 4 instructions = 8 Byte
226239
brne -8
227240
"
228-
// No direct outputs
229-
:
241+
// Define all registers as outputs, so we may modify them
242+
: "=r"(_a), "=z"(_b), "=x"(_c), "=r"(_d)
230243
// Input the iteration count, input program memory address,
231-
// and output data memory address
232-
: "r"(size_bytes), "z"(p_addr), "x"(out)
233-
// The register 0 is clobbered
234-
: "0"
244+
// and output data memory address (tied to the respective
245+
// "output" registers)
246+
: "0"(size_bytes), "1"(p_addr), "2"(out)
247+
// Mark condition-codes and memory as clobbered
248+
: "cc", "memory"
235249
);
236250

237251
} else {
238-
// This is a non-AVR dummy.
239-
// We have to assume that otherwise a normal data or text segment
240-
// would be used, and thus that it is actually save to access it
241-
// directly!
242-
243-
// Notice the above assumption fails and results in UB for any other
244-
// Harvard architecture other than AVR.
252+
// Here, we are on a non-AVR platform.
253+
// We just use the normal data or text segment, and thus it is
254+
// actually safe to just access the data.
245255

246256
// Now, just copy the bytes from p_addr to out
247257
// It is safe by the way, because we require the user to give us
@@ -372,10 +382,6 @@ where
372382
/// might be done actually use `core::ptr::copy` and therefore the pointers
373383
/// must be aligned.
374384
///
375-
/// Also notice, that the output slice must be correctly initialized, it would
376-
/// be UB if not. If you don't want to initialize the data upfront, the
377-
/// `read_value` might be a good alternative.
378-
///
379385
/// [`read_byte`]: fn.read_byte.html
380386
/// [`read_slice`]: fn.read_slice.html
381387
///

0 commit comments

Comments
 (0)