Skip to content

Commit d32a91e

Browse files
committed
Add a lazily loaded PmByteString that loads one char at a time.
See #3
1 parent c748179 commit d32a91e

File tree

4 files changed

+347
-0
lines changed

4 files changed

+347
-0
lines changed

examples/uno-pm-string.rs

+114
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
//
2+
// This file provides an example of how to use strings on an Arduino Uno.
3+
//
4+
5+
6+
// Define no_std only for AVR
7+
#![cfg_attr(target_arch = "avr", no_std)]
8+
#![no_main]
9+
//
10+
// To unwrap the Option in const context
11+
#![feature(const_option)]
12+
13+
14+
use avr_progmem::string::PmByteString; // A progmem wrapper for strings
15+
#[cfg(target_arch = "avr")]
16+
use panic_halt as _; // halting panic implementation for AVR
17+
18+
19+
/// A string directly in progmem
20+
#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
21+
static UNICODE_TEXT: PmByteString<137> = unsafe {
22+
PmByteString::new(
23+
"dai 大賢者 kenja, Völlerei lässt grüßen, le garçon de théâtre, Ελληνική Δημοκρατία, \
24+
Слава Україні",
25+
)
26+
.unwrap()
27+
};
28+
29+
/// A string directly in progmem
30+
#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
31+
static LONG_TEXT: PmByteString<242> = unsafe {
32+
PmByteString::new(
33+
"
34+
A long test string literal, that is stored in progmem instead of DRAM.
35+
Of course, it needs to be temporarily load into DRAM.
36+
However, unlike a `ByteString`, it will be only read a char at a time,
37+
thus a `PmByteString` can never be too long.
38+
",
39+
)
40+
.unwrap()
41+
};
42+
43+
/// A single string that is over 2 KiB in size
44+
#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
45+
static MUCH_LONGER_TEXT: PmByteString<2177> =
46+
unsafe { PmByteString::new(include_str!("./test_text.txt")).unwrap() };
47+
48+
49+
// Include a fancy printer supporting Arduino Uno's USB-Serial output as well
50+
// as stdout on non-AVR targets.
51+
mod printer;
52+
use printer::Printer;
53+
54+
#[no_mangle]
55+
fn main() -> ! {
56+
let mut printer = {
57+
#[cfg(target_arch = "avr")]
58+
{
59+
// Initialize the USB-Serial output on the Arduino Uno
60+
61+
let dp = arduino_hal::Peripherals::take().unwrap();
62+
let pins = arduino_hal::pins!(dp);
63+
let serial = arduino_hal::default_serial!(dp, pins, 9600);
64+
65+
Printer(serial)
66+
}
67+
#[cfg(not(target_arch = "avr"))]
68+
{
69+
// Just use stdout for non-AVR targets
70+
Printer
71+
}
72+
};
73+
74+
// Print some introduction text
75+
printer.println("Hello from Arduino!");
76+
printer.println("");
77+
printer.println("--------------------------");
78+
printer.println("");
79+
80+
// Read string from progmem char-by-char
81+
for c in LONG_TEXT.chars() {
82+
printer.print(c);
83+
}
84+
85+
printer.println("");
86+
87+
// Or just use the `ufmt::uDisplay` impl
88+
#[cfg(feature = "ufmt")]
89+
ufmt::uwrite!(&mut printer, "{}", &UNICODE_TEXT).unwrap();
90+
91+
printer.println("");
92+
93+
// Thus loading 2 KiB with ease
94+
#[cfg(feature = "ufmt")]
95+
ufmt::uwrite!(&mut printer, "{}", MUCH_LONGER_TEXT).unwrap();
96+
97+
// Print some final lines
98+
printer.println("");
99+
printer.println("--------------------------");
100+
printer.println("");
101+
printer.println("DONE");
102+
103+
// It is very convenient to just exit on non-AVR platforms, otherwise users
104+
// might get the impression that the program hangs, whereas it already
105+
// succeeded.
106+
#[cfg(not(target_arch = "avr"))]
107+
std::process::exit(0);
108+
109+
// Otherwise, that is on AVR, just go into an infinite loop, because on AVR
110+
// we just can't exit!
111+
loop {
112+
// Done, just do nothing
113+
}
114+
}

src/string.rs

+113
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
use core::fmt;
22
use core::ops::Deref;
33

4+
use crate::wrapper::PmIter;
5+
use crate::ProgMem;
46

57

68
mod from_slice;
9+
mod validations;
710

811

912

@@ -56,6 +59,11 @@ impl<const N: usize> ByteString<N> {
5659
Err(_e) => None,
5760
}
5861
}
62+
63+
/// Returns the underlying byte array.
64+
pub fn as_bytes(&self) -> &[u8; N] {
65+
&self.0
66+
}
5967
}
6068

6169
impl<const N: usize> Deref for ByteString<N> {
@@ -123,3 +131,108 @@ macro_rules! progmem_str {
123131
&*TEXT.load()
124132
}};
125133
}
134+
135+
136+
137+
/// A byte string in progmem
138+
///
139+
/// Not to be confused with a [`ByteString`].
140+
/// A `ByteString` is just a wrapper around a byte array (`[u8;N]`) that can
141+
/// be put into a [`ProgMem`].
142+
/// A `PmByteString` on the other hand, is a wrapper around a
143+
/// `ProgMem<[u8;N]>`.
144+
///
145+
#[repr(transparent)]
146+
pub struct PmByteString<const N: usize>(pub ProgMem<[u8; N]>);
147+
148+
impl<const N: usize> PmByteString<N> {
149+
/// Creates a new byte array from the given string
150+
///
151+
/// # Safety
152+
///
153+
/// This function is only sound to call, if the value is
154+
/// stored in a static that is for instance attributed with
155+
/// `#[link_section = ".progmem.data"]`.
156+
pub const unsafe fn new(s: &str) -> Option<Self> {
157+
Self::from_bytes(s.as_bytes())
158+
}
159+
160+
/// Wraps the given byte slice
161+
///
162+
/// # Safety
163+
///
164+
/// This function is only sound to call, if the value is
165+
/// stored in a static that is for instance attributed with
166+
/// `#[link_section = ".progmem.data"]`.
167+
pub const unsafe fn from_bytes(bytes: &[u8]) -> Option<Self> {
168+
let res = from_slice::array_ref_try_from_slice(bytes);
169+
170+
match res {
171+
Ok(array) => {
172+
let array = *array;
173+
let pm = unsafe { ProgMem::new(array) };
174+
Some(Self(pm))
175+
},
176+
Err(_e) => None,
177+
}
178+
}
179+
180+
/// Returns the underlying progmem byte array.
181+
pub fn as_bytes(&self) -> &ProgMem<[u8; N]> {
182+
&self.0
183+
}
184+
185+
/// Lazily iterate over the `char`s of the string.
186+
///
187+
/// This function is analog to [`ProgMem::iter`], except it is over the
188+
/// `char`s of this string.
189+
pub fn chars(&self) -> PmChars<N> {
190+
PmChars::new(self)
191+
}
192+
}
193+
194+
impl<const N: usize> fmt::Display for PmByteString<N> {
195+
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
196+
for c in self.chars() {
197+
write!(fmt, "{}", c)?
198+
}
199+
Ok(())
200+
}
201+
}
202+
203+
#[cfg(any(feature = "ufmt", doc))]
#[doc(cfg(feature = "ufmt"))]
impl<const N: usize> ufmt::uDisplay for PmByteString<N> {
    /// Writes the string one `char` at a time, as produced by
    /// [`PmByteString::chars`], stopping at the first write error.
    fn fmt<W>(&self, out: &mut ufmt::Formatter<'_, W>) -> Result<(), W::Error>
    where
        W: ufmt::uWrite + ?Sized,
    {
        for ch in self.chars() {
            ufmt::uwrite!(out, "{}", ch)?;
        }

        Ok(())
    }
}
216+
217+
218+
/// An iterator over a [`PmByteString`]
219+
pub struct PmChars<'a, const N: usize> {
220+
bytes: PmIter<'a, u8, N>,
221+
}
222+
223+
impl<'a, const N: usize> PmChars<'a, N> {
224+
pub fn new(pm: &'a PmByteString<N>) -> Self {
225+
PmChars {
226+
bytes: pm.0.iter(),
227+
}
228+
}
229+
}
230+
231+
impl<'a, const N: usize> Iterator for PmChars<'a, N> {
232+
type Item = char;
233+
234+
fn next(&mut self) -> Option<Self::Item> {
235+
unsafe { validations::next_code_point(&mut self.bytes) }
236+
.map(|u| core::char::from_u32(u).unwrap())
237+
}
238+
}

src/string/validations.rs

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
// This file is a partial copy of the str/validations.rs of the Rust core lib.
2+
//
3+
// A copy was needed, because the original `next_code_point` takes an iterator
4+
// of `&u8`, which is not an option for us, because we only have `u8` by-value.
5+
//
6+
// Source:
7+
// https://github.com/rust-lang/rust/blob/03b17b181af4945fa24e0df79676e89454546440/library/core/src/str/validations.rs
8+
9+
10+
/// Mask of the value bits of a continuation byte.
const CONT_MASK: u8 = 0b0011_1111;

/// Returns the initial code point accumulator for the first byte.
///
/// The first byte is special: only its bottom 5 bits are wanted for
/// width 2, 4 bits for width 3, and 3 bits for width 4.
/// Accordingly, `width` selects how many of the leading bits get masked off.
#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    (byte & (0x7F >> width)) as u32
}

/// Returns the value of `ch` updated with continuation byte `byte`.
///
/// `ch` is shifted left by six bits and the six value bits of `byte`
/// (see [`CONT_MASK`]) are placed into the freed positions.
#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    (ch << 6) | (byte & CONT_MASK) as u32
}
26+
27+
28+
/// Reads the next code point out of a byte iterator (assuming a
29+
/// UTF-8-like encoding).
30+
///
31+
/// # Safety
32+
///
33+
/// `bytes` must produce a valid UTF-8-like (UTF-8 or WTF-8) string
34+
#[inline]
35+
pub(super) unsafe fn next_code_point<I: Iterator<Item = u8>>(bytes: &mut I) -> Option<u32> {
36+
// Decode UTF-8
37+
let x = bytes.next()?;
38+
if x < 128 {
39+
return Some(x as u32);
40+
}
41+
42+
// Multibyte case follows
43+
// Decode from a byte combination out of: [[[x y] z] w]
44+
// NOTE: Performance is sensitive to the exact formulation here
45+
let init = utf8_first_byte(x, 2);
46+
// SAFETY: `bytes` produces an UTF-8-like string,
47+
// so the iterator must produce a value here.
48+
let y = unsafe { bytes.next().unwrap() };
49+
let mut ch = utf8_acc_cont_byte(init, y);
50+
if x >= 0xE0 {
51+
// [[x y z] w] case
52+
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
53+
// SAFETY: `bytes` produces an UTF-8-like string,
54+
// so the iterator must produce a value here.
55+
let z = unsafe { bytes.next().unwrap() };
56+
let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
57+
ch = init << 12 | y_z;
58+
if x >= 0xF0 {
59+
// [x y z w] case
60+
// use only the lower 3 bits of `init`
61+
// SAFETY: `bytes` produces an UTF-8-like string,
62+
// so the iterator must produce a value here.
63+
let w = unsafe { bytes.next().unwrap() };
64+
ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
65+
}
66+
}
67+
68+
Some(ch)
69+
}

src/wrapper.rs

+51
Original file line numberDiff line numberDiff line change
@@ -198,8 +198,59 @@ impl<T: Copy, const N: usize> ProgMem<[T; N]> {
198198
//
199199
unsafe { read_value(array) }
200200
}
201+
202+
/// Lazily iterate over all elements
203+
///
204+
/// Returns an iterator which lazily loads the elements one at a time
205+
/// from progmem.
206+
/// This means this iterator can be used to access huge arrays while
207+
/// only requiring `size_of::<T>()` amount of stack memory.
208+
///
209+
/// # Panics
210+
///
211+
/// This method panics, if the size of an element (i.e. `size_of::<T>()`)
212+
/// is beyond 255 bytes.
213+
/// However, this is currently just an implementation limitation, which may
214+
/// be lifted in the future.
215+
///
216+
pub fn iter(&self) -> PmIter<T, N> {
217+
PmIter::new(self)
218+
}
219+
}
220+
221+
222+
/// An iterator over an array in progmem.
223+
pub struct PmIter<'a, T, const N: usize> {
224+
progmem: &'a ProgMem<[T; N]>,
225+
current_idx: usize,
226+
}
227+
228+
impl<'a, T, const N: usize> PmIter<'a, T, N> {
229+
/// Creates a new iterator over the given progmem array.
230+
pub const fn new(pm: &'a ProgMem<[T; N]>) -> Self {
231+
Self {
232+
progmem: pm,
233+
current_idx: 0,
234+
}
235+
}
201236
}
202237

238+
impl<'a, T: Copy, const N: usize> Iterator for PmIter<'a, T, N> {
239+
type Item = T;
240+
241+
fn next(&mut self) -> Option<Self::Item> {
242+
// Check for iterator end
243+
if self.current_idx < N {
244+
// Load next item from progmem
245+
let b = self.progmem.load_at(self.current_idx);
246+
self.current_idx += 1;
247+
248+
Some(b)
249+
} else {
250+
None
251+
}
252+
}
253+
}
203254

204255

205256
/// Define a static in progmem.

0 commit comments

Comments
 (0)