Add a lazily loaded PmByteString that loads one char at a time.

Cryptjar · Cryptjar · commit d32a91eb6312 · 2022-04-04T22:31:51.000+02:00
See #3
diff --git a/examples/uno-pm-string.rs b/examples/uno-pm-string.rs
@@ -0,0 +1,114 @@
+//
+// This file provides an example of how to use strings on an Arduino Uno.
+//
+
+
+// Define no_std only for AVR
+#![cfg_attr(target_arch = "avr", no_std)]
+#![no_main]
+//
+// To unwrap the Option in const context
+#![feature(const_option)]
+
+
+use avr_progmem::string::PmByteString; // A progmem wrapper for strings
+#[cfg(target_arch = "avr")]
+use panic_halt as _; // halting panic implementation for AVR
+
+
+/// A string directly in progmem
+#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
+static UNICODE_TEXT: PmByteString<137> = unsafe {
+	PmByteString::new(
+		"dai 大賢者 kenja, Völlerei lässt grüßen, le garçon de théâtre, Ελληνική Δημοκρατία, \
+		 Слава Україні",
+	)
+	.unwrap()
+};
+
+/// A string directly in progmem
+#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
+static LONG_TEXT: PmByteString<242> = unsafe {
+	PmByteString::new(
+		"
+A long test string literal, that is stored in progmem instead of DRAM.
+Of course, it needs to be temporarily load into DRAM.
+However, unlike a `ByteString`, it will be only read a char at a time,
+thus a `PmByteString` can never be too long.
+",
+	)
+	.unwrap()
+};
+
+/// A single string that is over 2 KiB in size
+#[cfg_attr(target_arch = "avr", link_section = ".progmem.data")]
+static MUCH_LONGER_TEXT: PmByteString<2177> =
+	unsafe { PmByteString::new(include_str!("./test_text.txt")).unwrap() };
+
+
+// Include a fancy printer supporting Arduino Uno's USB-Serial output as well
+// as stdout on non-AVR targets.
+mod printer;
+use printer::Printer;
+
+#[no_mangle]
+fn main() -> ! {
+	let mut printer = {
+		#[cfg(target_arch = "avr")]
+		{
+			// Initialize the USB-Serial output on the Arduino Uno
+
+			let dp = arduino_hal::Peripherals::take().unwrap();
+			let pins = arduino_hal::pins!(dp);
+			let serial = arduino_hal::default_serial!(dp, pins, 9600);
+
+			Printer(serial)
+		}
+		#[cfg(not(target_arch = "avr"))]
+		{
+			// Just use stdout for non-AVR targets
+			Printer
+		}
+	};
+
+	// Print some introduction text
+	printer.println("Hello from Arduino!");
+	printer.println("");
+	printer.println("--------------------------");
+	printer.println("");
+
+	// Read string from progmem char-by-char
+	for c in LONG_TEXT.chars() {
+		printer.print(c);
+	}
+
+	printer.println("");
+
+	// Or just use the `ufmt::uDisplay` impl
+	#[cfg(feature = "ufmt")]
+	ufmt::uwrite!(&mut printer, "{}", &UNICODE_TEXT).unwrap();
+
+	printer.println("");
+
+	// Thus loading 2 KiB with ease
+	#[cfg(feature = "ufmt")]
+	ufmt::uwrite!(&mut printer, "{}", MUCH_LONGER_TEXT).unwrap();
+
+	// Print some final lines
+	printer.println("");
+	printer.println("--------------------------");
+	printer.println("");
+	printer.println("DONE");
+
+	// It is very convenient to just exit on non-AVR platforms, otherwise users
+	// might get the impression that the program hangs, whereas it already
+	// succeeded.
+	#[cfg(not(target_arch = "avr"))]
+	std::process::exit(0);
+
+	// Otherwise, that is on AVR, just go into an infinite loop, because on AVR
+	// we just can't exit!
+	loop {
+		// Done, just do nothing
+	}
+}
diff --git a/src/string.rs b/src/string.rs
@@ -1,9 +1,12 @@
 use core::fmt;
 use core::ops::Deref;
 
+use crate::wrapper::PmIter;
+use crate::ProgMem;
 
 
 mod from_slice;
+mod validations;
 
 
 
@@ -56,6 +59,11 @@ impl<const N: usize> ByteString<N> {
 			Err(_e) => None,
 		}
 	}
+
+	/// Returns the underlying byte array.
+	pub fn as_bytes(&self) -> &[u8; N] {
+		&self.0
+	}
 }
 
 impl<const N: usize> Deref for ByteString<N> {
@@ -123,3 +131,108 @@ macro_rules! progmem_str {
 		&*TEXT.load()
 	}};
 }
+
+
+
+/// A byte string in progmem
+///
+/// Not to be confused with a [`ByteString`].
+/// A `ByteString` is just a wrapper around a byte array (`[u8;N]`) that can
+/// be put into a [`ProgMem`].
+/// A `PmByteString` on the other hand, is a wrapper around a
+/// `ProgMem<[u8;N]>`.
+///
+#[repr(transparent)]
+pub struct PmByteString<const N: usize>(pub ProgMem<[u8; N]>);
+
+impl<const N: usize> PmByteString<N> {
+	/// Creates a new byte array from the given string
+	///
+	/// # Safety
+	///
+	/// This function is only sound to call, if the value is
+	/// stored in a static that is for instance attributed with
+	/// `#[link_section = ".progmem.data"]`.
+	pub const unsafe fn new(s: &str) -> Option<Self> {
+		Self::from_bytes(s.as_bytes())
+	}
+
+	/// Wraps the given byte slice
+	///
+	/// # Safety
+	///
+	/// This function is only sound to call, if the value is
+	/// stored in a static that is for instance attributed with
+	/// `#[link_section = ".progmem.data"]`.
+	pub const unsafe fn from_bytes(bytes: &[u8]) -> Option<Self> {
+		let res = from_slice::array_ref_try_from_slice(bytes);
+
+		match res {
+			Ok(array) => {
+				let array = *array;
+				let pm = unsafe { ProgMem::new(array) };
+				Some(Self(pm))
+			},
+			Err(_e) => None,
+		}
+	}
+
+	/// Returns the underlying progmem byte array.
+	pub fn as_bytes(&self) -> &ProgMem<[u8; N]> {
+		&self.0
+	}
+
+	/// Lazily iterate over the `char`s of the string.
+	///
+	/// This function is analogous to [`ProgMem::iter`], except it is over the
+	/// `char`s of this string.
+	pub fn chars(&self) -> PmChars<N> {
+		PmChars::new(self)
+	}
+}
+
+impl<const N: usize> fmt::Display for PmByteString<N> {
+	/// Writes the string one `char` at a time, each loaded lazily from progmem.
+	fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
+		// Push chars straight to the formatter; `write!` would re-enter the
+		// formatting machinery for every single char.
+		self.chars().try_for_each(|c| fmt::Write::write_char(fmt, c))
+	}
+}
+
+#[cfg(any(feature = "ufmt", doc))]
+#[doc(cfg(feature = "ufmt"))]
+impl<const N: usize> ufmt::uDisplay for PmByteString<N> {
+	fn fmt<W: ?Sized>(&self, fmt: &mut ufmt::Formatter<W>) -> Result<(), W::Error>
+	where
+		W: ufmt::uWrite,
+	{
+		for c in self.chars() {
+			ufmt::uwrite!(fmt, "{}", c)?
+		}
+		Ok(())
+	}
+}
+
+
+/// An iterator over a [`PmByteString`]
+pub struct PmChars<'a, const N: usize> {
+	bytes: PmIter<'a, u8, N>,
+}
+
+impl<'a, const N: usize> PmChars<'a, N> {
+	pub fn new(pm: &'a PmByteString<N>) -> Self {
+		PmChars {
+			bytes: pm.0.iter(),
+		}
+	}
+}
+
+impl<'a, const N: usize> Iterator for PmChars<'a, N> {
+	type Item = char;
+	/// Decodes the next `char`; may panic if the stored bytes are not valid UTF-8.
+	fn next(&mut self) -> Option<Self::Item> {
+		unsafe { validations::next_code_point(&mut self.bytes) }
+			.map(|u| core::char::from_u32(u).unwrap())
+	}
+}
diff --git a/src/string/validations.rs b/src/string/validations.rs
@@ -0,0 +1,69 @@
+// This file is a partial copy of the str/validations.rs of the Rust core lib.
+//
+// A copy was needed, because the original `next_code_point` takes an iterator
+// of `&u8`, which is not an option for us, because we only have `u8` by-value.
+//
+// Source:
+// https://github.com/rust-lang/rust/blob/03b17b181af4945fa24e0df79676e89454546440/library/core/src/str/validations.rs
+
+
+/// Mask of the value bits of a continuation byte.
+const CONT_MASK: u8 = 0b0011_1111;
+
+/// Returns the initial codepoint accumulator for the first byte.
+/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
+/// for width 3, and 3 bits for width 4.
+#[inline]
+const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
+	(byte & (0x7F >> width)) as u32
+}
+
+/// Returns the value of `ch` updated with continuation byte `byte`.
+#[inline]
+const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
+	(ch << 6) | (byte & CONT_MASK) as u32
+}
+
+
+/// Reads the next code point out of a byte iterator (assuming a UTF-8-like
+/// encoding). Returns `None` at the end; panics on a truncated sequence.
+///
+/// # Safety
+///
+/// `bytes` must produce a valid UTF-8-like (UTF-8 or WTF-8) string
+#[inline]
+pub(super) unsafe fn next_code_point<I: Iterator<Item = u8>>(bytes: &mut I) -> Option<u32> {
+	// Decode UTF-8
+	let x = bytes.next()?;
+	if x < 128 {
+		return Some(x as u32);
+	}
+
+	// Multibyte case follows
+	// Decode from a byte combination out of: [[[x y] z] w]
+	// NOTE: Performance is sensitive to the exact formulation here
+	let init = utf8_first_byte(x, 2);
+	// `bytes` produces a UTF-8-like string (caller contract), so a second
+	// byte must follow; `unwrap` is a safe call and panics if it does not.
+	let y = bytes.next().unwrap();
+	let mut ch = utf8_acc_cont_byte(init, y);
+	if x >= 0xE0 {
+		// [[x y z] w] case
+		// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
+		// `bytes` produces a UTF-8-like string (caller contract), so a third
+		// byte must follow; `unwrap` is a safe call and panics if it does not.
+		let z = bytes.next().unwrap();
+		let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
+		ch = init << 12 | y_z;
+		if x >= 0xF0 {
+			// [x y z w] case
+			// use only the lower 3 bits of `init`
+			// `bytes` produces a UTF-8-like string (caller contract), so a fourth
+			// byte must follow; `unwrap` is a safe call and panics if it does not.
+			let w = bytes.next().unwrap();
+			ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
+		}
+	}
+
+	Some(ch)
+}
diff --git a/src/wrapper.rs b/src/wrapper.rs
@@ -198,8 +198,59 @@ impl<T: Copy, const N: usize> ProgMem<[T; N]> {
 		//
 		unsafe { read_value(array) }
 	}
+
+	/// Lazily iterate over all elements
+	///
+	/// Returns an iterator which lazily loads the elements one at a time
+	/// from progmem.
+	/// This means this iterator can be used to access huge arrays while
+	/// only requiring `size_of::<T>()` amount of stack memory.
+	///
+	/// # Panics
+	///
+	/// This method panics, if the size of an element (i.e. `size_of::<T>()`)
+	/// is beyond 255 bytes.
+	/// However, this is currently just an implementation limitation, which may
+	/// be lifted in the future.
+	///
+	pub fn iter(&self) -> PmIter<T, N> {
+		PmIter::new(self)
+	}
+}
+
+
+/// An iterator over an array in progmem.
+pub struct PmIter<'a, T, const N: usize> {
+	progmem: &'a ProgMem<[T; N]>,
+	current_idx: usize,
+}
+
+impl<'a, T, const N: usize> PmIter<'a, T, N> {
+	/// Creates a new iterator over the given progmem array.
+	pub const fn new(pm: &'a ProgMem<[T; N]>) -> Self {
+		Self {
+			progmem: pm,
+			current_idx: 0,
+		}
+	}
 }
 
+impl<'a, T: Copy, const N: usize> Iterator for PmIter<'a, T, N> {
+	type Item = T;
+
+	fn next(&mut self) -> Option<Self::Item> {
+		// The cursor has moved past the last element: iteration is over
+		if self.current_idx >= N {
+			return None;
+		}
+
+		// Pull exactly one element out of progmem, then advance the cursor
+		let item = self.progmem.load_at(self.current_idx);
+		self.current_idx += 1;
+
+		Some(item)
+	}
+}
 
 
 /// Define a static in progmem.