From 14d739d3b8022c01f6b3b1bba175f65be33cd152 Mon Sep 17 00:00:00 2001 From: Paulo Cabral Sanz Date: Fri, 23 Dec 2022 19:27:50 -0300 Subject: [PATCH 1/2] Improve docs, no-panic feature, avoid panic in more functions --- Cargo.toml | 9 ++- README.md | 61 ++++------------- src/arraystring.rs | 146 ++++++++++++++++++----------------------- src/implementations.rs | 26 ++++++++ src/integration.rs | 16 ++++- src/lib.rs | 121 +++------------------------------- src/utils.rs | 9 ++- tests/string_parity.rs | 44 ------------- 8 files changed, 135 insertions(+), 297 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 5c9c618..d52aba6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,11 +27,12 @@ criterion = { version = "0.4", features = ["html_reports"] } env_logger = "0.10" serde = { version = "1.0", features = ["derive"] } diesel = { version = "2", features = ["sqlite", "postgres", "mysql"] } + [dependencies] log = { version = "0.4", optional = true } serde = { version = "1", optional = true } diesel = { version = "2", optional = true } -no-panic = "0.1" +no-panic = { version = "0.1", optional = true } [features] default = ["std"] @@ -39,10 +40,8 @@ std = [] logs = ["log"] serde-traits = ["serde"] diesel-traits = ["diesel"] +no-panic = ["dep:no-panic"] [package.metadata.docs.rs] rustdoc-args = ["--cfg", "docs_rs_workaraound"] -features = ["logs", "serde-traits", "std", "diesel-traits"] - -[profile.dev] -opt-level = 3 \ No newline at end of file +features = ["logs", "serde-traits", "std", "diesel-traits", "no-panic"] diff --git a/README.md b/README.md index 72b1e27..9d8a66e 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -*For experimentation with `const generics` try [`staticvec`](https://github.com/slightlyoutofphase/staticvec/)* +*For experimentation with nightly only `const generics` features try [`staticvec`](https://github.com/slightlyoutofphase/staticvec/)* # ArrayString @@ -8,9 +8,11 @@ Can't outgrow initial capacity (defined at compile time), always occupies `capac *Maximum Capacity is 255* -*Doesn't allocate memory on the heap and never panics in release (all panic branches are stripped at compile time - except `Index`/`IndexMut` traits, since they are supposed to)* +*Doesn't allocate memory on the heap and should never panic in release (except in `Index`/`IndexMut` traits, since they are supposed to)* -* [Documentation](https://docs.rs/arraystring/0.3.0/arraystring) +*The no panic garantee can be ensured at compilation time with the `no-panic` feature, just be aware that a compiler update might break this garantee, therefore making the crate uncompilable, open an issue if you notice.* + +* [Documentation](https://docs.rs/arraystring/latest/arraystring) ## Why @@ -22,9 +24,9 @@ Stack based strings are generally faster to create, clone and append to than hea But that becomes less true as you increase the array size, `CacheString` occuppies a full cache line, 255 bytes is the maximum we accept - `MaxString` and it's probably already slower than heap based strings of that size (like in `std::string::String`) -There are other stack based strings out there, they generally can have "unlimited" capacity (heap allocate), but the stack based size is defined by the library implementor, we go through a different route by implementing a string based in a generic array. +There are other stack based strings out there, they generally can have "unlimited" capacity using small string optimizations, but the stack based size is defined by the library implementor. We go through a different route by implementing a string based in a generic array. -Array based strings always occupies the full space in memory, so they may use more memory (although in the stack) than dynamic strings. +Be aware that array based strings always occupy the full space in memory, so they may use more memory (although in the stack) than dynamic strings. ## Features @@ -39,6 +41,10 @@ Array based strings always occupies the full space in memory, so they may use mo Opperates like `String`, but truncates it if it's bigger than capacity + - `no-panic` checks at compile time that the panic function is not linked by the library + + Only works when all optimizations are enabled, and may break in future compiler updates. Please open an issue if you notice. + - `logs` enables internal logging You will probably only need this if you are debugging this library @@ -66,51 +72,6 @@ fn main() -> Result<(), Error> { Ok(()) } -``` - - ## Comparisons - -*These benchmarks ran while I streamed video and used my computer (with* **non-disclosed specs**) *as usual, so don't take the actual times too seriously, just focus on the comparison* - -```my_custom_benchmark -small-string (23 bytes) clone 4.837 ns -small-string (23 bytes) try_from_str 13.552 ns -small-string (23 bytes) from_str_truncate 11.360 ns -small-string (23 bytes) from_str_unchecked 11.291 ns -small-string (23 bytes) try_push_str 1.162 ns -small-string (23 bytes) push_str 3.490 ns -small-string (23 bytes) push_str_unchecked 1.098 ns -------------------------------------------------------------- -cache-string (63 bytes) clone 10.170 ns -cache-string (63 bytes) try_from_str 25.579 ns -cache-string (63 bytes) from_str_truncate 16.977 ns -cache-string (63 bytes) from_str_unchecked 17.201 ns -cache-string (63 bytes) try_push_str 1.160 ns -cache-string (63 bytes) push_str 3.486 ns -cache-string (63 bytes) push_str_unchecked 1.115 ns -------------------------------------------------------------- -max-string (255 bytes) clone 147.410 ns -max-string (255 bytes) try_from_str 157.340 ns -max-string (255 bytes) from_str_truncate 158.000 ns -max-string (255 bytes) from_str_unchecked 158.420 ns -max-string (255 bytes) try_push_str 1.167 ns -max-string (255 bytes) push_str 4.337 ns -max-string (255 bytes) push_str_unchecked 1.103 ns -------------------------------------------------------------- -string (19 bytes) clone 33.295 ns -string (19 bytes) from 32.512 ns -string (19 bytes) push str 28.128 ns -------------------------------------------------------------- -arrayvec string (23 bytes) clone 7.725 ns -arrayvec string (23 bytes) from 14.794 ns -arrayvec string (23 bytes) push str 1.363 ns -------------------------------------------------------------- -inlinable-string (30 bytes) clone 16.751 ns -inlinable-string (30 bytes) from_str 29.310 ns -inlinable-string (30 bytes) push_str 2.865 ns -------------------------------------------------------------- -smallstring crate (20 bytes) clone 60.988 ns -smallstring crate (20 bytes) from_str 50.233 ns ``` ## Licenses diff --git a/src/arraystring.rs b/src/arraystring.rs index 71d322f..78faa9a 100644 --- a/src/arraystring.rs +++ b/src/arraystring.rs @@ -5,11 +5,10 @@ use crate::utils::{is_char_boundary, is_inside_boundary}; use crate::utils::{truncate_str, IntoLossy}; use crate::{prelude::*, Error}; use core::char::{decode_utf16, REPLACEMENT_CHARACTER}; -use core::str::from_utf8; use core::{cmp::min, ops::*}; #[cfg(feature = "logs")] use log::{debug, trace}; -#[cfg(not(debug_assertions))] +#[cfg(all(feature = "no-panic", not(debug_assertions)))] use no_panic::no_panic; /// String based on a generic array (size defined at compile time through `const generics`) @@ -70,6 +69,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_from_str(string: impl AsRef) -> Result { trace!("Try from str: {}", string.as_ref()); let mut s = Self::new(); @@ -94,6 +94,7 @@ where /// assert_eq!(string.as_str(), truncated); /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn from_str_truncate(string: impl AsRef) -> Self { trace!("FromStr truncate: {}", string.as_ref()); let mut s = Self::new(); @@ -117,6 +118,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_from_iterator( iter: impl IntoIterator>, ) -> Result { @@ -148,6 +150,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn from_iterator_truncate(iter: impl IntoIterator>) -> Self { trace!("FromIterator truncate"); let mut out = Self::new(); @@ -177,6 +180,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_from_chars(iter: impl IntoIterator) -> Result { trace!("TryFrom chars"); let mut out = Self::new(); @@ -203,6 +207,7 @@ where /// assert_eq!(truncate.as_str(), truncated.as_str()); /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn from_chars_truncate(iter: impl IntoIterator) -> Self { trace!("From chars truncate"); let mut out = Self::new(); @@ -214,60 +219,6 @@ where out } - /// Creates new `ArrayString` from byte slice, returning [`Utf8`] on invalid utf-8 data or [`OutOfBounds`] if bigger than [`capacity`] - /// - /// [`Utf8`]: ./error/enum.Error.html#variant.Utf8 - /// [`OutOfBounds`]: ./error/enum.Error.html#variant.OutOfBounds - /// [`capacity`]: ./struct.ArrayString.html#method.capacity - /// - /// ```rust - /// # use arraystring::{Error, prelude::*}; - /// # fn main() -> Result<(), Error> { - /// # #[cfg(not(miri))] let _ = env_logger::try_init(); - /// let string = ArrayString::<23>::try_from_utf8("My String")?; - /// assert_eq!(string.as_str(), "My String"); - /// - /// let invalid_utf8 = [0, 159, 146, 150]; - /// assert_eq!(ArrayString::<23>::try_from_utf8(invalid_utf8), Err(Error::Utf8)); - /// - /// let out_of_bounds = "0000".repeat(400); - /// assert_eq!(ArrayString::<23>::try_from_utf8(out_of_bounds.as_bytes()), Err(Error::OutOfBounds)); - /// # Ok(()) - /// # } - /// ``` - #[inline] - pub fn try_from_utf8(slice: impl AsRef<[u8]>) -> Result { - debug!("From utf8: {:?}", slice.as_ref()); - Ok(Self::try_from_str(from_utf8(slice.as_ref())?)?) - } - - /// Creates new `ArrayString` from byte slice, returning [`Utf8`] on invalid utf-8 data, truncating if bigger than [`capacity`]. - /// - /// [`Utf8`]: ./error/struct.Utf8.html - /// [`capacity`]: ./struct.ArrayString.html#method.capacity - /// - /// ```rust - /// # use arraystring::{Error, prelude::*}; - /// # fn main() -> Result<(), Error> { - /// # #[cfg(not(miri))] let _ = env_logger::try_init(); - /// let string = ArrayString::<23>::from_utf8_truncate("My String")?; - /// assert_eq!(string.as_str(), "My String"); - /// - /// let invalid_utf8 = [0, 159, 146, 150]; - /// assert_eq!(ArrayString::<23>::from_utf8_truncate(invalid_utf8), Err(Utf8)); - /// - /// let out_of_bounds = "0".repeat(300); - /// assert_eq!(ArrayString::<23>::from_utf8_truncate(out_of_bounds.as_bytes())?.as_str(), - /// "0".repeat(ArrayString::<23>::capacity()).as_str()); - /// # Ok(()) - /// # } - /// ``` - #[inline] - pub fn from_utf8_truncate(slice: impl AsRef<[u8]>) -> Result { - debug!("From utf8: {:?}", slice.as_ref()); - Ok(Self::from_str_truncate(from_utf8(slice.as_ref())?)) - } - /// Creates new `ArrayString` from `u16` slice, returning [`Utf16`] on invalid utf-16 data or [`OutOfBounds`] if bigger than [`capacity`] /// /// [`Utf16`]: ./error/enum.Error.html#variant.Utf16 @@ -291,6 +242,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_from_utf16(slice: impl AsRef<[u16]>) -> Result { debug!("From utf16: {:?}", slice.as_ref()); let mut out = Self::new(); @@ -323,6 +275,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn from_utf16_truncate(slice: impl AsRef<[u16]>) -> Result { debug!("From utf16: {:?}", slice.as_ref()); let mut out = Self::new(); @@ -356,6 +309,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn from_utf16_lossy_truncate(slice: impl AsRef<[u16]>) -> Self { debug!("From utf16 lossy: {:?}", slice.as_ref()); let mut out = Self::new(); @@ -379,6 +333,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn as_str(&self) -> &str { trace!("As str: {self}"); self.as_ref() @@ -396,6 +351,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn as_mut_str(&mut self) -> &mut str { trace!("As mut str: {self}"); self.as_mut() @@ -413,6 +369,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn as_bytes(&self) -> &[u8] { trace!("As bytes"); self.as_ref() @@ -433,6 +390,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub unsafe fn as_mut_bytes(&mut self) -> &mut [u8] { trace!("As mut str"); let len = self.len(); @@ -468,7 +426,7 @@ where /// # } /// ``` #[inline] - #[cfg_attr(not(debug_assertions), no_panic)] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_push_str(&mut self, string: impl AsRef) -> Result<(), OutOfBounds> { trace!("Push str: {}", string.as_ref()); let str = string.as_ref(); @@ -499,7 +457,7 @@ where /// # } /// ``` #[inline] - #[cfg_attr(not(debug_assertions), no_panic)] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn push_str_truncate(&mut self, string: impl AsRef) { trace!("Push str truncate: {}", string.as_ref()); let str = string.as_ref().as_bytes(); @@ -511,6 +469,7 @@ where unsafe { self.push_str_unchecked(str) }; } + #[inline] unsafe fn push_str_unchecked(&mut self, bytes: &[u8]) { core::ptr::copy_nonoverlapping( bytes.as_ptr(), @@ -538,6 +497,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_push(&mut self, ch: char) -> Result<(), OutOfBounds> { trace!("Push: {}", ch); let mut buf = [0; 4]; @@ -565,6 +525,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn truncate(&mut self, size: usize) -> Result<(), Utf8> { debug!("Truncate: {}", size); let len = min(self.len(), size); @@ -585,6 +546,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn pop(&mut self) -> Option { debug!("Pop"); self.as_str().chars().last().map(|ch| { @@ -610,6 +572,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn trim(&mut self) { trace!("Trim"); let mut start = self.len(); @@ -630,9 +593,13 @@ where } end = pos; } - let range = start..end; - self.size = range.clone().count().into_lossy(); - self.array.copy_within(range, 0); + + self.size = end.saturating_sub(start).into_lossy(); + + unsafe { + let ptr = self.array.as_mut_ptr(); + core::ptr::copy(ptr.add(start), ptr, self.len()); + } } /// Removes specified char from `ArrayString` @@ -653,6 +620,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn remove(&mut self, idx: usize) -> Result { debug!("Remove: {}", idx); is_inside_boundary(idx, self.len())?; @@ -685,6 +653,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn retain(&mut self, mut f: impl FnMut(char) -> bool) { trace!("Retain"); // Not the most efficient solution, we could shift left during batch mismatch @@ -716,6 +685,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_insert(&mut self, idx: usize, ch: char) -> Result<(), Error> { let mut buf = [0; 4]; self.try_insert_str(idx, ch.encode_utf8(&mut buf)) @@ -746,19 +716,24 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_insert_str(&mut self, idx: usize, string: impl AsRef) -> Result<(), Error> { trace!("Try insert at {idx} str: {}", string.as_ref()); let str = string.as_ref().as_bytes(); is_inside_boundary(idx, self.len())?; - is_inside_boundary(str.len() + self.len(), Self::capacity())?; + is_inside_boundary(idx + str.len() + self.len(), Self::capacity())?; is_char_boundary(self, idx)?; if str.is_empty() { return Ok(()); } - let this_len = self.len(); - self.array.copy_within(idx..this_len, idx + str.len()); - self.array[idx..idx + str.len()].copy_from_slice(str); - self.size += str.len().into_lossy(); + + unsafe { + let ptr = self.array.as_mut_ptr().add(idx); + core::ptr::copy(ptr, ptr.add(str.len()), self.len().saturating_sub(idx)); + core::ptr::copy(str.as_ptr(), ptr, str.len()); + } + self.size = self.len().saturating_add(str.len()).into_lossy(); + Ok(()) } @@ -789,6 +764,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn insert_str_truncate( &mut self, idx: usize, @@ -805,16 +781,13 @@ where if str.is_empty() { return Ok(()); } - let remaining = usize::min( - size.saturating_sub(str.len()), - self.len().saturating_sub(idx), - ); - if remaining > 0 { - self.array - .copy_within(idx..(idx + remaining), idx + str.len()) + + unsafe { + let ptr = self.array.as_mut_ptr().add(idx); + core::ptr::copy(ptr, ptr.add(str.len()), self.len().saturating_sub(idx)); + core::ptr::copy(str.as_ptr(), ptr, str.len()); } - self.array[idx..idx + str.len()].copy_from_slice(str); - self.size = (idx + str.len() + remaining).into_lossy(); + self.size = self.len().saturating_add(str.len()).into_lossy(); Ok(()) } @@ -833,9 +806,10 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn len(&self) -> usize { trace!("Len"); - self.size.into() + self.size as usize } /// Checks if `ArrayString` is empty. @@ -852,6 +826,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn is_empty(&self) -> bool { trace!("Is empty"); self.len() == 0 @@ -877,6 +852,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn split_off(&mut self, at: usize) -> Result { debug!("Split off"); is_inside_boundary(at, self.len())?; @@ -902,6 +878,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn clear(&mut self) { trace!("Clear"); self.size = 0; @@ -925,6 +902,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn drain(&mut self, range: impl RangeBounds) -> Result, Error> { let start = match range.start_bound() { Bound::Included(t) => *t, @@ -965,7 +943,7 @@ where /// # #[cfg(not(miri))] let _ = env_logger::try_init(); /// let mut s = ArrayString::<23>::try_from_str("ABCD🤔")?; /// s.replace_range(2..4, "EFGHI")?; - /// assert_eq!(s.as_str(), "ABEFGHI🤔"); + /// assert_eq!(s.as_bytes(), "ABEFGHI🤔".as_bytes()); /// /// assert_eq!(s.replace_range(9.., "J"), Err(Error::Utf8)); /// assert_eq!(s.replace_range(..90, "K"), Err(Error::OutOfBounds)); @@ -974,6 +952,7 @@ where /// # } /// ``` #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn replace_range( &mut self, r: impl RangeBounds, @@ -1009,18 +988,19 @@ where (start + str.len() + self.len()).saturating_sub(end), Self::capacity(), )?; - let dest = start + str.len(); + let this_len = self.len(); - self.array.copy_within(end..this_len, dest); - if !str.is_empty() { - self.array[start..start + str.len()].copy_from_slice(str); + unsafe { + let ptr = self.array.as_mut_ptr(); + core::ptr::copy(ptr.add(end), ptr.add(str.len().saturating_add(start)), this_len.saturating_sub(end)); + core::ptr::copy(str.as_ptr(), ptr.add(start), str.len()); } - self.size -= (end.saturating_sub(start)).into_lossy(); - self.size += str.len().into_lossy(); + self.size = self.len().saturating_add(str.len()).saturating_add(start).saturating_sub(end).into_lossy(); Ok(()) } } +/// Temporary hack until const generics constraints are stable pub(crate) mod sealed { use super::*; pub trait ValidCapacity {} diff --git a/src/implementations.rs b/src/implementations.rs index 6b845c1..62f560e 100644 --- a/src/implementations.rs +++ b/src/implementations.rs @@ -7,12 +7,15 @@ use core::ops::{Add, Deref, DerefMut, Index, IndexMut}; use core::ops::{Range, RangeFrom, RangeFull, RangeInclusive, RangeTo, RangeToInclusive}; use core::str::{self, FromStr}; use core::{borrow::Borrow, borrow::BorrowMut, cmp::Ordering, hash::Hash, hash::Hasher}; +#[cfg(all(feature = "no-panic", not(debug_assertions)))] +use no_panic::no_panic; impl Default for ArrayString where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn default() -> Self { Self::new() } @@ -23,6 +26,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn as_ref(&self) -> &str { // Safety: our inner initialized slice should only contain valid utf-8 // There is no way to invalidate the utf-8 of it from safe functions @@ -37,6 +41,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn as_mut(&mut self) -> &mut str { let len = self.len(); // Safety: size will always be between 0 and capacity, so get_unchecked_mut will never fail @@ -55,6 +60,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn as_ref(&self) -> &[u8] { // Safety: size will always be between 0 and capacity, so get_unchecked_mut will never fail debug_assert!(self.len() <= N); @@ -67,6 +73,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn from(s: &str) -> Self { Self::from_str_truncate(s) } @@ -79,6 +86,7 @@ where type Err = OutOfBounds; #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn from_str(s: &str) -> Result { Self::try_from_str(s) } @@ -89,6 +97,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn fmt(&self, f: &mut Formatter) -> fmt::Result { f.debug_struct("ArrayString") .field("array", &self.as_str()) @@ -102,6 +111,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn eq(&self, other: &str) -> bool { self.as_str().eq(other) } @@ -112,6 +122,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn borrow(&self) -> &str { self.as_str() } @@ -122,6 +133,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn borrow_mut(&mut self) -> &mut str { self.as_mut_str() } @@ -132,6 +144,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn hash(&self, hasher: &mut H) { self.as_str().hash(hasher); } @@ -142,6 +155,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn eq(&self, other: &Self) -> bool { self.as_str().eq(other.as_str()) } @@ -154,6 +168,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn cmp(&self, other: &Self) -> Ordering { self.as_str().cmp(other.as_str()) } @@ -164,6 +179,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn partial_cmp(&self, other: &Self) -> Option { Some(self.cmp(other)) } @@ -176,6 +192,7 @@ where type Output = Self; #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn add(mut self, other: &str) -> Self::Output { self.push_str_truncate(other); self @@ -187,6 +204,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn write_str(&mut self, slice: &str) -> fmt::Result { self.try_push_str(slice).map_err(|_| fmt::Error) } @@ -197,6 +215,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn fmt(&self, f: &mut Formatter) -> fmt::Result { write!(f, "{}", self.as_str()) } @@ -209,6 +228,7 @@ where type Target = str; #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn deref(&self) -> &Self::Target { self.as_ref() } @@ -219,6 +239,7 @@ where Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn deref_mut(&mut self) -> &mut Self::Target { self.as_mut() } @@ -228,6 +249,7 @@ impl FromIterator for ArrayString where Self: ValidCapacity, { + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn from_iter>(iter: I) -> Self { Self::from_chars_truncate(iter) } @@ -237,6 +259,7 @@ impl<'a, const N: usize> FromIterator<&'a str> for ArrayString where Self: ValidCapacity, { + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn from_iter>(iter: I) -> Self { Self::from_iterator_truncate(iter) } @@ -246,6 +269,7 @@ impl Extend for ArrayString where Self: ValidCapacity, { + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn extend>(&mut self, iterable: I) { self.push_str_truncate(Self::from_chars_truncate(iterable)) } @@ -255,6 +279,7 @@ impl<'a, const N: usize> Extend<&'a char> for ArrayString where Self: ValidCapacity, { + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn extend>(&mut self, iter: I) { self.extend(iter.into_iter().cloned()); } @@ -264,6 +289,7 @@ impl<'a, const N: usize> Extend<&'a str> for ArrayString where Self: ValidCapacity, { + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn extend>(&mut self, iterable: I) { self.push_str_truncate(Self::from_iterator_truncate(iterable)) } diff --git a/src/integration.rs b/src/integration.rs index 694676d..4f9100c 100644 --- a/src/integration.rs +++ b/src/integration.rs @@ -3,6 +3,9 @@ #[cfg_attr(docs_rs_workaround, doc(cfg(feature = "diesel-traits")))] #[cfg(feature = "diesel-traits")] mod diesel_impl { + #[cfg(all(feature = "no-panic", not(debug_assertions)))] + use no_panic::no_panic; + pub use crate::{arraystring::sealed::ValidCapacity, prelude::*}; #[cfg(feature = "std")] @@ -24,6 +27,8 @@ mod diesel_impl { *const str: FromSql, Self: ValidCapacity, { + #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn from_sql(bytes: RawValue<'_, DB>) -> deserialize::Result { let ptr = <*const str as FromSql>::from_sql(bytes)?; // Safety: We know that the pointer impl will never return null, it's just how diesel implements @@ -37,6 +42,8 @@ mod diesel_impl { str: ToSql, Self: ValidCapacity, { + #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, DB>) -> serialize::Result { self.as_str().to_sql(out) } @@ -48,6 +55,7 @@ mod diesel_impl { *const str: FromSql, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn from_sql(bytes: RawValue<'_, DB>) -> deserialize::Result { Ok(Self(FromSql::from_sql(bytes)?)) } @@ -59,6 +67,7 @@ mod diesel_impl { str: ToSql, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn to_sql<'b>(&'b self, out: &mut Output<'b, '_, DB>) -> serialize::Result { ToSql::::to_sql(&self.0, out) } @@ -68,8 +77,9 @@ mod diesel_impl { #[cfg_attr(docs_rs_workaround, doc(cfg(feature = "serde-traits")))] #[cfg(feature = "serde-traits")] mod serde_impl { + #[cfg(all(feature = "no-panic", not(debug_assertions)))] + use no_panic::no_panic; pub use crate::{arraystring::sealed::ValidCapacity, prelude::*}; - pub use serde::{de::Deserializer, ser::Serializer, Deserialize, Serialize}; impl Serialize for ArrayString @@ -77,6 +87,7 @@ mod serde_impl { Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn serialize(&self, ser: S) -> Result { Serialize::serialize(self.as_str(), ser) } @@ -87,6 +98,7 @@ mod serde_impl { Self: ValidCapacity, { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn deserialize>(des: D) -> Result { <&str>::deserialize(des).map(Self::from_str_truncate) } @@ -94,6 +106,7 @@ mod serde_impl { impl Serialize for CacheString { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn serialize(&self, ser: S) -> Result { self.0.serialize(ser) } @@ -101,6 +114,7 @@ mod serde_impl { impl<'a> Deserialize<'a> for CacheString { #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn deserialize>(des: D) -> Result { Ok(CacheString(Deserialize::deserialize(des)?)) } diff --git a/src/lib.rs b/src/lib.rs index 026c7a8..236a5f5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,9 @@ //! //! *Maximum Capacity is 255* //! -//! *Doesn't allocate memory on the heap and never panics in release (all panic branches are stripped at compile time - except for `Index`/`IndexMut` traits, since they are supposed to)* +//! *Doesn't allocate memory on the heap and should never panic in release (except in `Index`/`IndexMut` traits, since they are supposed to)* +//! +//! *The no panic garantee can be ensured at compilation time with the `no-panic` feature, just be aware that a compiler update might break this garantee, therefore making the crate uncompilable, open an issue if you notice.* //! //! ## Why //! @@ -16,9 +18,9 @@ //! //! But that becomes less true as you increase the array size, 255 bytes is the maximum we accept - [`MaxString`] and it's probably already slower than heap based strings of that size (like in `std::string::String`) //! -//! There are other stack based strings out there, they generally can have "unlimited" capacity (heap allocate), but the stack based size is defined by the library implementor, we go through a different route by implementing a string based in a generic array. +//! There are other stack based strings out there, they generally can have "unlimited" capacity using small string optimizations, but the stack based size is defined by the library implementor. We go through a different route by implementing a string based in a generic array. //! -//! Array based strings always occupies the full space in memory, so they may use more memory (although in the stack) than dynamic strings. +//! Be aware that array based strings always occupy the full space in memory, so they may use more memory (although in the stack) than dynamic strings. //! //! [`capacity`]: ./struct.ArrayString.html#method.capacity //! [`MaxString`]: ./type.MaxString.html @@ -35,6 +37,9 @@ //! - `diesel-traits` enables diesel traits integration //! //! Opperates like `String`, but truncates it if it's bigger than capacity +//! - `no-panic` checks at compile time that the panic function is not linked by the library +//! +//! Only works when all optimizations are enabled, and may break in future compiler updates. Please open an issue if you notice. //! //! - `logs` enables internal logging //! @@ -65,51 +70,6 @@ //! } //! ``` //! -//! ## Comparisons -//! -//! *These benchmarks ran while I streamed video and used my computer (with* **non-disclosed specs**) *as usual, so don't take the actual times too seriously, just focus on the comparison* -//! -//! ```my_custom_benchmark -//! small-string (23 bytes) clone 4.837 ns -//! small-string (23 bytes) try_from_str 14.777 ns -//! small-string (23 bytes) from_str_truncate 11.360 ns -//! small-string (23 bytes) from_str_unchecked 11.291 ns -//! small-string (23 bytes) try_push_str 1.162 ns -//! small-string (23 bytes) push_str 3.490 ns -//! small-string (23 bytes) push_str_unchecked 1.098 ns -//! ------------------------------------------------------------- -//! cache-string (63 bytes) clone 10.170 ns -//! cache-string (63 bytes) try_from_str 25.579 ns -//! cache-string (63 bytes) from_str_truncate 16.977 ns -//! cache-string (63 bytes) from_str_unchecked 17.201 ns -//! cache-string (63 bytes) try_push_str 1.160 ns -//! cache-string (63 bytes) push_str 3.486 ns -//! cache-string (63 bytes) push_str_unchecked 1.115 ns -//! ------------------------------------------------------------- -//! max-string (255 bytes) clone 147.410 ns -//! max-string (255 bytes) try_from_str 157.340 ns -//! max-string (255 bytes) from_str_truncate 158.000 ns -//! max-string (255 bytes) from_str_unchecked 158.420 ns -//! max-string (255 bytes) try_push_str 1.167 ns -//! max-string (255 bytes) push_str 4.337 ns -//! max-string (255 bytes) push_str_unchecked 1.103 ns -//! ------------------------------------------------------------- -//! string (19 bytes) clone 33.295 ns -//! string (19 bytes) from 32.512 ns -//! string (19 bytes) push str 28.128 ns -//! ------------------------------------------------------------- -//! arrayvec string (23 bytes) clone 7.725 ns -//! arrayvec string (23 bytes) from 14.794 ns -//! arrayvec string (23 bytes) push str 1.363 ns -//! ------------------------------------------------------------- -//! inlinable-string (30 bytes) clone 16.751 ns -//! inlinable-string (30 bytes) from_str 29.310 ns -//! inlinable-string (30 bytes) push_str 2.865 ns -//! ------------------------------------------------------------- -//! smallstring crate (20 bytes) clone 60.988 ns -//! smallstring crate (20 bytes) from_str 50.233 ns -//! ``` -//! //! ## Licenses //! //! `MIT` and `Apache-2.0` @@ -174,21 +134,12 @@ pub mod prelude { pub use crate::arraystring::ArrayString; pub use crate::error::Error; -/// String with the same `mem::size_of` of a `String` +/// String with the same `std::mem::size_of` of a `String` (`std::mem::size_of:: * 3`) /// /// 24 bytes in 64 bits architecture /// /// 12 bytes in 32 bits architecture (or others) -#[cfg(target_pointer_width = "64")] -pub type SmallString = ArrayString<23>; - -/// String with the same `mem::size_of` of a `String` -/// -/// 24 bytes in 64 bits architecture -/// -/// 12 bytes in 32 bits architecture (or others) -#[cfg(not(target_pointer_width = "64"))] -pub type SmallString = ArrayString<11>; +pub type SmallString = ArrayString<{ std::mem::size_of::() * 3 }>; /// Biggest array based string (255 bytes of string) pub type MaxString = ArrayString<255>; @@ -358,58 +309,6 @@ mod cache_string { Self(ArrayString::from_chars_truncate(iter)) } - /// Creates new `CacheString` from byte slice, returning [`Utf8`] on invalid utf-8 data or [`OutOfBounds`] if bigger than [`capacity`] - /// - /// [`Utf8`]: ./error/enum.Error.html#variant.Utf8 - /// [`OutOfBounds`]: ./error/enum.Error.html#variant.OutOfBounds - /// [`capacity`]: ./struct.CacheString.html#method.capacity - /// - /// ```rust - /// # use arraystring::{Error, prelude::*}; - /// # fn main() -> Result<(), Error> { - /// # #[cfg(not(miri))] let _ = env_logger::try_init(); - /// let string = CacheString::try_from_utf8("My String")?; - /// assert_eq!(string.as_str(), "My String"); - /// - /// let invalid_utf8 = [0, 159, 146, 150]; - /// assert_eq!(CacheString::try_from_utf8(invalid_utf8), Err(Error::Utf8)); - /// - /// let out_of_bounds = "0000".repeat(400); - /// assert_eq!(CacheString::try_from_utf8(out_of_bounds.as_bytes()), Err(Error::OutOfBounds)); - /// # Ok(()) - /// # } - /// ``` - #[inline] - pub fn try_from_utf8(slice: impl AsRef<[u8]>) -> Result { - Ok(Self(ArrayString::try_from_utf8(slice)?)) - } - - /// Creates new `CacheString` from byte slice, returning [`Utf8`] on invalid utf-8 data, truncating if bigger than [`capacity`]. - /// - /// [`Utf8`]: ./error/struct.Utf8.html - /// [`capacity`]: ./struct.CacheString.html#method.capacity - /// - /// ```rust - /// # use arraystring::{Error, prelude::*}; - /// # fn main() -> Result<(), Error> { - /// # #[cfg(not(miri))] let _ = env_logger::try_init(); - /// let string = CacheString::from_utf8_truncate("My String")?; - /// assert_eq!(string.as_str(), "My String"); - /// - /// let invalid_utf8 = [0, 159, 146, 150]; - /// assert_eq!(CacheString::from_utf8_truncate(invalid_utf8), Err(Utf8)); - /// - /// let out_of_bounds = "0".repeat(300); - /// assert_eq!(CacheString::from_utf8_truncate(out_of_bounds.as_bytes())?.as_str(), - /// "0".repeat(CacheString::capacity()).as_str()); - /// # Ok(()) - /// # } - /// ``` - #[inline] - pub fn from_utf8_truncate(slice: impl AsRef<[u8]>) -> Result { - Ok(Self(ArrayString::from_utf8_truncate(slice)?)) - } - /// Creates new `CacheString` from `u16` slice, returning [`Utf16`] on invalid utf-16 data or [`OutOfBounds`] if bigger than [`capacity`] /// /// [`Utf16`]: ./error/enum.Error.html#variant.Utf16 diff --git a/src/utils.rs b/src/utils.rs index 73b5826..4465b6c 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,7 +3,7 @@ use crate::{arraystring::sealed::ValidCapacity, prelude::*}; #[cfg(feature = "logs")] use log::trace; -#[cfg(not(debug_assertions))] +#[cfg(all(feature = "no-panic", not(debug_assertions)))] use no_panic::no_panic; pub(crate) trait IntoLossy: Sized { @@ -11,8 +11,8 @@ pub(crate) trait IntoLossy: Sized { } /// Returns error if size is outside of specified boundary -#[cfg_attr(not(debug_assertions), no_panic)] #[inline] +#[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub(crate) fn is_inside_boundary(size: usize, limit: usize) -> Result<(), OutOfBounds> { trace!("Out of bounds: ensures {} <= {}", size, limit); (size <= limit).then_some(()).ok_or(OutOfBounds) @@ -20,6 +20,7 @@ pub(crate) fn is_inside_boundary(size: usize, limit: usize) -> Result<(), OutOfB /// Returns error if index is not at a valid utf-8 char boundary #[inline] +#[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub(crate) fn is_char_boundary(s: &ArrayString, idx: usize) -> Result<(), Utf8> where ArrayString: ValidCapacity, @@ -32,6 +33,7 @@ where } #[inline] +#[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub(crate) unsafe fn is_char_boundary_at(arr: &[u8], index: usize) -> bool { if index == 0 { return true; @@ -41,7 +43,7 @@ pub(crate) unsafe fn is_char_boundary_at(arr: &[u8], index: usize) -> bool { /// Truncates string to specified size (ignoring last bytes if they form a partial `char`) #[inline] -#[cfg_attr(not(debug_assertions), no_panic)] +#[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub(crate) fn truncate_str(slice: &[u8], mut size: usize) -> &[u8] { trace!("Truncate str: {:?} at {size}", core::str::from_utf8(slice),); if size >= slice.len() { @@ -70,6 +72,7 @@ pub(crate) fn truncate_str(slice: &[u8], mut size: usize) -> &[u8] { impl IntoLossy for usize { #[allow(clippy::cast_possible_truncation)] #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn into_lossy(self) -> u8 { self as u8 } diff --git a/tests/string_parity.rs b/tests/string_parity.rs index e568611..05c09f9 100644 --- a/tests/string_parity.rs +++ b/tests/string_parity.rs @@ -64,50 +64,6 @@ fn from_iter() { ); } -#[test] -fn try_from_utf8() { - assert( - |s| String::from_utf8(s.as_bytes().to_vec()), - |s| TestString::try_from_utf8(s.as_bytes()), - ); -} - -#[test] -fn from_utf8() { - assert( - |s| String::from_utf8(s.as_bytes().to_vec()), - |s| TestString::from_utf8_truncate(s.as_bytes()), - ); -} - -#[inline] -fn invalidate_utf8(buf: &str) -> Vec { - let mut buf = buf.as_bytes().to_vec(); - if buf.len() >= 4 { - buf[0] = 0; - buf[1] = 159; - buf[2] = 146; - buf[3] = 150; - } - buf -} - -#[test] -fn try_from_utf8_invalid() { - assert( - |s| String::from_utf8(invalidate_utf8(s)), - |s| TestString::try_from_utf8(invalidate_utf8(s)), - ); -} - -#[test] -fn from_utf8_invalid() { - assert( - |s| String::from_utf8(invalidate_utf8(s)), - |s| TestString::from_utf8_truncate(invalidate_utf8(s)), - ); -} - #[test] fn try_from_utf16() { let utf16 = |s: &str| s.encode_utf16().collect::>(); From 2fbba4f12f9f872ce8be237f8b3d59e878530309 Mon Sep 17 00:00:00 2001 From: Paulo Cabral Sanz Date: Sat, 24 Dec 2022 02:28:37 -0300 Subject: [PATCH 2/2] Use replace_range internally for most things --- README.md | 22 ++++++ src/arraystring.rs | 150 +++++++++++++++-------------------------- src/implementations.rs | 19 ++++-- src/integration.rs | 14 ++-- src/lib.rs | 11 ++- src/utils.rs | 27 +++----- 6 files changed, 115 insertions(+), 128 deletions(-) diff --git a/README.md b/README.md index 9d8a66e..3b1364c 100644 --- a/README.md +++ b/README.md @@ -74,6 +74,28 @@ fn main() -> Result<(), Error> { } ``` +# Miri + +Tests can be run through Miri to ensure Undefined Behavior isn't triggered by them. It excludes diesel's integration `sqlite` tests as it's impossible to link to C libraries from Miri. And logs won't be persisted in doc tests as `env_logger` isn't supported by Miri either. + +To run the tests with it do (requires nightly installed): + +`cargo +nightly miri test --release --all-features` + +# No Panic + +There is a feature to enable the `no_panic` dependency, that will be enforced in every function. To be sure every panicking branch is removed. This depends on compiler optimizations and compilation may break on updates, or in different environments. We generally don't recommend using it, but it's useful in environments with high restrictions. + +This feature will only be enforced in `release` builds (it checks for `not(debug_assertions)`) + +But it's mostly used to test the library. + +To run the tests with it do: + +`cargo test --lib --tests --release --features=no-panic` + +Both serde and diesel integrations can panic. Index trait implementations too. + ## Licenses [MIT](master/license/MIT) and [Apache-2.0](master/license/APACHE) diff --git a/src/arraystring.rs b/src/arraystring.rs index 78faa9a..21037ec 100644 --- a/src/arraystring.rs +++ b/src/arraystring.rs @@ -335,7 +335,7 @@ where #[inline] #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn as_str(&self) -> &str { - trace!("As str: {self}"); + trace!("As str"); self.as_ref() } @@ -429,13 +429,9 @@ where #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_push_str(&mut self, string: impl AsRef) -> Result<(), OutOfBounds> { trace!("Push str: {}", string.as_ref()); - let str = string.as_ref(); - if str.is_empty() { - return Ok(()); - } - is_inside_boundary(str.len() + self.len(), Self::capacity())?; - unsafe { self.push_str_unchecked(str.as_bytes()) }; - Ok(()) + let len = self.len(); + self.replace_range(len..len, string) + .map_err(|_| OutOfBounds) } /// Pushes string slice to the end of the `ArrayString` truncating total size if bigger than [`capacity`]. @@ -460,23 +456,7 @@ where #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn push_str_truncate(&mut self, string: impl AsRef) { trace!("Push str truncate: {}", string.as_ref()); - let str = string.as_ref().as_bytes(); - let size = Self::capacity().saturating_sub(self.len()); - if size == 0 || str.is_empty() { - return; - } - let str = truncate_str(str, size); - unsafe { self.push_str_unchecked(str) }; - } - - #[inline] - unsafe fn push_str_unchecked(&mut self, bytes: &[u8]) { - core::ptr::copy_nonoverlapping( - bytes.as_ptr(), - self.array.as_mut_ptr().add(self.len()), - bytes.len(), - ); - self.size += bytes.len().into_lossy(); + let _ = self.try_push_str(truncate_str(string.as_ref(), Self::capacity() - self.len())); } /// Inserts character to the end of the `ArrayString` erroring if total size if bigger than [`capacity`]. @@ -574,7 +554,7 @@ where #[inline] #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn trim(&mut self) { - trace!("Trim"); + trace!("Trim: {self:?}"); let mut start = self.len(); for (pos, char) in self.as_str().char_indices() { if !char.is_whitespace() { @@ -594,12 +574,9 @@ where end = pos; } - self.size = end.saturating_sub(start).into_lossy(); - - unsafe { - let ptr = self.array.as_mut_ptr(); - core::ptr::copy(ptr.add(start), ptr, self.len()); - } + let _ = self.replace_range(..start, ""); + // Note: Garanteed to not overflow but that should not be the bottleneck + let _ = self.replace_range(end.saturating_sub(start).., ""); } /// Removes specified char from `ArrayString` @@ -623,21 +600,23 @@ where #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn remove(&mut self, idx: usize) -> Result { debug!("Remove: {}", idx); - is_inside_boundary(idx, self.len())?; + is_inside_boundary(idx.saturating_add(1), self.len())?; is_char_boundary(self, idx)?; - let char = unsafe { - self.as_str() - .get_unchecked(idx..) - .chars() - .next() - .ok_or(OutOfBounds)? - }; - let len = char.len_utf8(); - if idx + len < self.len() { - self.array.copy_within(idx + len.., idx); + + let mut end = if idx == self.len() { idx } else { idx + 1 }; + while !self.as_str().is_char_boundary(end) { + end += 1; } - self.size -= len.into_lossy(); - Ok(char) + + let ch = self + .as_str() + .get(idx..end) + .ok_or(Error::OutOfBounds)? + .chars() + .next() + .ok_or(Error::Utf8)?; + self.replace_range(idx..idx + ch.len_utf8(), "")?; + Ok(ch) } /// Retains only the characters specified by the predicate. @@ -707,8 +686,7 @@ where /// let mut s = ArrayString::<23>::try_from_str("ABCD🤔")?; /// s.try_insert_str(1, "AB")?; /// s.try_insert_str(1, "BC")?; - /// assert_eq!(s.try_insert_str(1, "0".repeat(ArrayString::<23>::capacity())), - /// Err(Error::OutOfBounds)); + /// assert_eq!(s.try_insert_str(1, "0".repeat(ArrayString::<23>::capacity())), Err(Error::OutOfBounds)); /// assert_eq!(s.as_str(), "ABCABBCD🤔"); /// assert_eq!(s.try_insert_str(20, "C"), Err(Error::OutOfBounds)); /// assert_eq!(s.try_insert_str(10, "D"), Err(Error::Utf8)); @@ -718,23 +696,9 @@ where #[inline] #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn try_insert_str(&mut self, idx: usize, string: impl AsRef) -> Result<(), Error> { - trace!("Try insert at {idx} str: {}", string.as_ref()); - let str = string.as_ref().as_bytes(); + trace!("Try insert at {idx} str: {:?} to {self:?}", string.as_ref()); is_inside_boundary(idx, self.len())?; - is_inside_boundary(idx + str.len() + self.len(), Self::capacity())?; - is_char_boundary(self, idx)?; - if str.is_empty() { - return Ok(()); - } - - unsafe { - let ptr = self.array.as_mut_ptr().add(idx); - core::ptr::copy(ptr, ptr.add(str.len()), self.len().saturating_sub(idx)); - core::ptr::copy(str.as_ptr(), ptr, str.len()); - } - self.size = self.len().saturating_add(str.len()).into_lossy(); - - Ok(()) + self.replace_range(idx..idx, string) } /// Inserts string slice at specified index, truncating size if bigger than [`capacity`]. @@ -770,24 +734,11 @@ where idx: usize, string: impl AsRef, ) -> Result<(), Error> { - trace!("Insert str at {idx}: {}", string.as_ref()); - is_inside_boundary(idx, self.len())?; - is_char_boundary(self, idx)?; - let size = Self::capacity().saturating_sub(idx); - if size == 0 { - return Ok(()); - } - let str = truncate_str(string.as_ref().as_bytes(), size); - if str.is_empty() { - return Ok(()); - } - - unsafe { - let ptr = self.array.as_mut_ptr().add(idx); - core::ptr::copy(ptr, ptr.add(str.len()), self.len().saturating_sub(idx)); - core::ptr::copy(str.as_ptr(), ptr, str.len()); - } - self.size = self.len().saturating_add(str.len()).into_lossy(); + trace!("Insert str at {idx}: {} to {self}", string.as_ref()); + self.try_insert_str( + idx, + truncate_str(string.as_ref(), Self::capacity() - self.len()), + )?; Ok(()) } @@ -855,13 +806,8 @@ where #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] pub fn split_off(&mut self, at: usize) -> Result { debug!("Split off"); - is_inside_boundary(at, self.len())?; - is_char_boundary(self, at)?; - debug_assert!(at <= self.len() && self.as_str().is_char_boundary(at)); - let new = - unsafe { Self::try_from_str(self.as_str().get_unchecked(at..)).unwrap_unchecked() }; - self.size = at.into_lossy(); - Ok(new) + let drained = self.drain(at..)?.0; + Ok(drained) } /// Empties `ArrayString` @@ -926,7 +872,7 @@ where let drain = unsafe { let slice = self.as_str().get_unchecked(start..end); - Self::try_from_str(slice).unwrap_unchecked() + Self::try_from_str(slice)? }; self.array.copy_within(end.., start); self.size = self @@ -943,7 +889,7 @@ where /// # #[cfg(not(miri))] let _ = env_logger::try_init(); /// let mut s = ArrayString::<23>::try_from_str("ABCD🤔")?; /// s.replace_range(2..4, "EFGHI")?; - /// assert_eq!(s.as_bytes(), "ABEFGHI🤔".as_bytes()); + /// assert_eq!(s, "ABEFGHI🤔"); /// /// assert_eq!(s.replace_range(9.., "J"), Err(Error::Utf8)); /// assert_eq!(s.replace_range(..90, "K"), Err(Error::OutOfBounds)); @@ -982,20 +928,30 @@ where } is_inside_boundary(start, end)?; is_inside_boundary(end, self.len())?; + is_inside_boundary(str.len(), Self::capacity())?; is_char_boundary(self, start)?; is_char_boundary(self, end)?; - is_inside_boundary( - (start + str.len() + self.len()).saturating_sub(end), - Self::capacity(), - )?; + // Will never overflow since start < end and str.len() cannot be bigger than 255 + is_inside_boundary(self.len() + str.len() + start - end, Self::capacity())?; let this_len = self.len(); unsafe { let ptr = self.array.as_mut_ptr(); - core::ptr::copy(ptr.add(end), ptr.add(str.len().saturating_add(start)), this_len.saturating_sub(end)); - core::ptr::copy(str.as_ptr(), ptr.add(start), str.len()); + core::ptr::copy( + ptr.add(end), + ptr.add(str.len().saturating_add(start)), + this_len.saturating_sub(end), + ); + if !str.is_empty() { + core::ptr::copy(str.as_ptr(), ptr.add(start), str.len()); + } } - self.size = self.len().saturating_add(str.len()).saturating_add(start).saturating_sub(end).into_lossy(); + self.size = self + .len() + .saturating_add(str.len()) + .saturating_add(start) + .saturating_sub(end) + .into_lossy(); Ok(()) } } diff --git a/src/implementations.rs b/src/implementations.rs index 62f560e..4a9c3db 100644 --- a/src/implementations.rs +++ b/src/implementations.rs @@ -28,7 +28,7 @@ where #[inline] #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn as_ref(&self) -> &str { - // Safety: our inner initialized slice should only contain valid utf-8 + // Safety: our byte slice should only contain valid utf-8 // There is no way to invalidate the utf-8 of it from safe functions // And it's a invariant expected to be kept in unsafe functions debug_assert!(str::from_utf8(self.as_ref()).is_ok()); @@ -44,10 +44,10 @@ where #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn as_mut(&mut self) -> &mut str { let len = self.len(); - // Safety: size will always be between 0 and capacity, so get_unchecked_mut will never fail + // Safety: len will always be between 0 and capacity, so get_unchecked_mut will never fail debug_assert!(len <= N); let slice = unsafe { self.array.as_mut_slice().get_unchecked_mut(..len) }; - // Safety: our inner initialized slice should only contain valid utf-8 + // Safety: our byte slice should only contain valid utf-8 // There is no way to invalidate the utf-8 of it from safe functions // And it's a invariant expected to be kept in unsafe functions debug_assert!(str::from_utf8(slice).is_ok()); @@ -62,7 +62,7 @@ where #[inline] #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn as_ref(&self) -> &[u8] { - // Safety: size will always be between 0 and capacity, so get_unchecked_mut will never fail + // Safety: self.len() will always be between 0 and capacity, so get_unchecked_mut will never fail debug_assert!(self.len() <= N); unsafe { self.array.as_slice().get_unchecked(..self.len()) } } @@ -117,6 +117,17 @@ where } } +impl PartialEq<&str> for ArrayString +where + Self: ValidCapacity, +{ + #[inline] + #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] + fn eq(&self, other: &&str) -> bool { + self.eq(*other) + } +} + impl Borrow for ArrayString where Self: ValidCapacity, diff --git a/src/integration.rs b/src/integration.rs index 4f9100c..9e1b32f 100644 --- a/src/integration.rs +++ b/src/integration.rs @@ -31,7 +31,7 @@ mod diesel_impl { #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] fn from_sql(bytes: RawValue<'_, DB>) -> deserialize::Result { let ptr = <*const str as FromSql>::from_sql(bytes)?; - // Safety: We know that the pointer impl will never return null, it's just how diesel implements + // Safety: We know that the pointer impl will never return null. We copied diesel's implementation for String debug_assert!(!ptr.is_null()); Ok(Self::from_str_truncate(unsafe { &*ptr })) } @@ -77,9 +77,9 @@ mod diesel_impl { #[cfg_attr(docs_rs_workaround, doc(cfg(feature = "serde-traits")))] #[cfg(feature = "serde-traits")] mod serde_impl { + pub use crate::{arraystring::sealed::ValidCapacity, prelude::*}; #[cfg(all(feature = "no-panic", not(debug_assertions)))] use no_panic::no_panic; - pub use crate::{arraystring::sealed::ValidCapacity, prelude::*}; pub use serde::{de::Deserializer, ser::Serializer, Deserialize, Serialize}; impl Serialize for ArrayString @@ -180,7 +180,7 @@ mod tests { } #[cfg(all(feature = "diesel-traits", feature = "std"))] - use diesel::{dsl, insert_into, mysql, pg, prelude::*}; + use diesel::{dsl, mysql, pg, prelude::*}; #[cfg(all(feature = "diesel-traits", feature = "std"))] table! { @@ -262,7 +262,7 @@ mod tests { } #[test] - #[cfg(all(feature = "diesel-traits", feature = "std"))] + #[cfg(all(feature = "diesel-traits", feature = "std", not(miri)))] fn diesel_derive_query_sqlite() { let mut conn = diesel::sqlite::SqliteConnection::establish(":memory:").unwrap(); let _ = diesel::sql_query("CREATE TABLE derives (id INTEGER, name VARCHAR(32));") @@ -273,7 +273,7 @@ mod tests { name: ArrayString::try_from_str("Name1").unwrap(), }; - let _ = insert_into(derives::table) + let _ = diesel::insert_into(derives::table) .values(&string) .execute(&mut conn) .unwrap(); @@ -283,7 +283,7 @@ mod tests { } #[test] - #[cfg(all(feature = "diesel-traits", feature = "std"))] + #[cfg(all(feature = "diesel-traits", feature = "std", not(miri)))] fn diesel_derive2_query_sqlite() { let mut conn = diesel::sqlite::SqliteConnection::establish(":memory:").unwrap(); let _ = diesel::sql_query("CREATE TABLE derives (id INTEGER, name VARCHAR(32));") @@ -294,7 +294,7 @@ mod tests { name: CacheString(ArrayString::try_from_str("Name1").unwrap()), }; - let _ = insert_into(derives::table) + let _ = diesel::insert_into(derives::table) .values(&string) .execute(&mut conn) .unwrap(); diff --git a/src/lib.rs b/src/lib.rs index 236a5f5..9e92a31 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -134,12 +134,12 @@ pub mod prelude { pub use crate::arraystring::ArrayString; pub use crate::error::Error; -/// String with the same `std::mem::size_of` of a `String` (`std::mem::size_of:: * 3`) +/// String with the same `core::mem::size_of` of a `String` (`core::mem::size_of:: * 3`) /// /// 24 bytes in 64 bits architecture /// /// 12 bytes in 32 bits architecture (or others) -pub type SmallString = ArrayString<{ std::mem::size_of::() * 3 }>; +pub type SmallString = ArrayString<{ core::mem::size_of::() * 3 }>; /// Biggest array based string (255 bytes of string) pub type MaxString = ArrayString<255>; @@ -987,6 +987,13 @@ mod cache_string { } } + impl PartialEq<&str> for CacheString { + #[inline] + fn eq(&self, other: &&str) -> bool { + self.0.eq(other) + } + } + impl Borrow for CacheString { #[inline] fn borrow(&self) -> &str { diff --git a/src/utils.rs b/src/utils.rs index 4465b6c..bd58802 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -32,36 +32,27 @@ where Err(Utf8) } -#[inline] -#[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] -pub(crate) unsafe fn is_char_boundary_at(arr: &[u8], index: usize) -> bool { - if index == 0 { - return true; - } - (*arr.get_unchecked(index) as i8) >= -0x40 -} - /// Truncates string to specified size (ignoring last bytes if they form a partial `char`) #[inline] #[cfg_attr(all(feature = "no-panic", not(debug_assertions)), no_panic)] -pub(crate) fn truncate_str(slice: &[u8], mut size: usize) -> &[u8] { - trace!("Truncate str: {:?} at {size}", core::str::from_utf8(slice),); +pub(crate) fn truncate_str(slice: &str, mut size: usize) -> &str { + trace!("Truncate str: {slice} at {size}"); if size >= slice.len() { return slice; } // Safety: size will always be between 0 and capacity, so get_unchecked will never fail // We unroll the loop here as a utf-8 character can have at most 4 bytes, so decreasing 4 - // times ensures we will find the char boundary. `is_char_boundary_at` returns true for 0 index + // times ensures we will always find the char boundary. `str::is_char_boundary` returns true for 0 index unsafe { - if is_char_boundary_at(slice, size) { + if slice.is_char_boundary(size) { return slice.get_unchecked(..size); } size -= 1; - if is_char_boundary_at(slice, size) { + if slice.is_char_boundary(size) { return slice.get_unchecked(..size); } size -= 1; - if is_char_boundary_at(slice, size) { + if slice.is_char_boundary(size) { return slice.get_unchecked(..size); } size -= 1; @@ -84,8 +75,8 @@ mod tests { #[test] fn truncate() { - assert_eq!(truncate_str(b"i", 10), b"i"); - assert_eq!(truncate_str(b"iiiiii", 3), b"iii"); - assert_eq!(truncate_str("🤔🤔🤔".as_bytes(), 5), "🤔".as_bytes()); + assert_eq!(truncate_str("i", 10), "i"); + assert_eq!(truncate_str("iiiiii", 3), "iii"); + assert_eq!(truncate_str("🤔🤔🤔", 5), "🤔"); } }