Skip to content

Commit

Permalink
add new trait to encode values to bytes
Browse files Browse the repository at this point in the history
still lots of TODOs
  • Loading branch information
antonilol committed Aug 17, 2024
1 parent 817b373 commit 98c1031
Show file tree
Hide file tree
Showing 10 changed files with 245 additions and 117 deletions.
63 changes: 62 additions & 1 deletion heed-traits/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,80 @@
use std::borrow::Cow;
use std::cmp::{Ord, Ordering};
use std::error::Error as StdError;
use std::fmt;

/// A boxed `Send + Sync + 'static` error.
pub type BoxedError = Box<dyn StdError + Send + Sync + 'static>;

/// A trait that represents an encoding structure.
pub trait BytesEncode<'a> {
#[deprecated = "replaced by `ToBytes` to allow for more optimization"]
#[allow(deprecated)] // deprecated BoxedErrorWrapper is used in a bound
pub trait BytesEncode<'a>:
// TODO are these bounds needed?
ToBytes<'a, SelfType = Self::EItem, ReturnBytes = Cow<'a, [u8]>, Error = BoxedErrorWrapper>
{
/// The type to encode.
type EItem: ?Sized + 'a;

/// Encode the given item as bytes.
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError>;
}

/// A trait that represents an encoding structure.
///
/// Implementors are codecs: they describe how a value of
/// [`SelfType`][ToBytes::SelfType] is turned into bytes. Unlike the deprecated
/// `BytesEncode` trait, which always returns a `Cow<[u8]>` and a boxed error,
/// the implementor picks both the concrete byte container
/// ([`ReturnBytes`][ToBytes::ReturnBytes]) and the concrete error type
/// ([`Error`][ToBytes::Error]).
pub trait ToBytes<'a> {
/// The type to encode to bytes.
///
/// It may be unsized (for example `str` or `[u8]`).
type SelfType: ?Sized + 'a;

/// The type containing the encoded bytes.
///
/// It must be viewable as a byte slice (`AsRef<[u8]>`) and convertible into
/// an owned `Vec<u8>`.
type ReturnBytes: Into<Vec<u8>> + AsRef<[u8]> + 'a;

/// The error type to return when encoding goes wrong.
type Error: StdError + Send + Sync + 'static;

/// Encode the given item as bytes.
///
/// # Errors
///
/// Returns [`Self::Error`] when `item` cannot be encoded.
fn to_bytes(item: &'a Self::SelfType) -> Result<Self::ReturnBytes, Self::Error>;
}

#[allow(deprecated)]
impl<'a, T: BytesEncode<'a>> ToBytes<'a> for T {
type SelfType = <Self as BytesEncode<'a>>::EItem;

type ReturnBytes = Cow<'a, [u8]>;

type Error = BoxedErrorWrapper;

fn to_bytes(item: &'a Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
Self::bytes_encode(item).map_err(BoxedErrorWrapper)
}
}

/// Wraps the [`BoxedError`] type alias because for complicated reasons it does not implement
/// [`Error`][StdError]. This wrapper forwards [`Debug`][fmt::Debug], [`Display`][fmt::Display]
/// and [`Error`][StdError] through the wrapper and the [`Box`].
///
/// It exists so that the boxed error returned by the deprecated `BytesEncode::bytes_encode`
/// can be used where a type implementing [`Error`][StdError] is required, such as
/// `ToBytes::Error`.
///
/// The wrapped error is stored in a private field.
#[deprecated = "this wrapper was added for backwards compatibility of BytesEncode only"]
pub struct BoxedErrorWrapper(BoxedError);

#[allow(deprecated)]
impl fmt::Debug for BoxedErrorWrapper {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
<BoxedError as fmt::Debug>::fmt(&self.0, f)
}
}

/// Displays the wrapped error exactly as the inner [`BoxedError`] would,
/// passing the formatter (and therefore width/padding flags) straight through.
#[allow(deprecated)] // implementing a trait for the deprecated wrapper itself
impl fmt::Display for BoxedErrorWrapper {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let boxed: &BoxedError = &self.0;
        fmt::Display::fmt(boxed, f)
    }
}

/// Makes the wrapper usable as an error type.
///
/// `source` reports the source of the *wrapped* error, not the wrapped error
/// itself, so the wrapper does not add a level to the error chain.
#[allow(deprecated)] // implementing a trait for the deprecated wrapper itself
impl StdError for BoxedErrorWrapper {
    fn source(&self) -> Option<&(dyn StdError + 'static)> {
        // Look through the `Box` explicitly and ask the trait object for its source.
        let inner: &(dyn StdError + Send + Sync + 'static) = &*self.0;
        inner.source()
    }
}

/// A trait that represents a decoding structure.
pub trait BytesDecode<'a> {
/// The type to decode.
Expand Down
39 changes: 33 additions & 6 deletions heed-types/src/bytes.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
use std::borrow::Cow;
use std::convert::Infallible;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use heed_traits::{BoxedError, BytesDecode, ToBytes};

/// Describes a byte slice `[u8]` that is totally borrowed and doesn't depend on
/// any [memory alignment].
///
/// [memory alignment]: std::mem::align_of()
pub enum Bytes {}

impl<'a> BytesEncode<'a> for Bytes {
type EItem = [u8];
impl<'a> ToBytes<'a> for Bytes {
type SelfType = [u8];

fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(item))
type ReturnBytes = &'a [u8];

type Error = Infallible;

fn to_bytes(item: &'a Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
Ok(item)
}
}

Expand All @@ -23,3 +27,26 @@ impl<'a> BytesDecode<'a> for Bytes {
Ok(bytes)
}
}

/// Like [`Bytes`], but always contains exactly `N` (the generic parameter) bytes.
///
/// This is an uninhabited marker type: it has no variants and is only used at the
/// type level to select the codec. Values are encoded from and decoded to `[u8; N]`.
pub enum FixedSizeBytes<const N: usize> {}

/// Encodes a `[u8; N]` by copying it; this can never fail.
impl<'a, const N: usize> ToBytes<'a> for FixedSizeBytes<N> {
    type SelfType = [u8; N];

    type ReturnBytes = [u8; N]; // TODO &'a [u8; N] or [u8; N]

    type Error = Infallible;

    fn to_bytes(item: &'a Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
        // Arrays of `u8` are `Copy`, so the encoded form is simply a by-value copy.
        let copied: [u8; N] = *item;
        Ok(copied)
    }
}

/// Decodes a byte slice into a `[u8; N]`.
///
/// Fails with the boxed slice-to-array conversion error when the slice is not
/// exactly `N` bytes long.
impl<'a, const N: usize> BytesDecode<'a> for FixedSizeBytes<N> {
    type DItem = [u8; N]; // TODO &'a [u8; N] or [u8; N]

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let converted: Result<[u8; N], _> = bytes.try_into();
        match converted {
            Ok(array) => Ok(array),
            Err(wrong_len) => Err(wrong_len.into()),
        }
    }
}
42 changes: 27 additions & 15 deletions heed-types/src/integer.rs
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
use std::borrow::Cow;
use std::convert::Infallible;
use std::marker::PhantomData;
use std::mem::size_of;

use byteorder::{ByteOrder, ReadBytesExt};
use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use heed_traits::{BoxedError, BytesDecode, ToBytes};

/// Encodable version of [`u8`].
pub struct U8;

impl BytesEncode<'_> for U8 {
type EItem = u8;
impl ToBytes<'_> for U8 {
type SelfType = u8;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::from([*item].to_vec()))
type ReturnBytes = [u8; 1];

type Error = Infallible;

fn to_bytes(item: &Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
Ok([*item])
}
}

Expand All @@ -27,11 +31,15 @@ impl BytesDecode<'_> for U8 {
/// Encodable version of [`i8`].
pub struct I8;

impl BytesEncode<'_> for I8 {
type EItem = i8;
impl ToBytes<'_> for I8 {
type SelfType = i8;

type ReturnBytes = [u8; 1];

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::from([*item as u8].to_vec()))
type Error = Infallible;

fn to_bytes(item: &Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
Ok([*item as u8])
}
}

Expand All @@ -51,13 +59,17 @@ macro_rules! define_type {

pub struct $name<O>(PhantomData<O>);

impl<O: ByteOrder> BytesEncode<'_> for $name<O> {
type EItem = $native;
impl<O: ByteOrder> ToBytes<'_> for $name<O> {
type SelfType = $native;

type ReturnBytes = [u8; size_of::<$native>()];

type Error = Infallible;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut buf = vec![0; size_of::<Self::EItem>()];
fn to_bytes(item: &Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
let mut buf = [0; size_of::<$native>()];
O::$write_method(&mut buf, *item);
Ok(Cow::from(buf))
Ok(buf)
}
}

Expand Down
16 changes: 9 additions & 7 deletions heed-types/src/serde_bincode.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
use std::borrow::Cow;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use heed_traits::{BoxedError, BytesDecode, ToBytes};
use serde::{Deserialize, Serialize};

/// Describes a type that is [`Serialize`]/[`Deserialize`] and uses `bincode` to do so.
///
/// It can borrow bytes from the original slice.
pub struct SerdeBincode<T>(std::marker::PhantomData<T>);

impl<'a, T: 'a> BytesEncode<'a> for SerdeBincode<T>
impl<'a, T: 'a> ToBytes<'a> for SerdeBincode<T>
where
T: Serialize,
{
type EItem = T;
type SelfType = T;

type ReturnBytes = Vec<u8>;

type Error = bincode::Error;

fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
bincode::serialize(item).map(Cow::Owned).map_err(Into::into)
fn to_bytes(item: &'a Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
bincode::serialize(item)
}
}

Expand Down
16 changes: 9 additions & 7 deletions heed-types/src/serde_json.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
use std::borrow::Cow;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use heed_traits::{BoxedError, BytesDecode, ToBytes};
use serde::{Deserialize, Serialize};

/// Describes a type that is [`Serialize`]/[`Deserialize`] and uses `serde_json` to do so.
///
/// It can borrow bytes from the original slice.
pub struct SerdeJson<T>(std::marker::PhantomData<T>);

impl<'a, T: 'a> BytesEncode<'a> for SerdeJson<T>
impl<'a, T: 'a> ToBytes<'a> for SerdeJson<T>
where
T: Serialize,
{
type EItem = T;
type SelfType = T;

type ReturnBytes = Vec<u8>;

type Error = serde_json::Error;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
serde_json::to_vec(item).map(Cow::Owned).map_err(Into::into)
fn to_bytes(item: &'a Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
serde_json::to_vec(item)
}
}

Expand Down
16 changes: 9 additions & 7 deletions heed-types/src/serde_rmp.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,23 @@
use std::borrow::Cow;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use heed_traits::{BoxedError, BytesDecode, ToBytes};
use serde::{Deserialize, Serialize};

/// Describes a type that is [`Serialize`]/[`Deserialize`] and uses `rmp_serde` to do so.
///
/// It can borrow bytes from the original slice.
pub struct SerdeRmp<T>(std::marker::PhantomData<T>);

impl<'a, T: 'a> BytesEncode<'a> for SerdeRmp<T>
impl<'a, T: 'a> ToBytes<'a> for SerdeRmp<T>
where
T: Serialize,
{
type EItem = T;
type SelfType = T;

type ReturnBytes = Vec<u8>;

type Error = rmp_serde::encode::Error;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
rmp_serde::to_vec(item).map(Cow::Owned).map_err(Into::into)
fn to_bytes(item: &'a Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
rmp_serde::to_vec(item)
}
}

Expand Down
21 changes: 12 additions & 9 deletions heed-types/src/str.rs
Original file line number Diff line number Diff line change
@@ -1,23 +1,26 @@
use std::borrow::Cow;
use std::str;
use std::convert::Infallible;

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use heed_traits::{BoxedError, BytesDecode, ToBytes};

/// Describes a [`prim@str`].
/// Describes a [`str`].
pub enum Str {}

impl BytesEncode<'_> for Str {
type EItem = str;
impl<'a> ToBytes<'a> for Str {
type SelfType = str;

fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
type ReturnBytes = &'a [u8];

type Error = Infallible;

fn to_bytes(item: &'a Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
Ok(item.as_bytes())
}
}

impl<'a> BytesDecode<'a> for Str {
type DItem = &'a str;

fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
str::from_utf8(bytes).map_err(Into::into)
std::str::from_utf8(bytes).map_err(Into::into)
}
}
16 changes: 10 additions & 6 deletions heed-types/src/unit.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
use std::borrow::Cow;
use std::convert::Infallible;
use std::{error, fmt};

use heed_traits::{BoxedError, BytesDecode, BytesEncode};
use heed_traits::{BoxedError, BytesDecode, ToBytes};

/// Describes the unit `()` type.
pub enum Unit {}

impl BytesEncode<'_> for Unit {
type EItem = ();
impl ToBytes<'_> for Unit {
type SelfType = ();

fn bytes_encode(_item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
Ok(Cow::Borrowed(&[]))
type ReturnBytes = [u8; 0];

type Error = Infallible;

fn to_bytes(_item: &'_ Self::SelfType) -> Result<Self::ReturnBytes, Self::Error> {
Ok([])
}
}

Expand Down
Loading

0 comments on commit 98c1031

Please sign in to comment.