diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2c96eb1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +target/ +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..6650136 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,6 @@ +[workspace] + +members = [ + "haskell-ffi", + "haskell-ffi-derive", +] diff --git a/README.md b/README.md new file mode 100644 index 0000000..0701c68 --- /dev/null +++ b/README.md @@ -0,0 +1,2 @@ +# Rust library for easy interop with Haskell + diff --git a/haskell-ffi-derive/Cargo.toml b/haskell-ffi-derive/Cargo.toml new file mode 100644 index 0000000..1ca1dc5 --- /dev/null +++ b/haskell-ffi-derive/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "haskell-ffi-derive" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +syn = "1.0" +quote = "1.0" +proc-macro2 = "1.0" \ No newline at end of file diff --git a/haskell-ffi-derive/src/lib.rs b/haskell-ffi-derive/src/lib.rs new file mode 100644 index 0000000..671a594 --- /dev/null +++ b/haskell-ffi-derive/src/lib.rs @@ -0,0 +1,85 @@ +//! Macro for deriving `HaskellSize` instances for structs +//! +//! Implementation is adapted from the `heapsize` example in the `syn` crate. +//! The implementation is not identical, however: `haskell_size` does not take +//! any value as input, but is entirely type-based. + +use proc_macro2::TokenStream; +use quote::quote; +use syn::{ + parse_macro_input, parse_quote, punctuated::Iter, Data, DeriveInput, Field, Fields, + GenericParam, Generics, +}; + +/// Derive `HaskellSize` instance +/// +/// NOTE: Only structs are currently supported. +#[proc_macro_derive(HaskellSize)] +pub fn haskell_size_derive(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + // Parse the input tokens into a syntax tree. + let input: DeriveInput = parse_macro_input!(input as DeriveInput); + + // Used in the quasi-quotation below as `#name`. + let name = &input.ident; + + // Add a bound `T: HaskellSize` to every type parameter T. + let without_tag: Generics = add_trait_bounds(input.generics); + + // The instance itself must get an additional `Tag` argument + // + // NOTE: Things will go badly if one of the user's parameters is also named `Tag`. + let mut including_tag: Generics = without_tag.clone(); + including_tag + .params + .push(GenericParam::Type(parse_quote!(Tag))); + + let (including_tag_impl, _, _) = including_tag.split_for_impl(); + let (_, without_tag_tys, without_tag_where) = without_tag.split_for_impl(); + + // Generate an expression to sum up the size of each field. + let sum = haskell_size_sum(&input.data); + + let expanded = quote! { + impl #including_tag_impl HaskellSize for #name #without_tag_tys #without_tag_where { + fn haskell_size(tag: PhantomData) -> usize { + #sum + } + } + }; + + // Hand the output tokens back to the compiler. + proc_macro::TokenStream::from(expanded) +} + +/// Add a bound `T: HaskellSize` to every type parameter T. +fn add_trait_bounds(mut generics: Generics) -> Generics { + for param in &mut generics.params { + if let GenericParam::Type(ref mut type_param) = *param { + type_param.bounds.push(parse_quote!(HaskellSize)); + } + } + generics +} + +/// Generate an expression to sum up the size of each field. +fn haskell_size_sum(data: &Data) -> TokenStream { + match data { + Data::Struct(ref data) => match &data.fields { + Fields::Named(fields) => haskell_size_fields(fields.named.iter()), + Fields::Unnamed(fields) => haskell_size_fields(fields.unnamed.iter()), + Fields::Unit => quote!(0), + }, + Data::Enum(_) | Data::Union(_) => unimplemented!(), + } +} + +/// Auxiliary to `haskell_size_sum` +fn haskell_size_fields(fields: Iter) -> TokenStream { + let recurse = fields.map(|f| { + let t = &f.ty; + quote! { <#t> :: haskell_size(tag) } + }); + quote! { + 0 #(+ #recurse)* + } +} diff --git a/haskell-ffi/Cargo.toml b/haskell-ffi/Cargo.toml new file mode 100644 index 0000000..aee7d20 --- /dev/null +++ b/haskell-ffi/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "haskell-ffi" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +bincode = "1.3" +borsh = "0.9" +haskell-ffi-derive = { path = "../haskell-ffi-derive" } +ref-cast = "1.0" +serde = "1.0" diff --git a/haskell-ffi/src/bincode.rs b/haskell-ffi/src/bincode.rs new file mode 100644 index 0000000..86ec825 --- /dev/null +++ b/haskell-ffi/src/bincode.rs @@ -0,0 +1,36 @@ +use std::{ + io::{Error, ErrorKind, Write}, + marker::PhantomData, +}; + +/// Implement `to_haskell` using `bincode` +/// +/// The result will be length-prefixed ("bincode-in-Borsh"). +pub fn bincode_to_haskell( + t: &T, + writer: &mut W, + _: PhantomData, +) -> Result<(), Error> +where + T: serde::ser::Serialize, + W: Write, +{ + match bincode::serialize(t) { + Ok(vec) => borsh::BorshSerialize::serialize(&vec, writer), + Err(e) => Err(Error::new(ErrorKind::InvalidData, e)), + } +} + +/// Implement `from_haskell` using `bincode` +/// +/// See als `bincode_to_haskell` +pub fn bincode_from_haskell(buf: &mut &[u8], _: PhantomData) -> Result +where + T: serde::de::DeserializeOwned, +{ + let vec: Vec = borsh::BorshDeserialize::deserialize(buf)?; + match bincode::deserialize(vec.as_ref()) { + Ok(x) => Ok(x), + Err(e) => Err(Error::new(ErrorKind::InvalidData, e)), + } +} diff --git a/haskell-ffi/src/deriving_via.rs b/haskell-ffi/src/deriving_via.rs new file mode 100644 index 0000000..2182360 --- /dev/null +++ b/haskell-ffi/src/deriving_via.rs @@ -0,0 +1,113 @@ +use borsh::{BorshDeserialize, BorshSerialize}; +use ref_cast::RefCast; +use std::{ + cmp::Ordering, + fmt::Debug, + hash::{Hash, Hasher}, + io::{Error, Write}, + marker::PhantomData, +}; + +use crate::{from_haskell::FromHaskell, to_haskell::ToHaskell}; + +/******************************************************************************* + Deriving-via support +*******************************************************************************/ + +#[derive(RefCast)] +#[repr(transparent)] +/// Newtype for "deriving-via" instances +/// +/// The purpose of this newtype is best illustrated through its instances: +/// +/// ```ignore +/// impl> BorshSerialize for Haskell +/// impl> BorshDeserialize for Haskell +/// ``` +/// +/// This is primarily used internally: when deriving `ToHaskell`/`FromHaskell` +/// instances for standard types, we want to re-use the logic from `borsh`, +/// rather than re-implement everything here. We do this by turning say a +/// `Vec` into a `Vec>`, and then call functions from +/// `borsh`. The use of the newtype wrapper then ensures that the constraint +/// on `T` will be in terms of `ToHaskell`/`FromHaskell` again. +pub struct Haskell(pub T, PhantomData); + +pub fn tag_val(t: T) -> Haskell { + Haskell(t, PhantomData) +} + +pub fn tag_ref(t: &T) -> &Haskell { + RefCast::ref_cast(t) +} + +pub fn untag_val(tagged: Haskell) -> T { + tagged.0 +} + +pub fn untag_ref(tagged: &Haskell) -> &T { + &tagged.0 +} + +/******************************************************************************* + Standard instances +*******************************************************************************/ + +impl Debug for Haskell { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.0.fmt(f) + } +} + +impl PartialEq for Haskell { + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } +} + +impl Eq for Haskell {} + +impl PartialOrd for Haskell { + fn partial_cmp(&self, other: &Self) -> Option { + self.0.partial_cmp(&other.0) + } +} + +impl Hash for Haskell { + fn hash(&self, state: &mut H) { + self.0.hash(state); + } +} + +impl Default for Haskell { + fn default() -> Self { + Self(Default::default(), PhantomData) + } +} + +impl Clone for Haskell { + fn clone(&self) -> Self { + Self(self.0.clone(), PhantomData) + } +} + +impl Copy for Haskell {} + +/******************************************************************************* + Forwarding instances + + NOTE: We do not expect _additional_ forwarding instances to be defined. +*******************************************************************************/ + +impl> BorshSerialize for Haskell { + fn serialize(&self, writer: &mut W) -> Result<(), Error> { + self.0.to_haskell(writer, PhantomData) + } +} + +impl> BorshDeserialize for Haskell { + fn deserialize(buf: &mut &[u8]) -> std::io::Result { + let tag: PhantomData = PhantomData; + T::from_haskell(buf, tag).map(tag_val) + } +} diff --git a/haskell-ffi/src/from_haskell.rs b/haskell-ffi/src/from_haskell.rs new file mode 100644 index 0000000..277e539 --- /dev/null +++ b/haskell-ffi/src/from_haskell.rs @@ -0,0 +1,76 @@ +use std::{ + io::{Error, ErrorKind}, + marker::PhantomData, +}; + +use crate::HaskellSize; + +/******************************************************************************* + Main class definition +*******************************************************************************/ + +const ERROR_NOT_ALL_BYTES_READ: &str = "Not all bytes read"; + +pub trait FromHaskell: Sized { + /// Deserialize data sent from Haskell + /// + /// This is the analogue of `BorshDeserialize::deserialize`. + // + /// See `ToHaskell` for a detailed discussion of the `tag` argument. + fn from_haskell(buf: &mut &[u8], tag: PhantomData) -> Result; + + fn from_haskell_slice(slice: &[u8], tag: PhantomData) -> Result { + let mut slice_mut = slice; + let result = Self::from_haskell(&mut slice_mut, tag)?; + if !slice_mut.is_empty() { + return Err(Error::new(ErrorKind::InvalidData, ERROR_NOT_ALL_BYTES_READ)); + } + Ok(result) + } +} + +/******************************************************************************* + Derived functionality + + See comments in `to_haskell` for why these functions do not live inside the + trait. +*******************************************************************************/ + +/// Marshall value with variable-sized encoding +pub fn marshall_from_haskell_var(inp: *const u8, len: usize, tag: PhantomData) -> T +where + T: FromHaskell, +{ + let mut vec: Vec = vec![0; len]; + unsafe { + std::ptr::copy(inp, vec.as_mut_ptr(), len); + } + match T::from_haskell_slice(vec.as_ref(), tag) { + Ok(t) => t, + Err(e) => panic!("{}", e), + } +} + +/// Marshall value with fixed-size encoding +/// +/// The `len` argument here is only to verify that the Haskell-side and +/// Rust-side agree on the size of the encoding. +pub fn marshall_from_haskell_fixed( + inp: *const u8, + inp_len: usize, + tag: PhantomData, +) -> T +where + T: FromHaskell + HaskellSize, +{ + let expected_len = T::haskell_size(tag); + + if inp_len != expected_len { + panic!( + "expected buffer of size {}, but got {}", + expected_len, inp_len + ) + } else { + marshall_from_haskell_var(inp, inp_len, tag) + } +} diff --git a/haskell-ffi/src/haskell_size.rs b/haskell-ffi/src/haskell_size.rs new file mode 100644 index 0000000..810db74 --- /dev/null +++ b/haskell-ffi/src/haskell_size.rs @@ -0,0 +1,223 @@ +use std::marker::PhantomData; + +use crate::{derive_size_tuple_instance, fold_types}; + +pub use haskell_ffi_derive::HaskellSize; + +/******************************************************************************* + Main class definition +*******************************************************************************/ + +pub trait HaskellSize { + /// Statically known size (in bytes) + fn haskell_size(tag: PhantomData) -> usize; +} + +/******************************************************************************* + Simple instances + + Note: the following types in the Borsh spec do _not_ have statically known sizes: + + - Vec + - HashMap + - HashSet + - Option + - String +*******************************************************************************/ + +impl HaskellSize for u8 { + fn haskell_size(_tag: PhantomData) -> usize { + 1 + } +} + +impl HaskellSize for u16 { + fn haskell_size(_tag: PhantomData) -> usize { + 2 + } +} + +impl HaskellSize for u32 { + fn haskell_size(_tag: PhantomData) -> usize { + 4 + } +} + +impl HaskellSize for u64 { + fn haskell_size(_tag: PhantomData) -> usize { + 8 + } +} + +impl HaskellSize for u128 { + fn haskell_size(_tag: PhantomData) -> usize { + 16 + } +} + +impl HaskellSize for i8 { + fn haskell_size(_tag: PhantomData) -> usize { + 1 + } +} + +impl HaskellSize for i16 { + fn haskell_size(_tag: PhantomData) -> usize { + 2 + } +} + +impl HaskellSize for i32 { + fn haskell_size(_tag: PhantomData) -> usize { + 4 + } +} + +impl HaskellSize for i64 { + fn haskell_size(_tag: PhantomData) -> usize { + 8 + } +} + +impl HaskellSize for i128 { + fn haskell_size(_tag: PhantomData) -> usize { + 16 + } +} + +impl HaskellSize for f32 { + fn haskell_size(_tag: PhantomData) -> usize { + 4 + } +} + +impl HaskellSize for f64 { + fn haskell_size(_tag: PhantomData) -> usize { + 8 + } +} + +impl HaskellSize for () { + fn haskell_size(_tag: PhantomData) -> usize { + 0 + } +} + +impl, const N: usize> HaskellSize for [T; N] { + fn haskell_size(tag: PhantomData) -> usize { + T::haskell_size(tag) * N + } +} + +/******************************************************************************* + Tuples + + We support the same sizes of tuples as `borsh` does. +*******************************************************************************/ + +derive_size_tuple_instance!(T0, T1); +derive_size_tuple_instance!(T0, T1, T2); +derive_size_tuple_instance!(T0, T1, T2, T3); +derive_size_tuple_instance!(T0, T1, T2, T3, T4); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7, T8); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14); +derive_size_tuple_instance!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15); +derive_size_tuple_instance!( + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16 +); +derive_size_tuple_instance!( + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17 +); +derive_size_tuple_instance!( + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18 +); +derive_size_tuple_instance!( + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19 +); + +/******************************************************************************* + Sanity checks +*******************************************************************************/ + +#[cfg(test)] +mod tests { + use std::io::Error; + + use borsh::BorshSerialize; + + use super::*; + + enum ExampleTag {} + + #[derive(HaskellSize, BorshSerialize)] + struct EmptyStruct; + + #[derive(HaskellSize, BorshSerialize)] + struct UnnamedStruct(u16, (u8, u32)); + + #[derive(HaskellSize, BorshSerialize)] + struct NamedStruct { + a: u8, + b: u16, + c: (u32, u64), + } + + #[derive(HaskellSize, BorshSerialize)] + struct ParamStruct { + a: u8, + b: (T, T, T), + } + + #[test] + fn empty() -> Result<(), Error> { + let tag: PhantomData = PhantomData; + assert_eq!(EmptyStruct::haskell_size(tag), 0); + let encoded = EmptyStruct.try_to_vec()?; + assert_eq!(encoded.len(), EmptyStruct::haskell_size(tag)); + Ok(()) + } + + #[test] + fn unnamed() -> Result<(), Error> { + let tag: PhantomData = PhantomData; + assert_eq!(UnnamedStruct::haskell_size(tag), 7); + let encoded = UnnamedStruct(1, (2, 3)).try_to_vec()?; + assert_eq!(encoded.len(), UnnamedStruct::haskell_size(tag)); + Ok(()) + } + + #[test] + fn named() -> Result<(), Error> { + let tag: PhantomData = PhantomData; + assert_eq!(NamedStruct::haskell_size(tag), 15); + let encoded = NamedStruct { + a: 1, + b: 2, + c: (3, 4), + } + .try_to_vec()?; + assert_eq!(encoded.len(), NamedStruct::haskell_size(tag)); + Ok(()) + } + + #[test] + fn param() -> Result<(), Error> { + let tag: PhantomData = PhantomData; + assert_eq!(>::haskell_size(tag), 25); + let encoded = ParamStruct { + a: 1, + b: (1.0, 2.0, 3.0), + } + .try_to_vec()?; + assert_eq!(encoded.len(), >::haskell_size(tag)); + Ok(()) + } +} diff --git a/haskell-ffi/src/instances.rs b/haskell-ffi/src/instances.rs new file mode 100644 index 0000000..beed2b8 --- /dev/null +++ b/haskell-ffi/src/instances.rs @@ -0,0 +1,262 @@ +//! ToHaskell and FromHaskell instances for the various standard types mandated +//! by the [Borsh spec](https://borsh.io/), piggy-backing on the implementation +//! in the `borsh` crate. The only spec-described types _not_ provided are +//! user-defined structs and enums. + +use borsh::{BorshDeserialize, BorshSerialize}; +use std::{ + collections::{HashMap, HashSet}, + hash::Hash, + io::{Error, ErrorKind, Write}, + marker::PhantomData, +}; + +use crate::{ + derive_array_instances, derive_simple_instances, derive_tuple_instances, + deriving_via::{tag_ref, untag_val, Haskell}, + from_haskell::FromHaskell, + map_tuple, map_tuple_ref, + to_haskell::ToHaskell, + HaskellSize, +}; + +/******************************************************************************* + Simple (non-composite) instances +*******************************************************************************/ + +derive_simple_instances!(u8); +derive_simple_instances!(u16); +derive_simple_instances!(u32); +derive_simple_instances!(u64); +derive_simple_instances!(u128); +derive_simple_instances!(i8); +derive_simple_instances!(i16); +derive_simple_instances!(i32); +derive_simple_instances!(i64); +derive_simple_instances!(i128); +derive_simple_instances!(f32); +derive_simple_instances!(f64); +derive_simple_instances!(()); +derive_simple_instances!(String); + +/******************************************************************************* + Array instances + + This is the same set of sizes as supported by borsh. +*******************************************************************************/ + +derive_array_instances!(0); +derive_array_instances!(1); +derive_array_instances!(2); +derive_array_instances!(3); +derive_array_instances!(4); +derive_array_instances!(5); +derive_array_instances!(6); +derive_array_instances!(7); +derive_array_instances!(8); +derive_array_instances!(9); +derive_array_instances!(10); +derive_array_instances!(11); +derive_array_instances!(12); +derive_array_instances!(13); +derive_array_instances!(14); +derive_array_instances!(15); +derive_array_instances!(16); +derive_array_instances!(17); +derive_array_instances!(18); +derive_array_instances!(19); +derive_array_instances!(20); +derive_array_instances!(21); +derive_array_instances!(22); +derive_array_instances!(23); +derive_array_instances!(24); +derive_array_instances!(25); +derive_array_instances!(26); +derive_array_instances!(27); +derive_array_instances!(28); +derive_array_instances!(29); +derive_array_instances!(30); +derive_array_instances!(31); +derive_array_instances!(32); + +derive_array_instances!(64); +derive_array_instances!(65); + +derive_array_instances!(128); +derive_array_instances!(256); +derive_array_instances!(512); +derive_array_instances!(1024); +derive_array_instances!(2048); + +/******************************************************************************* + Composite instances + + This is the same set of tuple sizes as supported by `borsh.` +*******************************************************************************/ + +derive_tuple_instances!(T0, T1); +derive_tuple_instances!(T0, T1, T2); +derive_tuple_instances!(T0, T1, T2, T3); +derive_tuple_instances!(T0, T1, T2, T3, T4); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15); +derive_tuple_instances!(T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16); +derive_tuple_instances!( + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17 +); +derive_tuple_instances!( + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18 +); +derive_tuple_instances!( + T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15, T16, T17, T18, T19 +); + +/******************************************************************************* + Vec +*******************************************************************************/ + +impl> ToHaskell for Vec { + fn to_haskell(&self, writer: &mut W, _: PhantomData) -> Result<(), Error> { + let tagged: Vec<&Haskell> = self.iter().map(tag_ref).collect(); + tagged.serialize(writer) + } +} + +impl> FromHaskell for Vec { + fn from_haskell(buf: &mut &[u8], _: PhantomData) -> Result { + let tagged: Vec> = BorshDeserialize::deserialize(buf)?; + Ok(tagged.into_iter().map(untag_val).collect()) + } +} + +/******************************************************************************* + HashMap +*******************************************************************************/ + +impl ToHaskell for HashMap +where + K: Eq + PartialOrd + Hash + ToHaskell, + V: ToHaskell, +{ + fn to_haskell(&self, writer: &mut W, _: PhantomData) -> Result<(), Error> { + let tagged: HashMap<&Haskell, &Haskell> = + self.iter().map(|(k, v)| (tag_ref(k), tag_ref(v))).collect(); + tagged.serialize(writer) + } +} + +impl FromHaskell for HashMap +where + K: Eq + Hash + FromHaskell, + V: FromHaskell, +{ + fn from_haskell(buf: &mut &[u8], _: PhantomData) -> Result { + let tagged: HashMap, Haskell> = BorshDeserialize::deserialize(buf)?; + Ok(tagged + .into_iter() + .map(|(k, v)| (untag_val(k), untag_val(v))) + .collect()) + } +} + +/******************************************************************************* + HashSet +*******************************************************************************/ + +impl ToHaskell for HashSet +where + T: Eq + PartialOrd + Hash + ToHaskell, +{ + fn to_haskell(&self, writer: &mut W, _: PhantomData) -> Result<(), Error> { + let tagged: HashSet<&Haskell> = self.iter().map(tag_ref).collect(); + tagged.serialize(writer) + } +} + +impl FromHaskell for HashSet +where + T: Eq + Hash + FromHaskell, +{ + fn from_haskell(buf: &mut &[u8], _: PhantomData) -> Result { + let tagged: HashSet> = BorshDeserialize::deserialize(buf)?; + Ok(tagged.into_iter().map(untag_val).collect()) + } +} + +/******************************************************************************* + Option +*******************************************************************************/ + +impl> ToHaskell for Option { + fn to_haskell(&self, writer: &mut W, _: PhantomData) -> Result<(), Error> { + let tagged: Option<&Haskell> = self.as_ref().map(tag_ref); + tagged.serialize(writer) + } +} + +impl> FromHaskell for Option { + fn from_haskell(buf: &mut &[u8], _: PhantomData) -> Result { + let tagged: Option> = BorshDeserialize::deserialize(buf)?; + Ok(tagged.map(untag_val)) + } +} + +/******************************************************************************* + Result + + `Result` is not explicitly mentioned by the Borsh spec, but it's ubiquitous + and so we provide an instance for it, following the standard rule for enum. + + There is no need for an instance of `FromHaskell`, since this is indicating + the result of some Rust-side operation. +*******************************************************************************/ + +impl, E: ToHaskell> ToHaskell for Result { + fn to_haskell(&self, writer: &mut W, _: PhantomData) -> Result<(), Error> { + let tagged: Result<&Haskell, &Haskell> = match self { + Ok(t) => Ok(tag_ref(t)), + Err(e) => Err(tag_ref(e)), + }; + tagged.serialize(writer) + } +} + +/******************************************************************************* + Bool + + The Borsh spec does not mention Bool; we encode `true` as 1 and `false` as 0; + this matches what the Haskell `borsh` library does. +*******************************************************************************/ + +impl HaskellSize for bool { + fn haskell_size(tag: PhantomData) -> usize { + u8::haskell_size(tag) + } +} + +impl ToHaskell for bool { + fn to_haskell(&self, writer: &mut W, tag: PhantomData) -> Result<(), Error> { + let as_u8: u8 = if *self { 1 } else { 0 }; + as_u8.to_haskell(writer, tag) + } +} + +impl FromHaskell for bool { + fn from_haskell(buf: &mut &[u8], tag: PhantomData) -> Result { + let as_u8 = u8::from_haskell(buf, tag)?; + match as_u8 { + 0 => Ok(false), + 1 => Ok(true), + _ => Err(Error::new(ErrorKind::InvalidData, "Invalid bool")), + } + } +} diff --git a/haskell-ffi/src/lib.rs b/haskell-ffi/src/lib.rs new file mode 100644 index 0000000..be325a3 --- /dev/null +++ b/haskell-ffi/src/lib.rs @@ -0,0 +1,16 @@ +#![feature(array_methods)] +#![feature(trace_macros)] + +mod instances; +mod macros; + +pub mod bincode; +pub mod deriving_via; +pub mod from_haskell; +pub mod haskell_size; +pub mod to_haskell; +pub mod use_borsh; + +pub use from_haskell::FromHaskell; +pub use haskell_size::HaskellSize; +pub use to_haskell::ToHaskell; diff --git a/haskell-ffi/src/macros.rs b/haskell-ffi/src/macros.rs new file mode 100644 index 0000000..e69d867 --- /dev/null +++ b/haskell-ffi/src/macros.rs @@ -0,0 +1,194 @@ +/******************************************************************************* + Auxiliary general-purpose macros + + The `map_tuple` macro is adapted from + https://stackoverflow.com/questions/66396814/generating-tuple-indices-based-on-macro-rules-repetition-expansion . +*******************************************************************************/ + +/// Map function across all elements of a tuple +/// +/// ```ignore +/// map_tuple( [T0, T1], tuple, f ) +/// ``` +/// +/// will become +/// +/// ```ignore +/// ( f(tuple.0) , f(tuple.1) ) +/// ``` +/// +/// See also `map_tuple_ref`. +#[macro_export] +macro_rules! map_tuple { + // Base-case: we are done. Return the accumulator + // + // We explicitly allow the list of indices to be non-empty (not all indices might be used) + ( @, $tuple:ident, $fn:ident, [], [ $($ixs:tt)* ], [ $($acc:tt)* ] ) => { + ( $($acc),* ) + }; + + // Recursive-case: add entry to accumulator + ( @, $tuple:ident, $fn:ident, [ $t:ident $(,$ts:ident)* ], [ ($ix:tt) $($ixs:tt)* ], [ $($acc:tt)* ] ) => { + map_tuple!(@, $tuple, $fn, [ $($ts),* ], [ $($ixs)* ], [ $($acc)* ($fn($tuple . $ix)) ]) + }; + + // Entry-point into the macro + ( [ $($ts:ident),* ], $tuple:ident, $fn:ident ) => { + map_tuple!(@, $tuple, $fn, + // Pass original list of identifiers (only used to determine tuple length) + [ $($ts),* ] + + // Pre-defined list of tuple indices + , [(0) (1) (2) (3) (4) (5) (6) (7) (8) (9) (10) (11) (12) (13) (14) (15) (16) (17) (18) (19)] + + // Empty accumulator + , [] + ) + } +} + +/// Variation on `map_tuple` that uses a _reference_ to a tuple +/// +/// TODO: It seems I cannot unify these two macros, because `&self.0` and `(&self).0` are not +/// equivalent expressions. Is that true..? +#[macro_export] +macro_rules! map_tuple_ref { + // Base-case: we are done. Return the accumulator + // + // We explicitly allow the list of indices to be non-empty (not all indices might be used) + ( @, $tuple:ident, $fn:ident, [], [ $($ixs:tt)* ], [ $($acc:tt)* ] ) => { + ( $($acc),* ) + }; + + // Recursive-case: add entry to accumulator + ( @, $tuple:ident, $fn:ident, [ $t:ident $(,$ts:ident)* ], [ ($ix:tt) $($ixs:tt)* ], [ $($acc:tt)* ] ) => { + map_tuple_ref!(@, $tuple, $fn, [ $($ts),* ], [ $($ixs)* ], [ $($acc)* ($fn(&$tuple . $ix)) ]) + }; + + // Entry-point into the macro + ( [ $($ts:ident),* ], $tuple:ident, $fn:ident ) => { + map_tuple_ref!(@, $tuple, $fn, + // Pass original list of identifiers (only used to determine tuple length) + [ $($ts),* ] + + // Pre-defined list of tuple indices + , [(0) (1) (2) (3) (4) (5) (6) (7) (8) (9) (10) (11) (12) (13) (14) (15) (16) (17) (18) (19)] + + // Empty accumulator + , [] + ) + } +} + +/// Fold a list of types +/// +/// ```ignore +/// fold_types!( [T0, T1], haskell_size, tag, +, 0); +/// ``` +/// +/// expands to +/// +/// ```ignore +/// 0 + ::haskell_size(tag) + ::haskell_size(tag) +/// ``` +#[macro_export] +macro_rules! fold_types { + // Base-case: we are done. Return the accumulator + ( @, $f:ident, $arg:ident, $op:tt, [], $acc:tt ) => { + $acc + }; + + // Recursive-case: add entry to accumulator + ( @, $f:ident, $arg:ident, $op:tt, [ $t:ty $(,$ts:ty)* ], $acc:tt ) => { + fold_types!(@, $f, $arg, $op, [ $($ts),* ], ( $acc $op (<$t> :: $f($arg)) )) + }; + + // Entry-point into the macro + ( [ $($ts:ty),* ], $f:ident, $arg:ident, $op:tt, $e:tt ) => { + fold_types!(@, $f, $arg, $op, [ $($ts),* ], $e) + }; +} + +/******************************************************************************* + Macros for deriving specific kinds of instances +*******************************************************************************/ + +/// Derive `ToHaskell` and `FromHaskell` instances for simple types: types with +/// no type arguments. +#[macro_export] +macro_rules! derive_simple_instances { + ($t:ty) => { + impl ToHaskell for $t { + fn to_haskell( + &self, + writer: &mut W, + _: PhantomData, + ) -> Result<(), Error> { + self.serialize(writer) + } + } + + impl FromHaskell for $t { + fn from_haskell(buf: &mut &[u8], _tag: PhantomData) -> Result { + <$t>::deserialize(buf) + } + } + }; +} + +/// Derive `ToHaskell` and `FromHaskell` instances for arrays of the specified size. +#[macro_export] +macro_rules! derive_array_instances { + ($sz : literal) => { + impl> ToHaskell for [T; $sz] { + fn to_haskell( + &self, + writer: &mut W, + _: PhantomData, + ) -> Result<(), Error> { + let tagged: [&Haskell; $sz] = self.each_ref().map(tag_ref); + tagged.serialize(writer) + } + } + + impl + Default + Copy> FromHaskell for [T; $sz] { + fn from_haskell(buf: &mut &[u8], _: PhantomData) -> Result { + let tagged: [Haskell; $sz] = BorshDeserialize::deserialize(buf)?; + Ok(tagged.map(untag_val)) + } + } + }; +} + +/// Derive `ToHaskell` and `FromHaskell` for tuples with the specified number of type arguments +/// (i.e., for tuples of the specified size). +#[macro_export] +macro_rules! derive_tuple_instances { + ($($ts:ident),*) => { + impl ),* > ToHaskell for ( $($ts ),* ) { + fn to_haskell(&self, writer: &mut W,_: PhantomData) -> Result<(), Error> { + let tagged: ( $(&Haskell ),* ) = map_tuple_ref!( [ $($ts),* ], self, tag_ref ); + tagged.serialize(writer) + } + } + + impl ),* > FromHaskell for ( $($ts ),* ) { + fn from_haskell(buf: &mut &[u8], _: PhantomData) -> Result { + let tagged: ( $(Haskell ),* ) = BorshDeserialize::deserialize(buf)?; + Ok( map_tuple!( [ $($ts),* ], tagged, untag_val ) ) + } + } + }; +} + +/// Derive `HaskellSize` instance for tuple with the specified type arguments. +#[macro_export] +macro_rules! derive_size_tuple_instance { + ($($ts:ident),*) => { + impl ),* > HaskellSize for ( $($ts),* ) { + fn haskell_size(tag: PhantomData) -> usize { + fold_types!( [ $($ts),* ], haskell_size, tag, +, 0) + } + } + }; +} diff --git a/haskell-ffi/src/tagged.rs b/haskell-ffi/src/tagged.rs new file mode 100644 index 0000000..11cf416 --- /dev/null +++ b/haskell-ffi/src/tagged.rs @@ -0,0 +1,16 @@ +pub mod borsh_instances; +mod macros; + +use std::marker::PhantomData; + +pub struct Tagged { + pub value: T, + pub tag: PhantomData, +} + +pub fn tag(t: T) -> Tagged { + Tagged { + value: t, + tag: PhantomData, + } +} diff --git a/haskell-ffi/src/to_haskell.rs b/haskell-ffi/src/to_haskell.rs new file mode 100644 index 0000000..da91378 --- /dev/null +++ b/haskell-ffi/src/to_haskell.rs @@ -0,0 +1,93 @@ +use std::{ + io::{Error, Write}, + marker::PhantomData, +}; + +use crate::HaskellSize; + +/******************************************************************************* + Main class definition +*******************************************************************************/ + +// Copied from `borsh` +const DEFAULT_SERIALIZER_CAPACITY: usize = 1024; + +pub trait ToHaskell { + /// Serialize data to be sent to Haskell + /// + /// This is the analogue of `BorshSerialize::serialize`. + /// + /// The `tag` argument allows client libraries to define additional + /// instances of `ToHaskell` for foreign (non-local) types. For example, the + /// `solana-sdk-haskell` library can define a `ToHaskell` instance for + /// `Keypair`, defined in `solana-sdk`, as long as it uses a tag `Solana` + /// defined locally in the `solana-haskell-sdk` package. + fn to_haskell(&self, writer: &mut W, tag: PhantomData) -> Result<(), Error>; + + fn to_haskell_vec(&self, tag: PhantomData) -> Result, Error> { + let mut result = Vec::with_capacity(DEFAULT_SERIALIZER_CAPACITY); + self.to_haskell(&mut result, tag)?; + Ok(result) + } +} + +impl> ToHaskell for &T { + fn to_haskell(&self, writer: &mut W, tag: PhantomData) -> Result<(), Error> { + (*self).to_haskell(writer, tag) + } +} + +/******************************************************************************* + Derived functionality + + These functions are not defined in the trait itself, to make it clear that + they only exist at top-level calls, and will not be recursively called + in various `ToHaskell` instances. This is important, because the `len` + parameter that gives the length of the buffer only applies to the _overall_ + buffer. +*******************************************************************************/ + +/// Marshall value with fixed-sized encoding +/// +/// The `out_len` parameter is only used to verify that the Haskell-side and +/// the Rust side agree on the length of the encoding. +pub fn marshall_to_haskell_fixed(t: &T, out: *mut u8, out_len: usize, tag: PhantomData) +where + T: HaskellSize + ToHaskell, +{ + let expected_len: usize = T::haskell_size(tag); + if out_len != expected_len { + panic!( + "marshall_to_haskell_fixed: expected buffer of size {}, but got {}", + expected_len, out_len + ) + } else { + let mut out_len_copy = out_len; + marshall_to_haskell_var(t, out, &mut out_len_copy, tag); + } +} + +/// Marshall value with variable-sized encoding +pub fn marshall_to_haskell_var( + t: &T, + out: *mut u8, + out_len: &mut usize, + tag: PhantomData, +) where + T: ToHaskell, +{ + match t.to_haskell_vec(tag) { + Ok(vec) => { + let slice: &[u8] = vec.as_ref(); + + if slice.len() <= *out_len { + unsafe { + std::ptr::copy(slice.as_ptr(), out, slice.len()); + } + } + + *out_len = slice.len(); + } + Err(e) => panic!("{}", e), + } +} diff --git a/haskell-ffi/src/use_borsh.rs b/haskell-ffi/src/use_borsh.rs new file mode 100644 index 0000000..04047f1 --- /dev/null +++ b/haskell-ffi/src/use_borsh.rs @@ -0,0 +1,54 @@ +use borsh::{BorshDeserialize, BorshSerialize}; +use std::{ + io::{Error, Write}, + marker::PhantomData, +}; + +use crate::{FromHaskell, ToHaskell}; + +/// Newtype wrapper for defaulting to `borsh` for `ToHaskell`/`FromHaskell` +/// +/// `ToHaskell`/`FromHaskell` have instances for types such as `Vec`, but +/// those instances depend on `ToHaskell`/`FromHaskell` for `T`. This +/// indirection is not always necessary, and may be expensive. The `UseBorsh` +/// newtype wrapper can be used to mark values where `ToHaskell`/`FromHaskell` +/// should just piggy-back on Borsh. +pub struct UseBorsh(pub T); + +pub fn unwrap_use_borsh(use_borsh: UseBorsh) -> T { + let UseBorsh(t) = use_borsh; + t +} + +pub fn unwrap_use_borsh_ref(use_borsh: &UseBorsh) -> &T { + let UseBorsh(t) = use_borsh; + t +} + +/******************************************************************************* + Forwarding instances + + These instances _define_ the `UseBorsh` type +*******************************************************************************/ + +impl ToHaskell for UseBorsh { + fn to_haskell(&self, writer: &mut W, _: PhantomData) -> Result<(), Error> { + unwrap_use_borsh_ref(self).serialize(writer) + } +} + +impl FromHaskell for UseBorsh { + fn from_haskell(buf: &mut &[u8], _: PhantomData) -> Result { + T::deserialize(buf).map(UseBorsh) + } +} + +/******************************************************************************* + Additional standard instances +*******************************************************************************/ + +impl> AsRef for UseBorsh { + fn as_ref(&self) -> &T { + unwrap_use_borsh_ref(self).as_ref() + } +} diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..f116d8a --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,2 @@ +[toolchain] +channel ="nightly"