Skip to content

Commit

Permalink
Add derive for Schema (#13)
Browse files Browse the repository at this point in the history
Fixes #3

Introduces two new crates:
- `serde_avro_derive`, the user-facing crate that contains the necessary traits/structs and reexports the macro
- `serde_avro_derive_macros`, the proc-macro crate that contains the actual derive (not user-facing)

The new traits and construction structs are not in the main crate for several reasons:
- `serde_avro_fast` can be built before `serde_avro_derive`, which improves compilation time
- The API of `serde_avro_derive` is not deemed stable and/or private enough for the 1.0 release of `serde_avro_fast`
  • Loading branch information
Ten0 authored Mar 10, 2024
1 parent 4d88149 commit 3111e8a
Show file tree
Hide file tree
Showing 65 changed files with 1,059 additions and 52 deletions.
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
/target
/Cargo.lock
Cargo.lock
58 changes: 7 additions & 51 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,51 +1,7 @@
[package]
authors = ["Thomas BESSOU <[email protected]>"]
description = "An idiomatic implementation of serde/avro (de)serialization"
edition = "2021"
license = "LGPL-3.0-only"
name = "serde_avro_fast"
repository = "https://github.com/Ten0/serde_avro_fast"
version = "1.0.0-rc.4"

[features]
default = ["deflate"]
deflate = ["flate2"]
snappy = ["snap", "crc32fast"]
xz = ["xz2"]
zstandard = ["zstd"]

[dependencies]
bzip2 = { version = "0.4", optional = true }
crc32fast = { version = "1", optional = true }
flate2 = { version = "1", optional = true }
integer-encoding = { default-features = false, version = "4" }
num-traits = "0.2"
rand = "0.8"
rust_decimal = { version = "1", default-features = false, features = ["std", "serde-with-str"] }
serde = "1"
serde-transcode = "1"
serde_derive = "1"
serde_json = "1"
serde_serializer_quick_unsupported = "0.1"
snap = { version = "1", optional = true }
thiserror = "1"
xz2 = { version = "0.1", optional = true }
zstd = { version = "0.13", optional = true }

[dev-dependencies]
anyhow = "1"
apache-avro = { version = "0.14", features = ["bzip", "snappy", "xz", "zstandard"] }
criterion = "0.5"
lazy_static = "1"
paste = "1"
pretty_assertions = "1"
serde-tuple-vec-map = "1"
serde_bytes = "0.11"

[[bench]]
harness = false
name = "single"

[[bench]]
harness = false
name = "object_container_file_encoding"
[workspace]
members = [
"serde_avro_derive",
"serde_avro_derive_macros",
"serde_avro_fast",
]
resolver = "2"
19 changes: 19 additions & 0 deletions serde_avro_derive/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
[package]
authors = ["Thomas BESSOU <[email protected]>"]
description = "Derive avro schema for Rust structs for serde_avro_fast"
edition = "2021"
license = "LGPL-3.0-only"
name = "serde_avro_derive"
repository = "https://github.com/Ten0/serde_avro_fast"
version = "0.1.0"
workspace = ".."

[dependencies]
serde_avro_derive_macros = { path = "../serde_avro_derive_macros", version = "0.1" }
serde_avro_fast = { path = "../serde_avro_fast", version = "1.0.0-rc.4" }

[dev-dependencies]
lazy_static = "1"
pretty_assertions = "1"
regex = "1"
serde_json = "1"
257 changes: 257 additions & 0 deletions serde_avro_derive/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
//! Bring automatic Avro schema generation to [`serde_avro_fast`].
//!
//! See the [`#[derive(Schema)]`](derive@Schema) documentation for more
//! information.
pub use serde_avro_fast;

pub use serde_avro_derive_macros::*;

use std::{any::TypeId, collections::HashMap};

use serde_avro_fast::schema::*;

/// We can automatically build a schema for this type (can be `derive`d)
///
/// This trait can be derived using [`#[derive(Schema)]`](derive@Schema)
pub trait BuildSchema {
/// Build a [`struct@Schema`] for this type
fn schema() -> Result<Schema, SchemaError> {
Self::schema_mut().try_into()
}
/// Build a [`SchemaMut`] for this type
fn schema_mut() -> SchemaMut {
let mut builder = SchemaBuilder::default();
Self::append_schema(&mut builder);
SchemaMut::from_nodes(builder.nodes)
}

/// Largely internal method to build the schema. Registers the schema within
/// the builder.
///
/// This does not check if this type already exists in the builder, so it
/// should never be called directly (instead, use
/// [`SchemaBuilder::find_or_build`])
///
/// The [`SchemaNode`] for this type should be put at the current end of the
/// `nodes` array, and its non-already-built dependencies should be put
/// after in the array.
fn append_schema(builder: &mut SchemaBuilder);

/// Largely internal type used by [`#[derive(Schema)]`](derive@Schema)
///
/// The TypeId of this type will be used to lookup whether the
/// [`SchemaNode`] for this type has already been built in the
/// [`SchemaBuilder`].
///
/// This indirection is required to allow non-static types to implement
/// [`BuildSchema`], and also enables using the same node for types that we
/// know map to the same schema.
type TypeLookup: std::any::Any;
}

/// Largely internal type used by [`#[derive(Schema)]`](derive@Schema)
///
/// You should typically not use this directly
///
/// It holds the schema nodes built so far, along with a record of which
/// types already have a node, so that each type is only built once.
#[derive(Default)]
pub struct SchemaBuilder {
    /// Schema nodes built so far. [`SchemaBuilder::find_or_build`] derives
    /// its keys from positions in this array.
    pub nodes: Vec<SchemaNode>,
    /// For each [`BuildSchema::TypeLookup`] (identified by its `TypeId`), the
    /// key of the node that was registered for it.
    pub already_built_types: HashMap<TypeId, SchemaKey>,
    // Private unit field: keeps this struct from being constructed with a
    // struct literal outside of this module.
    _private: (),
}

impl SchemaBuilder {
    /// Reserve a slot in the `nodes` array and return its index
    ///
    /// The slot is filled with a placeholder `Null` node. Once the actual
    /// `SchemaNode` has been built, it should be written at the returned
    /// position in `nodes`.
    pub fn reserve(&mut self) -> usize {
        let slot = self.nodes.len();
        self.nodes.push(SchemaNode::RegularType(RegularType::Null));
        slot
    }

    /// Return the key of the node for `T`, building that node if needed
    ///
    /// Types are identified by the `TypeId` of their
    /// [`BuildSchema::TypeLookup`]. If that id is already registered, the
    /// recorded key is returned and nothing is built. Otherwise
    /// [`BuildSchema::append_schema`] is called and the key of the first node
    /// it was expected to append is returned.
    ///
    /// # Panics
    ///
    /// Panics if `T::append_schema` returns without having grown `nodes`
    /// past the position that was recorded for `T`.
    pub fn find_or_build<T: BuildSchema + ?Sized>(&mut self) -> SchemaKey {
        let lookup_id = TypeId::of::<T::TypeLookup>();
        if let Some(&known_key) = self.already_built_types.get(&lookup_id) {
            return known_key;
        }

        // The key is registered before building, so any lookup of this same
        // type made while `append_schema` runs gets this key back.
        let new_key = SchemaKey::from_idx(self.nodes.len());
        self.already_built_types.insert(lookup_id, new_key);
        T::append_schema(self);
        assert!(
            self.nodes.len() > new_key.idx(),
            "append_schema should always insert at least a node \
            (and its dependencies below itself)"
        );
        new_key
    }

    /// Build a logical type node wrapping the node for `T`
    ///
    /// A slot is reserved first, so the logical type node sits before the
    /// nodes that building `T` may append. The key of that slot is returned.
    pub fn build_logical_type<T: BuildSchema + ?Sized>(
        &mut self,
        logical_type: LogicalType,
    ) -> SchemaKey {
        let slot = self.reserve();
        let inner = self.find_or_build::<T>();
        self.nodes[slot] = SchemaNode::LogicalType {
            logical_type,
            inner,
        };
        SchemaKey::from_idx(slot)
    }
}

// Implements `BuildSchema` for Rust types whose schema is a single
// `RegularType` unit variant: the only node appended is that variant, and the
// type is its own `TypeLookup`.
macro_rules! impl_primitive {
    ($($rust_type:ty, $avro_type:ident;)+) => {
        $(
            impl BuildSchema for $rust_type {
                type TypeLookup = Self;
                fn append_schema(builder: &mut SchemaBuilder) {
                    let node = SchemaNode::RegularType(RegularType::$avro_type);
                    builder.nodes.push(node);
                }
            }
        )+
    };
}
impl_primitive! {
    (), Null;
    bool, Boolean;
    i32, Int;
    i64, Long;
    f32, Float;
    f64, Double;
    String, String;
    Vec<u8>, Bytes;
}

// Implements `BuildSchema` for a type by delegating to another type: both the
// appended nodes and the `TypeLookup` are those of the target type, so the two
// types share the same node within a builder.
macro_rules! impl_forward {
    ($($source:ty, $target:ty;)+) => {
        $(
            impl BuildSchema for $source {
                type TypeLookup = <$target as BuildSchema>::TypeLookup;
                fn append_schema(builder: &mut SchemaBuilder) {
                    <$target as BuildSchema>::append_schema(builder)
                }
            }
        )+
    };
}
impl_forward! {
    str, String;
    [u8], Vec<u8>;
    u16, i32;
    u32, i64;
    u64, i64;
    i8, i32;
    i16, i32;
    usize, i64;
}

// Implements `BuildSchema` for single-parameter generic wrappers, given by
// path: the wrapper uses the same nodes and the same `TypeLookup` as the type
// it wraps.
macro_rules! impl_ptr {
    ($($($segment:ident)::+,)+) => {
        $(
            impl<T: BuildSchema + ?Sized> BuildSchema for $($segment)::+<T> {
                type TypeLookup = T::TypeLookup;
                fn append_schema(builder: &mut SchemaBuilder) {
                    <T as BuildSchema>::append_schema(builder)
                }
            }
        )+
    };
}
impl_ptr! {
    Box,
    std::sync::Arc,
    std::rc::Rc,
    std::cell::RefCell,
    std::cell::Cell,
}
/// A shared reference has the schema of the type it points to, and shares its
/// node.
impl<T: BuildSchema + ?Sized> BuildSchema for &'_ T {
    type TypeLookup = T::TypeLookup;

    fn append_schema(builder: &mut SchemaBuilder) {
        T::append_schema(builder)
    }
}
/// A mutable reference has the schema of the type it points to, and shares
/// its node.
impl<T: BuildSchema + ?Sized> BuildSchema for &'_ mut T {
    type TypeLookup = T::TypeLookup;

    fn append_schema(builder: &mut SchemaBuilder) {
        T::append_schema(builder)
    }
}

/// A `Vec<T>` is an `Array` whose items are described by the node for `T`.
impl<T: BuildSchema> BuildSchema for Vec<T> {
    type TypeLookup = Vec<T::TypeLookup>;

    fn append_schema(builder: &mut SchemaBuilder) {
        // The array node has to come first, so its slot is taken before the
        // item type gets a chance to append its own nodes.
        let slot = builder.reserve();
        let items = builder.find_or_build::<T>();
        builder.nodes[slot] = SchemaNode::RegularType(RegularType::Array(Array::new(items)));
    }
}

/// A slice has the schema of the corresponding `Vec<T>`, and shares its node.
impl<T: BuildSchema> BuildSchema for [T] {
    type TypeLookup = <Vec<T> as BuildSchema>::TypeLookup;

    fn append_schema(builder: &mut SchemaBuilder) {
        Vec::<T>::append_schema(builder)
    }
}

/// An `Option<T>` is a `Union` of the node for `()` and the node for `T`, in
/// that order.
impl<T: BuildSchema> BuildSchema for Option<T> {
    type TypeLookup = Option<T::TypeLookup>;

    fn append_schema(builder: &mut SchemaBuilder) {
        // The union node has to come first, so its slot is taken before the
        // variants get a chance to append their own nodes.
        let slot = builder.reserve();
        let null_variant = builder.find_or_build::<()>();
        let value_variant = builder.find_or_build::<T>();
        let union = Union::new(vec![null_variant, value_variant]);
        builder.nodes[slot] = SchemaNode::RegularType(RegularType::Union(union));
    }
}

/// A byte array of length `N` is a `Fixed` of size `N`, named `u8_array_N`.
impl<const N: usize> BuildSchema for [u8; N] {
    type TypeLookup = Self;

    fn append_schema(builder: &mut SchemaBuilder) {
        let fixed_name = Name::from_fully_qualified_name(format!("u8_array_{}", N));
        let node = SchemaNode::RegularType(RegularType::Fixed(Fixed::new(fixed_name, N)));
        builder.nodes.push(node);
    }
}

/// A `HashMap` with string-like keys is a `Map` whose values are described by
/// the node for `V`.
///
/// The lookup type always uses `String` keys, so the concrete key type does
/// not affect which node is used.
impl<S, V> BuildSchema for HashMap<S, V>
where
    S: std::ops::Deref<Target = str>,
    V: BuildSchema,
{
    type TypeLookup = HashMap<String, V::TypeLookup>;

    fn append_schema(builder: &mut SchemaBuilder) {
        // The map node has to come first, so its slot is taken before the
        // value type gets a chance to append its own nodes.
        let slot = builder.reserve();
        let values = builder.find_or_build::<V>();
        builder.nodes[slot] = SchemaNode::RegularType(RegularType::Map(Map::new(values)));
    }
}
/// A `BTreeMap` with string-like keys has the schema of the corresponding
/// `HashMap<String, V>`, and shares its node.
impl<S, V> BuildSchema for std::collections::BTreeMap<S, V>
where
    S: std::ops::Deref<Target = str>,
    V: BuildSchema,
{
    type TypeLookup = <HashMap<String, V> as BuildSchema>::TypeLookup;

    fn append_schema(builder: &mut SchemaBuilder) {
        HashMap::<String, V>::append_schema(builder)
    }
}

/// Appends `_` followed by a 16-digit, zero-padded, lowercase hexadecimal
/// hash of `type_id` to `struct_name`.
///
/// The hash is computed by feeding `type_id` to a freshly created
/// [`std::hash::SipHasher`].
#[doc(hidden)]
pub fn hash_type_id(struct_name: &mut String, type_id: TypeId) {
    use std::fmt::Write as _;
    use std::hash::{Hash, Hasher};

    // `SipHasher` is deprecated, but it is kept on purpose so that the hasher
    // in use does not change.
    #[allow(deprecated)]
    let digest = {
        let mut state = std::hash::SipHasher::new();
        type_id.hash(&mut state);
        state.finish()
    };
    write!(struct_name, "_{:016x?}", digest).unwrap();
}
Loading

0 comments on commit 3111e8a

Please sign in to comment.