diff options
author | Casper <casperneo@uchicago.edu> | 2020-05-07 14:11:26 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2020-05-07 14:11:26 -0700 |
commit | 8be05f6bd474a9942d41d98ad197d9813af1da1a (patch) | |
tree | 90716eb973d26aefecc81740fa9cb9d76ab9c99d /rust | |
parent | 870ecbc09a3f9605aed4796b4ad3e97a733bebc3 (diff) | |
download | flatbuffers-8be05f6bd474a9942d41d98ad197d9813af1da1a.tar.gz flatbuffers-8be05f6bd474a9942d41d98ad197d9813af1da1a.tar.bz2 flatbuffers-8be05f6bd474a9942d41d98ad197d9813af1da1a.zip |
Rust Flexbuffers (#5669)
* Cargo clippy lints
* more lints
* more lints
* Restored a doc comment
* Comment on float eps-eq and adjusted casting
* Rust Flexbuffers
* more serde tests, removed some unsafe
* Redid serde to be map-like and Reader is Display
* Moved iter from Reader to VectorReader
* Serious quickcheck + bugs
* wvo api
* Made types smaller for a reasonable speedup
* redid reading in a way that's a bit faster.
Profiling shows the rust slowdown as building +10%, reading +20%
* src/bin are developer binaries in rust
* Root and Map width are not packed
* key null check is debug only + doc changes
* BuilderOptions
* Documentation
* Documentation
* Moved tests to rust_usage_test
* Moved rust flexbuffers samples to Flatbuffers/samples
* Fixed RustTest
* Fixed for Rust 1.37.0
* Upgraded to rust 1_40_0
* fixed a little-endian-only feature in a test
* 1.40.0
* fixed some benchmarks for bigendian
* Updated .bat file
* misspelling
* Gold Flexbuffer test.
* Serialize,Deserialize, std::error::Error for Errors.
* Undo rustfmt in integration_test.rs
* from_slice instead of from_vec
* Added comments to unsafe blocks
* expanded on comment
* bump
Co-authored-by: CasperN <cneo@google.com>
Diffstat (limited to 'rust')
-rw-r--r-- | rust/flexbuffers/.gitignore | 3 | ||||
-rw-r--r-- | rust/flexbuffers/Cargo.toml | 20 | ||||
-rw-r--r-- | rust/flexbuffers/README.md | 14 | ||||
-rw-r--r-- | rust/flexbuffers/src/bitwidth.rs | 113 | ||||
-rw-r--r-- | rust/flexbuffers/src/builder/map.rs | 118 | ||||
-rw-r--r-- | rust/flexbuffers/src/builder/mod.rs | 404 | ||||
-rw-r--r-- | rust/flexbuffers/src/builder/push.rs | 167 | ||||
-rw-r--r-- | rust/flexbuffers/src/builder/ser.rs | 530 | ||||
-rw-r--r-- | rust/flexbuffers/src/builder/value.rs | 306 | ||||
-rw-r--r-- | rust/flexbuffers/src/builder/vector.rs | 65 | ||||
-rw-r--r-- | rust/flexbuffers/src/flexbuffer_type.rs | 240 | ||||
-rw-r--r-- | rust/flexbuffers/src/lib.rs | 103 | ||||
-rw-r--r-- | rust/flexbuffers/src/reader/de.rs | 250 | ||||
-rw-r--r-- | rust/flexbuffers/src/reader/iter.rs | 63 | ||||
-rw-r--r-- | rust/flexbuffers/src/reader/map.rs | 144 | ||||
-rw-r--r-- | rust/flexbuffers/src/reader/mod.rs | 592 | ||||
-rw-r--r-- | rust/flexbuffers/src/reader/vector.rs | 74 |
17 files changed, 3206 insertions, 0 deletions
diff --git a/rust/flexbuffers/.gitignore b/rust/flexbuffers/.gitignore new file mode 100644 index 00000000..69369904 --- /dev/null +++ b/rust/flexbuffers/.gitignore @@ -0,0 +1,3 @@ +/target +**/*.rs.bk +Cargo.lock diff --git a/rust/flexbuffers/Cargo.toml b/rust/flexbuffers/Cargo.toml new file mode 100644 index 00000000..7dec3c94 --- /dev/null +++ b/rust/flexbuffers/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "flexbuffers" +version = "0.1.0" +authors = ["Casper Neo <cneo@google.com>"] +edition = "2018" +license = "Apache-2.0" + +[dependencies] +serde = "1.0.103" +serde_derive = "*" +byteorder = "1.3.2" +num_enum = "0.4.1" +debug_stub_derive = "0.3.0" +bitflags = "1.2.1" + +[dev-dependencies] +bencher = "*" +quickcheck = "*" +quickcheck_derive = "*" +rand = "*" diff --git a/rust/flexbuffers/README.md b/rust/flexbuffers/README.md new file mode 100644 index 00000000..c076c0e4 --- /dev/null +++ b/rust/flexbuffers/README.md @@ -0,0 +1,14 @@ +# Flexbuffers + +[Flexbuffers](https://google.github.io/flatbuffers/flexbuffers.html) is a +schema-less binary format developed at Google. FlexBuffers can be accessed +without parsing, copying, or allocation. This is a huge win for efficiency, +memory friendly-ness, and allows for unique use cases such as mmap-ing large +amounts of free-form data. + +FlexBuffers' design and implementation allows for a very compact encoding, +with automatic sizing of containers to their smallest possible representation +(8/16/32/64 bits). Many values and offsets can be encoded in just 8 bits. + +Flexbuffers is the schema-less cousin of +[Flatbuffers](https://google.github.io/flatbuffers/). 
diff --git a/rust/flexbuffers/src/bitwidth.rs b/rust/flexbuffers/src/bitwidth.rs new file mode 100644 index 00000000..8e0bfedb --- /dev/null +++ b/rust/flexbuffers/src/bitwidth.rs @@ -0,0 +1,113 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::bitwidth::BitWidth::*; +use std::slice::Iter; + +/// Represents the size of Flexbuffers data. +/// +/// Flexbuffers automatically compresses numbers to the smallest possible width +/// (`250u64` is stored as `250u8`). +#[derive( + Debug, + Clone, + Copy, + PartialEq, + Eq, + PartialOrd, + Serialize, + Deserialize, + Ord, + num_enum::TryFromPrimitive, +)] +#[repr(u8)] +pub enum BitWidth { + W8 = 0, + W16 = 1, + W32 = 2, + W64 = 3, +} +impl BitWidth { + pub(crate) fn iter() -> Iter<'static, Self> { + [W8, W16, W32, W64].iter() + } + pub fn n_bytes(self) -> usize { + 1 << self as usize + } + pub fn from_nbytes(n: impl std::convert::Into<usize>) -> Option<Self> { + match n.into() { + 1 => Some(W8), + 2 => Some(W16), + 4 => Some(W32), + 8 => Some(W64), + _ => None, + } + } +} + +impl Default for BitWidth { + fn default() -> Self { + W8 + } +} + +// TODO(cneo): Overloading with `from` is probably not the most readable idea in hindsight. +macro_rules! 
impl_bitwidth_from { + ($from: ident, $w64: ident, $w32: ident, $w16: ident, $w8: ident) => { + impl From<$from> for BitWidth { + fn from(x: $from) -> BitWidth { + let x = x as $w64; + if x >= $w8::min_value() as $w64 && x <= $w8::max_value() as $w64 { + return W8; + } + if x >= $w16::min_value() as $w64 && x <= $w16::max_value() as $w64 { + return W16; + } + if x >= $w32::min_value() as $w64 && x <= $w32::max_value() as $w64 { + return W32; + } + W64 + } + } + }; +} +impl_bitwidth_from!(u64, u64, u32, u16, u8); +impl_bitwidth_from!(usize, u64, u32, u16, u8); +impl_bitwidth_from!(i64, i64, i32, i16, i8); + +#[allow(clippy::float_cmp)] +impl From<f64> for BitWidth { + fn from(x: f64) -> BitWidth { + if x != x as f32 as f64 { + W64 + } else { + W32 + } + } +} +impl From<f32> for BitWidth { + fn from(_: f32) -> BitWidth { + W32 + } +} + +/// Zero pad `v` until `T` will be byte aligned when pushed. +pub fn align(buffer: &mut Vec<u8>, width: BitWidth) { + let bytes = 1 << width as u8; + let alignment = (bytes - buffer.len() % bytes) % bytes; + // Profiling reveals the loop is faster than Vec::resize. + for _ in 0..alignment as usize { + buffer.push(0); + } +} diff --git a/rust/flexbuffers/src/builder/map.rs b/rust/flexbuffers/src/builder/map.rs new file mode 100644 index 00000000..1635f648 --- /dev/null +++ b/rust/flexbuffers/src/builder/map.rs @@ -0,0 +1,118 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use super::{Builder, Pushable, Value, VectorBuilder}; + +/// Builds a Flexbuffer map, returned by a [Builder](struct.Builder.html). +/// +/// ## Side effect when dropped: +/// When this is dropped, or `end_map` is called, the map is +/// committed to the buffer. If this map is the root of the flexbuffer, then the +/// root is written and the flexbuffer is complete. +/// ## Panics: +/// - Duplicate keys will result in a panic in both debug and release mode. +/// - Keys with internal nulls result in a panic in debug mode and result in silent truncation +/// in release mode. +pub struct MapBuilder<'a> { + pub(super) builder: &'a mut Builder, + // If the root is this map then start == None. Otherwise start is the + // number of values in the 'values stack' before adding this map. + pub(super) start: Option<usize>, +} +impl<'a> MapBuilder<'a> { + /// Push `p` onto this map with key `key`. + /// This will panic (in debug mode) if `key` contains internal nulls. + #[inline] + pub fn push<P: Pushable>(&mut self, key: &str, p: P) { + self.builder.push_key(key); + self.builder.push(p); + } + /// Starts a nested vector that will be pushed onto this map + /// with key `key` when it is dropped. + /// + /// This will panic (in debug mode) if `key` contains internal nulls. + #[inline] + pub fn start_vector(&mut self, key: &str) -> VectorBuilder { + // Push the key that refers to this nested vector. + self.builder.push_key(key); + // Nested vector. + let start = Some(self.builder.values.len()); + VectorBuilder { + builder: &mut self.builder, + start, + } + } + /// Starts a nested map that will be pushed onto this map + /// with key `key` when it is dropped. + /// + /// This will panic (in debug mode) if `key` contains internal nulls. + #[inline] + pub fn start_map(&mut self, key: &str) -> MapBuilder { + // Push the key that refers to this nested map. + self.builder.push_key(key); + // Nested map. 
+ let start = Some(self.builder.values.len()); + MapBuilder { + builder: &mut self.builder, + start, + } + } + /// `end_map` sorts the map by key and writes it to the buffer. This happens anyway + /// when the map builder is dropped. + #[inline] + pub fn end_map(self) {} +} +impl<'a> Drop for MapBuilder<'a> { + #[inline] + fn drop(&mut self) { + self.builder.end_map_or_vector(true, self.start); + } +} + +// Read known keys / strings as iterators over bytes -- skipping utf8 validation and strlen. +pub(super) fn get_key(buffer: &[u8], address: usize) -> impl Iterator<Item = &u8> { + buffer[address..].iter().take_while(|&&b| b != b'\0') +} + +// `values` is assumed to be of the format [key1, value1, ..., keyN, valueN]. +// The keys refer to cstrings in `buffer`. When this function returns, +// `values` is sorted in place by key. +pub(super) fn sort_map_by_keys(values: &mut [Value], buffer: &[u8]) { + debug_assert_eq!(values.len() % 2, 0); + debug_assert!(values.iter().step_by(2).all(Value::is_key)); + let raw_pairs = values.as_mut_ptr() as *mut [Value; 2]; + let pairs_len = values.len() / 2; + // Unsafe code needed to treat the slice as key-value pairs when sorting in place. This is + // preferred over custom sorting or adding another dependency. By construction, this part + // of the values stack must be alternating (key, value) pairs. The public API must not be + // able to trigger the above debug_asserts that protect this unsafe usage. 
+ let pairs: &mut [[Value; 2]] = + unsafe { std::slice::from_raw_parts_mut(raw_pairs, pairs_len) }; + #[rustfmt::skip] + pairs.sort_unstable_by(|[key1, _], [key2, _]| { + if let Value::Key(a1) = *key1 { + if let Value::Key(a2) = *key2 { + let s1 = get_key(buffer, a1); + let s2 = get_key(buffer, a2); + let ord = s1.cmp(s2); + if ord == std::cmp::Ordering::Equal { + let dup: String = get_key(buffer, a1).map(|&b| b as char).collect(); + panic!("Duplicated key in map {:?}", dup); + } + return ord; + } + } + unreachable!(); + }); +} diff --git a/rust/flexbuffers/src/builder/mod.rs b/rust/flexbuffers/src/builder/mod.rs new file mode 100644 index 00000000..e71acd0d --- /dev/null +++ b/rust/flexbuffers/src/builder/mod.rs @@ -0,0 +1,404 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::bitwidth::{align, BitWidth}; +mod value; +use crate::FlexBufferType; +use std::cmp::max; +use value::{find_vector_type, store_value, Value}; +mod map; +mod push; +mod ser; +mod vector; +use map::sort_map_by_keys; +pub use map::MapBuilder; +pub use push::Pushable; +pub use ser::{Error, FlexbufferSerializer}; +pub use vector::VectorBuilder; + +macro_rules! 
push_slice { + ($push_name: ident, $scalar: ty, $Val: ident, $new_vec: ident) => { + fn $push_name<T, S>(&mut self, xs: S) + where + T: Into<$scalar> + Copy, + S: AsRef<[T]> + { + let mut value = Value::$new_vec(xs.as_ref().len()); + let mut width = xs.as_ref() + .iter() + .map(|x| BitWidth::from((*x).into())) + .max() + .unwrap_or_default(); + if !value.is_fixed_length_vector() { + let length = Value::UInt(xs.as_ref().len() as u64); + width = std::cmp::max(width, length.width_or_child_width()); + align(&mut self.buffer, width); + store_value(&mut self.buffer, length, width); + } else { + align(&mut self.buffer, width); + } + let address = self.buffer.len(); + for &x in xs.as_ref().iter() { + store_value(&mut self.buffer, Value::$Val(x.into()), width); + } + value.set_address_or_panic(address); + value.set_child_width_or_panic(width); + self.values.push(value); + } + } +} +macro_rules! push_indirect { + ($push_name: ident, $scalar: ty, $Direct: ident, $Indirect: ident) => { + fn $push_name<T: Into<$scalar>>(&mut self, x: T) { + let x = Value::$Direct(x.into()); + let child_width = x.width_or_child_width(); + let address = self.buffer.len(); + store_value(&mut self.buffer, x, child_width); + self.values.push( + Value::Reference { + address, + child_width, + fxb_type: FlexBufferType::$Indirect, + } + ); + } + } +} + +bitflags! { + /// Options for sharing data within a flexbuffer. + /// + /// These increase serialization time but decrease the size of the resulting buffer. By + /// default, `SHARE_KEYS`. You may wish to turn on `SHARE_STRINGS` if you know your data has + /// many duplicate strings or `SHARE_KEY_VECTORS` if your data has many maps with identical + /// keys. 
+ /// + /// ## Not Yet Implemented + /// - `SHARE_STRINGS` + /// - `SHARE_KEY_VECTORS` + pub struct BuilderOptions: u8 { + const SHARE_NONE = 0; + const SHARE_KEYS = 1; + const SHARE_STRINGS = 2; + const SHARE_KEYS_AND_STRINGS = 3; + const SHARE_KEY_VECTORS = 4; + const SHARE_ALL = 7; + } +} +impl Default for BuilderOptions { + fn default() -> Self { + Self::SHARE_KEYS + } +} + +#[derive(Debug, Clone, Copy)] +// Address of a Key inside of the buffer. +struct CachedKey(usize); + +/// **Use this struct to build a Flexbuffer.** +/// +/// Flexbuffers may only have a single root value, which may be constructed +/// with one of the following functions. +/// * `build_singleton` will push 1 value to the buffer and serialize it as the root. +/// * `start_vector` returns a `VectorBuilder`, into which many (potentially +/// heterogenous) values can be pushed. The vector itself is the root and is serialized +/// when the `VectorBuilder` is dropped (or `end` is called). +/// * `start_map` returns a `MapBuilder`, which is similar to a `VectorBuilder` except +/// every value must be pushed with an associated key. The map is serialized when the +/// `MapBuilder` is dropped (or `end` is called). +/// +/// These functions reset and overwrite the Builder which means, while there are no +/// active `MapBuilder` or `VectorBuilder`, the internal buffer is empty or contains a +/// finished Flexbuffer. The internal buffer is accessed with `view`. +#[derive(Debug, Clone)] +pub struct Builder { + buffer: Vec<u8>, + values: Vec<Value>, + key_pool: Option<Vec<CachedKey>>, +} +impl Default for Builder { + fn default() -> Self { + let opts = Default::default(); + Builder::new(opts) + } +} + +impl<'a> Builder { + pub fn new(opts: BuilderOptions) -> Self { + let key_pool = if opts.contains(BuilderOptions::SHARE_KEYS) { + Some(vec![]) + } else { + None + }; + Builder { + key_pool, + values: Vec::new(), + buffer: Vec::new(), + } + } + /// Shows the internal flexbuffer. 
It will either be empty or populated with the most + /// recently built flexbuffer. + pub fn view(&self) -> &[u8] { + &self.buffer + } + /// Returns the internal buffer, replacing it with a new vector. The returned buffer will + /// either be empty or populated with the most recently built flexbuffer. + pub fn take_buffer(&mut self) -> Vec<u8> { + let mut b = Vec::new(); + std::mem::swap(&mut self.buffer, &mut b); + b + } + /// Resets the internal state. Automatically called before building a new flexbuffer. + pub fn reset(&mut self) { + self.buffer.clear(); + self.values.clear(); + if let Some(pool) = self.key_pool.as_mut() { + pool.clear(); + } + } + fn push_key(&mut self, key: &str) { + debug_assert!( + key.bytes().all(|b| b != b'\0'), + "Keys must not have internal nulls." + ); + // Search key pool if there is one. + let found = self.key_pool.as_ref().map(|pool| { + pool.binary_search_by(|&CachedKey(addr)| { + let old_key = map::get_key(&self.buffer, addr); + old_key.cloned().cmp(key.bytes()) + }) + }); + let address = if let Some(Ok(idx)) = found { + // Found key in key pool. + self.key_pool.as_ref().unwrap()[idx].0 + } else { + // Key not in pool (or no pool). + let address = self.buffer.len(); + self.buffer.extend_from_slice(key.as_bytes()); + self.buffer.push(b'\0'); + address + }; + if let Some(Err(idx)) = found { + // Insert into key pool. 
+ let pool = self.key_pool.as_mut().unwrap(); + pool.insert(idx, CachedKey(address)); + } + self.values.push(Value::Key(address)); + } + fn push_uint<T: Into<u64>>(&mut self, x: T) { + self.values.push(Value::UInt(x.into())); + } + fn push_int<T: Into<i64>>(&mut self, x: T) { + self.values.push(Value::Int(x.into())); + } + fn push_float<T: Into<f64>>(&mut self, x: T) { + self.values.push(Value::Float(x.into())); + } + fn push_null(&mut self) { + self.values.push(Value::Null); + } + fn push_bool(&mut self, x: bool) { + self.values.push(Value::Bool(x)); + } + fn store_blob(&mut self, xs: &[u8]) -> Value { + let length = Value::UInt(xs.len() as u64); + let width = length.width_or_child_width(); + align(&mut self.buffer, width); + store_value(&mut self.buffer, length, width); + let address = self.buffer.len(); + self.buffer.extend_from_slice(xs); + Value::Reference { + fxb_type: FlexBufferType::Blob, + address, + child_width: width, + } + } + fn push_str(&mut self, x: &str) { + let mut string = self.store_blob(x.as_bytes()); + self.buffer.push(b'\0'); + string.set_fxb_type_or_panic(FlexBufferType::String); + self.values.push(string); + } + fn push_blob(&mut self, x: &[u8]) { + let blob = self.store_blob(x); + self.values.push(blob); + } + fn push_bools(&mut self, xs: &[bool]) { + let length = Value::UInt(xs.len() as u64); + let width = length.width_or_child_width(); + align(&mut self.buffer, width); + store_value(&mut self.buffer, length, width); + let address = self.buffer.len(); + for &b in xs.iter() { + self.buffer.push(b as u8); + for _ in 0..width as u8 { + self.buffer.push(0); // Well this seems wasteful. 
+ } + } + self.values.push(Value::Reference { + fxb_type: FlexBufferType::VectorBool, + address, + child_width: width, + }); + } + + push_slice!(push_uints, u64, UInt, new_uint_vector); + push_slice!(push_ints, i64, Int, new_int_vector); + push_slice!(push_floats, f64, Float, new_float_vector); + push_indirect!(push_indirect_int, i64, Int, IndirectInt); + push_indirect!(push_indirect_uint, u64, UInt, IndirectUInt); + push_indirect!(push_indirect_float, f64, Float, IndirectFloat); + + /// Resets the builder and starts a new flexbuffer with a vector at the root. + /// The exact Flexbuffer vector type is dynamically inferred. + pub fn start_vector(&'a mut self) -> VectorBuilder<'a> { + self.reset(); + VectorBuilder { + builder: self, + start: None, + } + } + /// Resets the builder and builds a new flexbuffer with a map at the root. + pub fn start_map(&'a mut self) -> MapBuilder<'a> { + self.reset(); + MapBuilder { + builder: self, + start: None, + } + } + /// Resets the builder and builds a new flexbuffer with the pushed value at the root. + pub fn build_singleton<P: Pushable>(&mut self, p: P) { + self.reset(); + p.push_to_builder(self); + let root = self.values.pop().unwrap(); + store_root(&mut self.buffer, root); + } + fn push<P: Pushable>(&mut self, p: P) { + p.push_to_builder(self); + } + /// Stores the values past `previous_end` as a map or vector depending on `is_map`. + /// If `previous_end` is None then this was a root map / vector and the last value + /// is stored as the root. 
+ fn end_map_or_vector(&mut self, is_map: bool, previous_end: Option<usize>) { + let split = previous_end.unwrap_or(0); + let value = if is_map { + let key_vals = &mut self.values[split..]; + sort_map_by_keys(key_vals, &self.buffer); + let key_vector = store_vector(&mut self.buffer, key_vals, StoreOption::MapKeys); + store_vector(&mut self.buffer, key_vals, StoreOption::Map(key_vector)) + } else { + store_vector(&mut self.buffer, &self.values[split..], StoreOption::Vector) + }; + self.values.truncate(split); + if previous_end.is_some() { + self.values.push(value); + } else { + store_root(&mut self.buffer, value); + } + } +} + +/// Builds a Flexbuffer with the single pushed value as the root. +pub fn singleton<P: Pushable>(p: P) -> Vec<u8> { + let mut b = Builder::default(); + b.build_singleton(p); + let Builder { buffer, .. } = b; + buffer +} + +/// Stores the root value, root type and root width. +/// This should be called to finish the Flexbuffer. +fn store_root(buffer: &mut Vec<u8>, root: Value) { + let root_width = root.width_in_vector(buffer.len(), 0); + align(buffer, root_width); + store_value(buffer, root, root_width); + buffer.push(root.packed_type(root_width)); + buffer.push(root_width.n_bytes() as u8); +} + +pub enum StoreOption { + Vector, + Map(Value), + MapKeys, +} +/// Writes a Flexbuffer Vector or Map. +/// StoreOption::Map(Keys) must be a Value::Key or this will panic. +// #[inline(always)] +pub fn store_vector(buffer: &mut Vec<u8>, values: &[Value], opt: StoreOption) -> Value { + let (skip, stride) = match opt { + StoreOption::Vector => (0, 1), + StoreOption::MapKeys => (0, 2), + StoreOption::Map(_) => (1, 2), + }; + let iter_values = || values.iter().skip(skip).step_by(stride); + + // Figure out vector type and how long is the prefix. 
+ let mut result = if let StoreOption::Map(_) = opt { + Value::new_map() + } else { + find_vector_type(iter_values()) + }; + let length_slot = if !result.is_fixed_length_vector() { + let length = iter_values().count(); + Some(Value::UInt(length as u64)) + } else { + None + }; + // Measure required width and align to it. + let mut width = BitWidth::W8; + if let StoreOption::Map(keys) = opt { + width = max(width, keys.width_in_vector(buffer.len(), 0)) + } + if let Some(l) = length_slot { + width = max(width, l.width_or_child_width()); + } + let prefix_length = result.prefix_length(); + for (i, &val) in iter_values().enumerate() { + width = max(width, val.width_in_vector(buffer.len(), i + prefix_length)); + } + align(buffer, width); + #[allow(deprecated)] + { + debug_assert_ne!( + result.fxb_type(), + FlexBufferType::VectorString, + "VectorString is deprecated and cannot be written.\ + (https://github.com/google/flatbuffers/issues/5627)" + ); + } + // Write Prefix. + if let StoreOption::Map(keys) = opt { + let key_width = Value::UInt(keys.width_or_child_width().n_bytes() as u64); + store_value(buffer, keys, width); + store_value(buffer, key_width, width); + } + if let Some(len) = length_slot { + store_value(buffer, len, width); + } + // Write data. + let address = buffer.len(); + for &v in iter_values() { + store_value(buffer, v, width); + } + // Write types + if result.is_typed_vector_or_map() { + for v in iter_values() { + buffer.push(v.packed_type(width)); + } + } + // Return Value representing this Vector. 
+ result.set_address_or_panic(address); + result.set_child_width_or_panic(width); + result +} diff --git a/rust/flexbuffers/src/builder/push.rs b/rust/flexbuffers/src/builder/push.rs new file mode 100644 index 00000000..d22b47f1 --- /dev/null +++ b/rust/flexbuffers/src/builder/push.rs @@ -0,0 +1,167 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::builder::Builder; +use crate::private::Sealed; +use crate::{Blob, IndirectFloat, IndirectInt, IndirectUInt}; + +impl<'a> Sealed for Blob<'a> {} +impl Sealed for () {} + +// TODO: String interning +// TODO: Pushable for Map types? + +/// Types that implement the Pushable trait can be written into a Flexbuffer. +/// +/// All Rust's standard numbers, `u8, u16, u32, u64, i8, i16, i32, i64, f32, f64`, +/// can all be pushed. They are `FlexBufferType::{UInt, Int, Float}`. +/// Flexbuffers chooses the smallest width that can represent the given number. +/// Strings can be pushed, they become `FlexBufferType::String` and are stored +/// with both a length and null terminator. +/// +/// * For convenience and speed push typed vectors using rust arrays and slices. +/// Doing so will immediately serialize the data, skipping the `Builder`'s +/// internal cache. +/// +/// * Pushable cannot be implemented by any downstream crates. 
+pub trait Pushable: Sealed + Sized { + fn push_to_builder(self, _: &mut Builder) {} +} + +impl Pushable for () { + fn push_to_builder(self, builder: &mut Builder) { + builder.push_null(); + } +} +impl<'a> Pushable for Blob<'a> { + fn push_to_builder(self, builder: &mut Builder) { + builder.push_blob(self.0); + } +} + +macro_rules! forward_to_builder { + ($T: ty, $method: ident) => { + impl Sealed for $T {} + impl Pushable for $T { + fn push_to_builder(self, builder: &mut Builder) { + builder.$method(self); + } + } + }; + ($T: ty, $method: ident, $asT: ty) => { + impl Sealed for $T {} + impl Pushable for $T { + fn push_to_builder(self, builder: &mut Builder) { + builder.$method(self as $asT); + } + } + }; +} +forward_to_builder!(&str, push_str); +forward_to_builder!(bool, push_bool); +forward_to_builder!(u8, push_uint); +forward_to_builder!(u16, push_uint); +forward_to_builder!(u32, push_uint); +forward_to_builder!(u64, push_uint); +forward_to_builder!(i8, push_int); +forward_to_builder!(i16, push_int); +forward_to_builder!(i32, push_int); +forward_to_builder!(i64, push_int); +forward_to_builder!(f32, push_float); +forward_to_builder!(f64, push_float); +forward_to_builder!(&[u8], push_uints); +forward_to_builder!(&[u16], push_uints); +forward_to_builder!(&[u32], push_uints); +forward_to_builder!(&[u64], push_uints); +forward_to_builder!(&[i8], push_ints); +forward_to_builder!(&[i16], push_ints); +forward_to_builder!(&[i32], push_ints); +forward_to_builder!(&[i64], push_ints); +forward_to_builder!(&[f32], push_floats); +forward_to_builder!(&[f64], push_floats); +forward_to_builder!(&[bool], push_bools); +forward_to_builder!(&Vec<u8>, push_uints); +forward_to_builder!(&Vec<u16>, push_uints); +forward_to_builder!(&Vec<u32>, push_uints); +forward_to_builder!(&Vec<u64>, push_uints); +forward_to_builder!(&Vec<i8>, push_ints); +forward_to_builder!(&Vec<i16>, push_ints); +forward_to_builder!(&Vec<i32>, push_ints); +forward_to_builder!(&Vec<i64>, push_ints); 
+forward_to_builder!(&Vec<f32>, push_floats); +forward_to_builder!(&Vec<f64>, push_floats); +forward_to_builder!(&Vec<bool>, push_bools); + +macro_rules! impl_indirects { + ($Indirect: ident, $method: ident) => { + impl Sealed for $Indirect {} + impl Pushable for $Indirect { + fn push_to_builder(self, builder: &mut Builder) { + builder.$method(self.0); + } + } + }; +} +impl_indirects!(IndirectInt, push_indirect_int); +impl_indirects!(IndirectUInt, push_indirect_uint); +impl_indirects!(IndirectFloat, push_indirect_float); + +macro_rules! impl_arrays { + ($num: expr) => { + forward_to_builder!(&[u8; $num], push_uints, &[u8]); + forward_to_builder!(&[u16; $num], push_uints, &[u16]); + forward_to_builder!(&[u32; $num], push_uints, &[u32]); + forward_to_builder!(&[u64; $num], push_uints, &[u64]); + forward_to_builder!(&[i8; $num], push_ints, &[i8]); + forward_to_builder!(&[i16; $num], push_ints, &[i16]); + forward_to_builder!(&[i32; $num], push_ints, &[i32]); + forward_to_builder!(&[i64; $num], push_ints, &[i64]); + forward_to_builder!(&[f32; $num], push_floats, &[f32]); + forward_to_builder!(&[f64; $num], push_floats, &[f64]); + forward_to_builder!(&[bool; $num], push_bools, &[bool]); + }; +} +impl_arrays!(0); +impl_arrays!(1); +impl_arrays!(2); +impl_arrays!(3); +impl_arrays!(4); +impl_arrays!(5); +impl_arrays!(6); +// impl_arrays!(7); +// impl_arrays!(8); +// impl_arrays!(9); +// impl_arrays!(10); +// impl_arrays!(11); +// impl_arrays!(12); +// impl_arrays!(13); +// impl_arrays!(14); +// impl_arrays!(15); +// impl_arrays!(16); +// impl_arrays!(17); +// impl_arrays!(18); +// impl_arrays!(19); +// impl_arrays!(20); +// impl_arrays!(21); +// impl_arrays!(22); +// impl_arrays!(23); +// impl_arrays!(24); +// impl_arrays!(25); +// impl_arrays!(26); +// impl_arrays!(27); +// impl_arrays!(28); +// impl_arrays!(29); +// impl_arrays!(30); +// impl_arrays!(31); +// impl_arrays!(32); diff --git a/rust/flexbuffers/src/builder/ser.rs b/rust/flexbuffers/src/builder/ser.rs new file 
mode 100644 index 00000000..a0180b67 --- /dev/null +++ b/rust/flexbuffers/src/builder/ser.rs @@ -0,0 +1,530 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use crate::Builder; +use serde::ser; +use serde::ser::*; +use std::fmt::Display; + +// This struct internally tracks the nested vectors representing +// nested structs and such. +// TODO: Add an option for field names in a map. +/// Flexbuffer Serializer. This should be used to serialize structs. 
+#[derive(Debug, Default)] +pub struct FlexbufferSerializer { + builder: Builder, + nesting: Vec<Option<usize>>, +} +impl FlexbufferSerializer { + pub fn new() -> Self { + Self::default() + } + pub fn view(&self) -> &[u8] { + self.builder.view() + } + pub fn take_buffer(&mut self) -> Vec<u8> { + self.builder.take_buffer() + } + fn finish_if_not_nested(&mut self) -> Result<(), Error> { + if self.nesting.is_empty() { + assert_eq!(self.builder.values.len(), 1); + let root = self.builder.values.pop().unwrap(); + super::store_root(&mut self.builder.buffer, root); + } + Ok(()) + } + fn start_vector(&mut self) { + let previous_end = if self.nesting.is_empty() { + None + } else { + Some(self.builder.values.len()) + }; + self.nesting.push(previous_end); + } + fn start_map(&mut self) { + let previous_end = if self.nesting.is_empty() { + None + } else { + Some(self.builder.values.len()) + }; + self.nesting.push(previous_end); + } + fn end_vector(&mut self) -> Result<(), Error> { + let previous_end = self.nesting.pop().unwrap(); + self.builder.end_map_or_vector(false, previous_end); + Ok(()) + } + fn end_map(&mut self) -> Result<(), Error> { + let previous_end = self.nesting.pop().unwrap(); + self.builder.end_map_or_vector(true, previous_end); + Ok(()) + } +} + +#[derive(Debug)] +/// Errors that may happen with Serde. +pub enum Error { + /// Only `str` and `String` can be serialized as keys in serde maps. 
+ KeyMustBeString, + Serde(String), +} + +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> { + write!(f, "{:?}", self) + } +} +impl std::error::Error for Error {} +impl ser::Error for Error { + fn custom<T>(msg: T) -> Self + where + T: Display, + { + Self::Serde(format!("{}", msg)) + } +} +impl<'a> ser::SerializeSeq for &mut FlexbufferSerializer { + type Ok = (); + type Error = Error; + fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + value.serialize(&mut **self) + } + fn end(self) -> Result<Self::Ok, Self::Error> { + self.end_vector() + } +} +// This is unlike a flexbuffers map which requires CString like keys. +// Its implemented as alternating keys and values (hopefully). +impl<'a> ser::SerializeMap for &'a mut FlexbufferSerializer { + type Ok = (); + type Error = Error; + fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + key.serialize(MapKeySerializer(&mut **self)) + } + fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + value.serialize(&mut **self) + } + fn end(self) -> Result<Self::Ok, Self::Error> { + self.end_map() + } +} +impl<'a> ser::SerializeTuple for &mut FlexbufferSerializer { + type Ok = (); + type Error = Error; + fn serialize_element<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + value.serialize(&mut **self) + } + fn end(self) -> Result<Self::Ok, Self::Error> { + self.end_vector() + } +} +impl<'a> ser::SerializeTupleStruct for &mut FlexbufferSerializer { + type Ok = (); + type Error = Error; + fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + value.serialize(&mut **self) + } + fn end(self) -> Result<Self::Ok, Self::Error> { + self.end_vector() + } +} +impl<'a> ser::SerializeStruct for &mut FlexbufferSerializer { + 
type Ok = (); + type Error = Error; + fn serialize_field<T: ?Sized>( + &mut self, + key: &'static str, + value: &T, + ) -> Result<(), Self::Error> + where + T: Serialize, + { + self.builder.push_key(key); + value.serialize(&mut **self) + } + fn end(self) -> Result<Self::Ok, Self::Error> { + self.end_map() + } +} +impl<'a> ser::SerializeTupleVariant for &mut FlexbufferSerializer { + type Ok = (); + type Error = Error; + fn serialize_field<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error> + where + T: Serialize, + { + value.serialize(&mut **self) + } + fn end(self) -> Result<Self::Ok, Self::Error> { + self.end_vector()?; + self.end_map() + } +} +impl<'a> ser::SerializeStructVariant for &mut FlexbufferSerializer { + type Ok = (); + type Error = Error; + fn serialize_field<T: ?Sized>( + &mut self, + key: &'static str, + value: &T, + ) -> Result<(), Self::Error> + where + T: Serialize, + { + self.builder.push_key(key); + value.serialize(&mut **self) + } + fn end(self) -> Result<Self::Ok, Self::Error> { + self.end_map()?; + self.end_map() + } + // TODO: skip field? 
+} + +impl<'a> ser::Serializer for &'a mut FlexbufferSerializer { + type SerializeSeq = &'a mut FlexbufferSerializer; + type SerializeTuple = &'a mut FlexbufferSerializer; + type SerializeTupleStruct = &'a mut FlexbufferSerializer; + type SerializeTupleVariant = &'a mut FlexbufferSerializer; + type SerializeMap = &'a mut FlexbufferSerializer; + type SerializeStruct = &'a mut FlexbufferSerializer; + type SerializeStructVariant = &'a mut FlexbufferSerializer; + type Ok = (); + type Error = Error; + fn serialize_bool(self, v: bool) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_i8(self, v: i8) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_i16(self, v: i16) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_i32(self, v: i32) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_i64(self, v: i64) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_u8(self, v: u8) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_u16(self, v: u16) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_u32(self, v: u32) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_u64(self, v: u64) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_f32(self, v: f32) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_f64(self, v: f64) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_char(self, v: char) -> Result<Self::Ok, Self::Error> { + self.builder.push(v as u8); + 
self.finish_if_not_nested() + } + fn serialize_str(self, v: &str) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_bytes(self, v: &[u8]) -> Result<Self::Ok, Self::Error> { + self.builder.push(v); + self.finish_if_not_nested() + } + fn serialize_none(self) -> Result<Self::Ok, Self::Error> { + self.builder.push(()); + self.finish_if_not_nested() + } + fn serialize_some<T: ?Sized>(self, t: &T) -> Result<Self::Ok, Self::Error> + where + T: Serialize, + { + t.serialize(self) + } + fn serialize_unit(self) -> Result<Self::Ok, Self::Error> { + self.builder.push(()); + self.finish_if_not_nested() + } + fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> { + self.builder.push(()); + self.finish_if_not_nested() + } + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result<Self::Ok, Self::Error> { + self.builder.push(variant); + self.finish_if_not_nested() + } + fn serialize_newtype_struct<T: ?Sized>( + self, + _name: &'static str, + value: &T, + ) -> Result<Self::Ok, Self::Error> + where + T: Serialize, + { + value.serialize(self) + } + fn serialize_newtype_variant<T: ?Sized>( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + value: &T, + ) -> Result<Self::Ok, Self::Error> + where + T: Serialize, + { + self.start_map(); + self.builder.push_key(variant); + value.serialize(&mut *self)?; + self.end_map() + } + fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> { + self.start_vector(); + Ok(self) + } + fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> { + self.start_vector(); + Ok(self) + } + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result<Self::SerializeTupleStruct, Self::Error> { + self.start_map(); + Ok(self) + } + fn serialize_tuple_variant( + self, + _name: &'static str, + 
_variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeTupleVariant, Self::Error> { + self.start_map(); + self.builder.push_key(variant); + self.start_vector(); + Ok(self) + } + fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> { + self.start_map(); + Ok(self) + } + fn serialize_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result<Self::SerializeStruct, Self::Error> { + self.start_map(); + Ok(self) + } + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeStructVariant, Self::Error> { + self.start_map(); + self.builder.push_key(variant); + self.start_map(); + Ok(self) + } +} + +fn key_must_be_a_string<T>() -> Result<T, Error> { + Err(Error::KeyMustBeString) +} +struct MapKeySerializer<'a>(&'a mut FlexbufferSerializer); +impl<'a> Serializer for MapKeySerializer<'a> { + type Ok = (); + type Error = Error; + #[inline] + fn serialize_str(self, value: &str) -> Result<(), Error> { + self.0.builder.push_key(value); + Ok(()) + } + #[inline] + fn serialize_unit_variant( + self, + _name: &'static str, + _variant_index: u32, + variant: &'static str, + ) -> Result<(), Error> { + self.0.builder.push_key(variant); + Ok(()) + } + #[inline] + fn serialize_newtype_struct<T: ?Sized>( + self, + _name: &'static str, + value: &T, + ) -> Result<(), Error> + where + T: Serialize, + { + value.serialize(self) + } + type SerializeSeq = Impossible<(), Error>; + type SerializeTuple = Impossible<(), Error>; + type SerializeTupleStruct = Impossible<(), Error>; + type SerializeTupleVariant = Impossible<(), Error>; + type SerializeMap = Impossible<(), Error>; + type SerializeStruct = Impossible<(), Error>; + type SerializeStructVariant = Impossible<(), Error>; + + fn serialize_bool(self, _value: bool) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_i8(self, _value: i8) -> Result<(), Error> { + 
key_must_be_a_string() + } + fn serialize_i16(self, _value: i16) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_i32(self, _value: i32) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_i64(self, _value: i64) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_u8(self, _value: u8) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_u16(self, _value: u16) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_u32(self, _value: u32) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_u64(self, _value: u64) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_f32(self, _value: f32) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_f64(self, _value: f64) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_char(self, _value: char) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_bytes(self, _value: &[u8]) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_unit(self) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_unit_struct(self, _name: &'static str) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_newtype_variant<T: ?Sized>( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _value: &T, + ) -> Result<(), Error> + where + T: Serialize, + { + key_must_be_a_string() + } + fn serialize_none(self) -> Result<(), Error> { + key_must_be_a_string() + } + fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<(), Error> + where + T: Serialize, + { + key_must_be_a_string() + } + fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Error> { + key_must_be_a_string() + } + fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Error> { + key_must_be_a_string() + } + fn serialize_tuple_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result<Self::SerializeTupleStruct, Error> { + 
key_must_be_a_string() + } + fn serialize_tuple_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeTupleVariant, Error> { + key_must_be_a_string() + } + fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Error> { + key_must_be_a_string() + } + fn serialize_struct( + self, + _name: &'static str, + _len: usize, + ) -> Result<Self::SerializeStruct, Error> { + key_must_be_a_string() + } + fn serialize_struct_variant( + self, + _name: &'static str, + _variant_index: u32, + _variant: &'static str, + _len: usize, + ) -> Result<Self::SerializeStructVariant, Error> { + key_must_be_a_string() + } +} diff --git a/rust/flexbuffers/src/builder/value.rs b/rust/flexbuffers/src/builder/value.rs new file mode 100644 index 00000000..f230c34f --- /dev/null +++ b/rust/flexbuffers/src/builder/value.rs @@ -0,0 +1,306 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use byteorder::{LittleEndian, WriteBytesExt}; + +use crate::bitwidth::BitWidth; +use crate::bitwidth::BitWidth::*; +use crate::flexbuffer_type::FlexBufferType; +use crate::flexbuffer_type::FlexBufferType::*; + +/// Internal representation of FlexBuffer Types and Data before writing. +/// These get placed on the builder's stack and are eventually commited. 
+#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Value { + // Inline types + Null, + Int(i64), + UInt(u64), + Float(f64), + Bool(bool), + /// Null termintated, c_string. Only used with `Map`s. + Key(usize), + /// The other ~20 or so types. + Reference { + address: usize, + child_width: BitWidth, + fxb_type: FlexBufferType, + }, +} + +macro_rules! new_typed_vector { + ($name: ident, $v2: ident, $v3: ident, $v4: ident, $vn: ident) => { + /// Returns a typed vector, fixed length if possible. + /// Address and child width are zero initialized and must be set. + pub fn $name(n: usize) -> Value { + let address = 0; + let child_width = W8; + match n { + 2 => Value::Reference { + address, + child_width, + fxb_type: $v2, + }, + 3 => Value::Reference { + address, + child_width, + fxb_type: $v3, + }, + 4 => Value::Reference { + address, + child_width, + fxb_type: $v4, + }, + _ => Value::Reference { + address, + child_width, + fxb_type: $vn, + }, + } + } + }; +} + +impl Value { + pub fn new_vector() -> Self { + Value::Reference { + address: 0, + child_width: W8, + fxb_type: Vector, + } + } + pub fn new_map() -> Self { + Value::Reference { + address: 0, + child_width: W8, + fxb_type: Map, + } + } + new_typed_vector!( + new_int_vector, + VectorInt2, + VectorInt3, + VectorInt4, + VectorInt + ); + new_typed_vector!( + new_uint_vector, + VectorUInt2, + VectorUInt3, + VectorUInt4, + VectorUInt + ); + new_typed_vector!( + new_float_vector, + VectorFloat2, + VectorFloat3, + VectorFloat4, + VectorFloat + ); + pub fn fxb_type(&self) -> FlexBufferType { + match *self { + Value::Null => Null, + Value::Int(_) => Int, + Value::UInt(_) => UInt, + Value::Float(_) => Float, + Value::Bool(_) => Bool, + Value::Key(_) => Key, + Value::Reference { fxb_type, .. 
} => fxb_type, + } + } + pub fn is_fixed_length_vector(&self) -> bool { + self.fxb_type().is_fixed_length_vector() + } + pub fn is_inline(&self) -> bool { + self.fxb_type().is_inline() + } + pub fn is_reference(&self) -> bool { + !self.is_inline() + } + pub fn is_key(&self) -> bool { + match self { + Value::Key(_) => true, + _ => false, + } + } + pub fn is_typed_vector_or_map(&self) -> bool { + if let Value::Reference { fxb_type, .. } = self { + fxb_type.is_heterogenous() + } else { + false + } + } + pub fn prefix_length(&self) -> usize { + if self.is_fixed_length_vector() || self.is_inline() { + return 0; + } + if let Value::Reference { fxb_type, .. } = self { + if *fxb_type == Map { + return 3; + } + } + 1 + } + pub fn set_fxb_type_or_panic(&mut self, new_type: FlexBufferType) { + if let Value::Reference { fxb_type, .. } = self { + *fxb_type = new_type; + } else { + panic!("`set_fxb_type_or_panic` called on {:?}", self) + } + } + pub fn set_child_width_or_panic(&mut self, new_width: BitWidth) { + if let Value::Reference { child_width, .. } = self { + *child_width = new_width; + } else { + panic!("`set_child_width_or_panic` called on {:?}", self); + } + } + pub fn get_address(&self) -> Option<usize> { + if let Value::Reference { address, .. } | Value::Key(address) = self { + Some(*address) + } else { + None + } + } + pub fn set_address_or_panic(&mut self, new_address: usize) { + if let Value::Reference { address, .. } | Value::Key(address) = self { + *address = new_address; + } else { + panic!("`set_address_or_panic` called on {:?}", self); + } + } + /// For inline types - the width of the value to be stored. + /// For reference types, the width of the referred. + /// Note Key types always refer to 8 bit data. + pub fn width_or_child_width(&self) -> BitWidth { + match *self { + Value::Int(x) => x.into(), + Value::UInt(x) => x.into(), + Value::Float(x) => x.into(), + Value::Key(_) | Value::Bool(_) | Value::Null => W8, + Value::Reference { child_width, .. 
} => child_width, + } + } + pub fn relative_address(self, written_at: usize) -> Option<Value> { + self.get_address().map(|address| { + let offset = written_at + .checked_sub(address) + .expect("Error: References may only refer backwards in buffer."); + Value::UInt(offset as u64) + }) + } + /// Computes the minimum required width of `value` when stored in a vector + /// starting at `vector_start` at index `idx` (this index includes the prefix). + /// `Value::Reference{..}` variants require location information because + /// offsets are relative. + pub fn width_in_vector(self, vector_start: usize, idx: usize) -> BitWidth { + match self { + Value::Bool(_) => W8, + Value::Null => W8, + Value::Int(x) => x.into(), + Value::UInt(x) => x.into(), + Value::Float(x) => x.into(), + _ => { + debug_assert!(self.is_reference()); + for &width in BitWidth::iter() { + let bytes = width as usize + 1; + let alignment = (bytes - vector_start % bytes) % bytes; + let written_at = vector_start + alignment + idx * bytes; + // This match must always succeed. 
+ if let Some(Value::UInt(offset)) = self.relative_address(written_at) { + if BitWidth::from(offset) == width { + return width; + } + } + } + unreachable!() + } + } + } + pub fn packed_type(self, parent_width: BitWidth) -> u8 { + let width = if self.is_inline() { + std::cmp::max(parent_width, self.width_or_child_width()) + } else { + self.width_or_child_width() + }; + (self.fxb_type() as u8) << 2 | width as u8 + } +} + +pub fn find_vector_type<'a, T>(mut values: T) -> Value +where + T: std::iter::Iterator<Item = &'a Value>, +{ + let first = values.next(); + if first.is_none() { + return Value::new_vector(); + } + let mut len = 1; + let init = first.unwrap().fxb_type(); + for v in values { + if v.fxb_type() != init { + return Value::new_vector(); + } + len += 1; + } + let vector_type = match init { + Bool => VectorBool, + UInt => return Value::new_uint_vector(len), + Int => return Value::new_int_vector(len), + Float => return Value::new_float_vector(len), + Key => VectorKey, + // Note that VectorString is deprecated for writing + _ => return Value::new_vector(), + }; + Value::Reference { + address: 0, + child_width: W8, + fxb_type: vector_type, + } +} + +#[inline] +pub fn store_value(buffer: &mut Vec<u8>, mut value: Value, width: BitWidth) { + // Remap to number types. + use Value::*; + if let Some(offset) = value.relative_address(buffer.len()) { + value = offset; + } else { + value = match value { + Bool(x) => UInt(x.into()), + Null => UInt(0), // Should this be 0 bytes? 
+ _ => value, + } + } + let write_result = match (value, width) { + (UInt(x), W8) => buffer.write_u8(x as u8), + (UInt(x), W16) => buffer.write_u16::<LittleEndian>(x as u16), + (UInt(x), W32) => buffer.write_u32::<LittleEndian>(x as u32), + (UInt(x), W64) => buffer.write_u64::<LittleEndian>(x), + (Int(x), W8) => buffer.write_i8(x as i8), + (Int(x), W16) => buffer.write_i16::<LittleEndian>(x as i16), + (Int(x), W32) => buffer.write_i32::<LittleEndian>(x as i32), + (Int(x), W64) => buffer.write_i64::<LittleEndian>(x), + (Float(x), W32) => buffer.write_f32::<LittleEndian>(x as f32), + (Float(x), W64) => buffer.write_f64::<LittleEndian>(x), + (Float(_), _) => unreachable!("Error: Flatbuffers does not support 8 and 16 bit floats."), + _ => unreachable!("Variant not considered: {:?}", value), + }; + write_result.unwrap_or_else(|err| { + panic!( + "Error writing value {:?} with width {:?}: {:?}", + value, width, err + ) + }); +} diff --git a/rust/flexbuffers/src/builder/vector.rs b/rust/flexbuffers/src/builder/vector.rs new file mode 100644 index 00000000..4d73da3c --- /dev/null +++ b/rust/flexbuffers/src/builder/vector.rs @@ -0,0 +1,65 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::{Builder, MapBuilder, Pushable}; + +/// Builds a Flexbuffer vector, returned by a [Builder](struct.Builder.html). 
+/// +/// ## Side effect when dropped: +/// When this is dropped, or `end_vector` is called, the vector is +/// commited to the buffer. If this vector is the root of the flexbuffer, then the +/// root is written and the flexbuffer is complete. The FlexBufferType of this vector +/// is determined by the pushed values when this is dropped. The most compact vector type is +/// automatically chosen. +pub struct VectorBuilder<'a> { + pub(crate) builder: &'a mut Builder, + // If the root is this vector then start == None. Otherwise start is the + // number of values in the 'values stack' before adding this vector. + pub(crate) start: Option<usize>, +} +impl<'a> VectorBuilder<'a> { + /// Pushes `p` onto the vector. + #[inline] + pub fn push<P: Pushable>(&mut self, p: P) { + self.builder.push(p); + } + /// Starts a nested vector that will be pushed onto this vector when it is dropped. + #[inline] + pub fn start_vector(&mut self) -> VectorBuilder { + let start = Some(self.builder.values.len()); + VectorBuilder { + builder: &mut self.builder, + start, + } + } + /// Starts a nested map that will be pushed onto this vector when it is dropped. + #[inline] + pub fn start_map(&mut self) -> MapBuilder { + let start = Some(self.builder.values.len()); + MapBuilder { + builder: &mut self.builder, + start, + } + } + /// `end_vector` determines the type of the vector and writes it to the buffer. + /// This will happen automatically if the VectorBuilder is dropped. 
+ #[inline] + pub fn end_vector(self) {} +} +impl<'a> Drop for VectorBuilder<'a> { + #[inline] + fn drop(&mut self) { + self.builder.end_map_or_vector(false, self.start); + } +} diff --git a/rust/flexbuffers/src/flexbuffer_type.rs b/rust/flexbuffers/src/flexbuffer_type.rs new file mode 100644 index 00000000..1a95ac47 --- /dev/null +++ b/rust/flexbuffers/src/flexbuffer_type.rs @@ -0,0 +1,240 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#![allow(deprecated)] +/// Represents all the valid types in a flexbuffer. +/// +/// Flexbuffers supports +/// heterogenous maps, heterogenous vectors, typed vectors, and fixed length +/// typed vectors for some lengths and types. Rust types are converted into +/// Flexbuffers via the [Pushable](trait.Pushable.html) trait. +/// +/// For exact details see the [internals document]( +/// https://google.github.io/flatbuffers/flatbuffers_internals.html) +/// +/// ### Notes: +/// * In the binary format, Each element of a `Map` or (heterogenous) `Vector` +/// is stored with a byte describing its FlexBufferType and BitWidth. +/// +/// * Typed vectors do not store this extra type information and fixed length +/// typed vectors do not store length. Whether a vector is stored as a typed +/// vector or fixed length typed vector is determined dymaically from the +/// given data. +/// +/// * Indirect numbers are stored as an offset instead of inline. 
Using +/// indirect numbers instead of their inline counterparts in maps and typed +/// vectors can reduce the minimum element width and therefore bytes used. + +#[repr(u8)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize, num_enum::TryFromPrimitive)] +pub enum FlexBufferType { + /// Nulls are represented with `()` in Rust. + Null = 0, + /// Variable width signed integer: `i8, i16, i32, i64` + Int = 1, + /// Variable width unsigned integer: `u8, u16, u32, u64` + UInt = 2, + /// Variable width floating point: `f32, f64` + Float = 3, + Bool = 26, + /// Null termintated, utf8 string. Typically used with `Map`s. + Key = 4, + /// Stored with a unsigned integer length, then UTF-8 bytes, and an extra null terminator that + /// is not counted with the length. + String = 5, + /// An Int, stored by offset rather than inline. Indirect types can keep the bitwidth of a + /// vector or map small when the inline value would have increased the bitwidth. + IndirectInt = 6, + /// A UInt, stored by offset rather than inline. Indirect types can keep the bitwidth of a + /// vector or map small when the inline value would have increased the bitwidth. + IndirectUInt = 7, + /// A Float, stored by offset rather than inline. Indirect types can keep the bitwidth of a + /// vector or map small when the inline value would have increased the bitwidth. + IndirectFloat = 8, + /// Maps are like Vectors except elements are associated with, and sorted by, keys. + Map = 9, + /// Heterogenous Vector (stored with a type table). + Vector = 10, + /// Homogenous Vector of Ints. + VectorInt = 11, + /// Homogenous Vector of UInts. + VectorUInt = 12, + /// Homogenous Vector of Floats. + VectorFloat = 13, + /// Homogenous Vector of Keys. + VectorKey = 14, + /// Homogenous Vector of Strings. + #[deprecated( + note = "Please use Vector or VectorKey instead. 
See https://github.com/google/flatbuffers/issues/5627" + )] + VectorString = 15, + /// Since the elements of a vector use the same `BitWidth` as the length, + /// Blob is more efficient for >255 element boolean vectors. + VectorBool = 36, + /// Homogenous vector of two Ints + VectorInt2 = 16, + /// Homogenous vector of two UInts + VectorUInt2 = 17, + /// Homogenous vector of two Floats + VectorFloat2 = 18, + /// Homogenous vector of three Ints + VectorInt3 = 19, + /// Homogenous vector of three UInts + VectorUInt3 = 20, + /// Homogenous vector of three Floats + VectorFloat3 = 21, + /// Homogenous vector of four Ints + VectorInt4 = 22, + /// Homogenous vector of four UInts + VectorUInt4 = 23, + /// Homogenous vector of four Floats + VectorFloat4 = 24, + /// An array of bytes. Stored with a variable width length. + Blob = 25, +} +use FlexBufferType::*; + +impl Default for FlexBufferType { + fn default() -> Self { + Null + } +} + +macro_rules! is_ty { + ($is_T: ident, $FTy: ident) => { + #[inline(always)] + pub fn $is_T(self) -> bool { + self == $FTy + } + }; +} + +impl FlexBufferType { + /// Returns true for flexbuffer types that are stored inline. + pub fn is_inline(self) -> bool { + match self { + Null | Int | UInt | Float | Bool => true, + _ => false, + } + } + /// Returns true for flexbuffer types that are stored by offset. + pub fn is_reference(self) -> bool { + !self.is_inline() + } + /// Returns true if called on a map, vector, typed vector, or fixed length typed vector. + pub fn is_vector(self) -> bool { + let d = self as u8; + 9 <= d && d < 25 || self == VectorBool + } + /// True iff the binary format stores the length. + /// This applies to Blob, String, Maps, and Vectors of variable length. + pub fn has_length_slot(self) -> bool { + !self.is_fixed_length_vector() && self.is_vector() || self == String || self == Blob + } + /// Returns true if called on a fixed length typed vector. 
+ pub fn is_fixed_length_vector(self) -> bool { + self.fixed_length_vector_length().is_some() + } + /// If called on a fixed type vector, returns the type of the elements. + pub fn typed_vector_type(self) -> Option<FlexBufferType> { + match self { + VectorInt | VectorInt2 | VectorInt3 | VectorInt4 => Some(Int), + VectorUInt | VectorUInt2 | VectorUInt3 | VectorUInt4 => Some(UInt), + VectorFloat | VectorFloat2 | VectorFloat3 | VectorFloat4 => Some(Float), + VectorKey => Some(Key), + // Treat them as keys because we do not know width of length slot. + // see deprecation link. + VectorString => Some(Key), + VectorBool => Some(Bool), + _ => None, + } + } + /// Return the length of the fixed length vector or None. + pub fn fixed_length_vector_length(self) -> Option<usize> { + match self { + VectorInt2 | VectorUInt2 | VectorFloat2 => Some(2), + VectorInt3 | VectorUInt3 | VectorFloat3 => Some(3), + VectorInt4 | VectorUInt4 | VectorFloat4 => Some(4), + _ => None, + } + } + /// Returns true if self is a Map or Vector. Typed vectors are not heterogenous. + pub fn is_heterogenous(self) -> bool { + self == Map || self == Vector + } + /// If `self` is an indirect scalar, remap it to the scalar. Otherwise do nothing. + pub fn to_direct(self) -> Option<Self> { + match self { + IndirectInt => Some(Int), + IndirectUInt => Some(UInt), + IndirectFloat => Some(Float), + _ => None, + } + } + /// returns true if and only if the flexbuffer type is `Null`. + is_ty!(is_null, Null); + /// returns true if and only if the flexbuffer type is `Int`. + is_ty!(is_int, Int); + /// returns true if and only if the flexbuffer type is `UInt`. + is_ty!(is_uint, UInt); + /// returns true if and only if the flexbuffer type is `Float`. + is_ty!(is_float, Float); + /// returns true if and only if the flexbuffer type is `Bool`. + is_ty!(is_bool, Bool); + /// returns true if and only if the flexbuffer type is `Key`. + is_ty!(is_key, Key); + /// returns true if and only if the flexbuffer type is `String`. 
+ is_ty!(is_string, String); + /// returns true if and only if the flexbuffer type is `IndirectInt`. + is_ty!(is_indirect_int, IndirectInt); + /// returns true if and only if the flexbuffer type is `IndirectUInt`. + is_ty!(is_indirect_uint, IndirectUInt); + /// returns true if and only if the flexbuffer type is `IndirectFloat`. + is_ty!(is_indirect_float, IndirectFloat); + /// returns true if and only if the flexbuffer type is `Map`. + is_ty!(is_map, Map); + /// returns true if and only if the flexbuffer type is `Vector`. + is_ty!(is_heterogenous_vector, Vector); + /// returns true if and only if the flexbuffer type is `VectorInt`. + is_ty!(is_vector_int, VectorInt); + /// returns true if and only if the flexbuffer type is `VectorUInt`. + is_ty!(is_vector_uint, VectorUInt); + /// returns true if and only if the flexbuffer type is `VectorFloat`. + is_ty!(is_vector_float, VectorFloat); + /// returns true if and only if the flexbuffer type is `VectorKey`. + is_ty!(is_vector_key, VectorKey); + /// returns true if and only if the flexbuffer type is `VectorString`. + is_ty!(is_vector_string, VectorString); + /// returns true if and only if the flexbuffer type is `VectorBool`. + is_ty!(is_vector_bool, VectorBool); + /// returns true if and only if the flexbuffer type is `VectorInt2`. + is_ty!(is_vector_int2, VectorInt2); + /// returns true if and only if the flexbuffer type is `VectorUInt2`. + is_ty!(is_vector_uint2, VectorUInt2); + /// returns true if and only if the flexbuffer type is `VectorFloat2`. + is_ty!(is_vector_float2, VectorFloat2); + /// returns true if and only if the flexbuffer type is `VectorInt3`. + is_ty!(is_vector_int3, VectorInt3); + /// returns true if and only if the flexbuffer type is `VectorUInt3`. + is_ty!(is_vector_uint3, VectorUInt3); + /// returns true if and only if the flexbuffer type is `VectorFloat3`. + is_ty!(is_vector_float3, VectorFloat3); + /// returns true if and only if the flexbuffer type is `VectorInt4`. 
+ is_ty!(is_vector_int4, VectorInt4); + /// returns true if and only if the flexbuffer type is `VectorUInt4`. + is_ty!(is_vector_uint4, VectorUInt4); + /// returns true if and only if the flexbuffer type is `VectorFloat4`. + is_ty!(is_vector_float4, VectorFloat4); + /// returns true if and only if the flexbuffer type is `Blob`. + is_ty!(is_blob, Blob); +} diff --git a/rust/flexbuffers/src/lib.rs b/rust/flexbuffers/src/lib.rs new file mode 100644 index 00000000..8076b340 --- /dev/null +++ b/rust/flexbuffers/src/lib.rs @@ -0,0 +1,103 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#![cfg_attr(test, feature(test))] +//! Flexbuffers is a high performance schemaless binary data format designed at Google. +//! It is complementary to the schema-ed format [Flatbuffers](http://docs.rs/flatbuffers/). +//! See [Flexbuffer Internals](https://google.github.io/flatbuffers/flatbuffers_internals.html) +//! for details on the binary format. +//! +//! * [See the examples for usage.](https://github.com/CasperN/flexbuffers/tree/master/examples) +//! +//! This rust implementation is in progress and, until the 1.0 release, breaking API changes may +/// happen between minor versions. +// TODO(cneo): serde stuff are behind a default-on feature flag +// Reader to Json is behind a default-off feature flag +// Serializable structs are Pushable +// Serde with maps - field names and type names. 
+ +#[macro_use] +extern crate bitflags; +extern crate byteorder; +#[macro_use] +extern crate serde_derive; +#[macro_use] +extern crate debug_stub_derive; +extern crate num_enum; +#[cfg(test)] +extern crate quickcheck; +#[cfg(test)] +extern crate quickcheck_derive; +#[cfg(test)] +extern crate rand; +extern crate serde; +#[cfg(test)] +extern crate test; + +mod bitwidth; +mod builder; +mod flexbuffer_type; +mod reader; +pub use bitwidth::BitWidth; +pub use builder::Error as SerializationError; +pub use builder::{ + singleton, Builder, BuilderOptions, FlexbufferSerializer, MapBuilder, Pushable, VectorBuilder, +}; +pub use flexbuffer_type::FlexBufferType; +pub use reader::Error as ReaderError; +pub use reader::{DeserializationError, MapReader, Reader, ReaderIterator, VectorReader}; +use serde::{Deserialize, Serialize}; + +mod private { + pub trait Sealed {} +} + +/// Serialize as a flexbuffer into a vector. +pub fn to_vec<T: Serialize>(x: T) -> Result<Vec<u8>, SerializationError> { + let mut s = FlexbufferSerializer::new(); + x.serialize(&mut s)?; + Ok(s.take_buffer()) +} +/// Deserialize a type from a flexbuffer. +pub fn from_slice<'de, T: Deserialize<'de>>(buf: &'de [u8]) -> Result<T, DeserializationError> { + let r = Reader::get_root(buf)?; + T::deserialize(r) +} + +/// This struct, when pushed will be serialized as a `FlexBufferType::Blob`. +/// +/// A `Blob` is a variable width `length` followed by that many bytes of data. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct Blob<'a>(pub &'a [u8]); + +/// This struct, when pushed, will be serialized as a `FlexBufferType::IndirectUInt`. +/// +/// It is an unsigned integer stored by reference in the flexbuffer. This can reduce the +/// size of vectors and maps containing the `IndirectUInt`. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct IndirectUInt(pub u64); + +/// This struct, when pushed, will be serialized as a `FlexBufferType::IndirectInt`. 
+/// +/// It is a signed integer stored by reference in the flexbuffer. This can reduce the +/// size of vectors and maps containing the `IndirectInt`. +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct IndirectInt(pub i64); + +/// This struct, when pushed, will be serialized as a `FlexBufferType::IndirectFloat`. +/// +/// It is a floating point stored by reference in the flexbuffer. This can reduce the +/// size of vectors and maps containing the `IndirectFloat`. +#[derive(Debug, Copy, Clone, PartialEq)] +pub struct IndirectFloat(pub f64); diff --git a/rust/flexbuffers/src/reader/de.rs b/rust/flexbuffers/src/reader/de.rs new file mode 100644 index 00000000..ad35f25c --- /dev/null +++ b/rust/flexbuffers/src/reader/de.rs @@ -0,0 +1,250 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use super::Error; +use crate::{FlexBufferType, Reader, ReaderIterator}; +use serde::de::{ + DeserializeSeed, Deserializer, EnumAccess, IntoDeserializer, MapAccess, SeqAccess, + VariantAccess, Visitor, +}; + +/// Errors that may happen when deserializing a flexbuffer with serde. 
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DeserializationError {
    // Wraps a low-level flexbuffer read error (bad offsets, wrong types, etc).
    Reader(Error),
    // Carries an error message produced by serde itself (via `de::Error::custom`).
    Serde(String),
}

impl std::error::Error for DeserializationError {}
impl std::fmt::Display for DeserializationError {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> Result<(), std::fmt::Error> {
        match self {
            Self::Reader(r) => write!(f, "Flexbuffer Read Error: {:?}", r),
            Self::Serde(s) => write!(f, "Serde Error: {}", s),
        }
    }
}
impl serde::de::Error for DeserializationError {
    // Required by serde so user Deserialize impls can surface their own messages.
    fn custom<T>(msg: T) -> Self
    where
        T: std::fmt::Display,
    {
        Self::Serde(format!("{}", msg))
    }
}
impl std::convert::From<super::Error> for DeserializationError {
    fn from(e: super::Error) -> Self {
        Self::Reader(e)
    }
}

// A flexbuffer vector iterator doubles as a serde sequence source: each element
// is itself a `Reader`, which is a `Deserializer`.
impl<'de> SeqAccess<'de> for ReaderIterator<'de> {
    type Error = DeserializationError;
    fn next_element_seed<T>(
        &mut self,
        seed: T,
    ) -> Result<Option<<T as DeserializeSeed<'de>>::Value>, Self::Error>
    where
        T: DeserializeSeed<'de>,
    {
        if let Some(elem) = self.next() {
            seed.deserialize(elem).map(Some)
        } else {
            Ok(None)
        }
    }
    // Exact remaining length is known (flexbuffer vectors store their length).
    fn size_hint(&self) -> Option<usize> {
        Some(self.len())
    }
}

// Pairs an enum variant name with its (optional) payload reader.
struct EnumReader<'de> {
    variant: &'de str,
    value: Option<Reader<'de>>,
}

impl<'de> EnumAccess<'de> for EnumReader<'de> {
    type Error = DeserializationError;
    type Variant = Reader<'de>;
    fn variant_seed<V>(self, seed: V) -> Result<(V::Value, Self::Variant), Self::Error>
    where
        V: DeserializeSeed<'de>,
    {
        // Unit variants have no payload; a default (Null) Reader stands in.
        seed.deserialize(self.variant.into_deserializer())
            .map(|v| (v, self.value.unwrap_or_default()))
    }
}

// Walks a flexbuffer map's parallel key and value vectors in lockstep.
struct MapAccessor<'de> {
    keys: ReaderIterator<'de>,
    vals: ReaderIterator<'de>,
}
impl<'de> MapAccess<'de> for MapAccessor<'de> {
    type Error = DeserializationError;

    fn next_key_seed<K>(&mut self, seed: K) -> Result<Option<K::Value>, Self::Error>
    where
        K: DeserializeSeed<'de>,
    {
        if let Some(k) = self.keys.next() {
            seed.deserialize(k).map(Some)
        } else {
            Ok(None)
        }
    }
    fn next_value_seed<V>(&mut self, seed: V) -> Result<V::Value, Self::Error>
    where
        V: DeserializeSeed<'de>,
    {
        // Running out of values before keys indicates a malformed map.
        let val = self.vals.next().ok_or(Error::IndexOutOfBounds)?;
        seed.deserialize(val)
    }
}

impl<'de> VariantAccess<'de> for Reader<'de> {
    type Error = DeserializationError;
    fn unit_variant(self) -> Result<(), Self::Error> {
        Ok(())
    }
    fn newtype_variant_seed<T>(self, seed: T) -> Result<T::Value, Self::Error>
    where
        T: DeserializeSeed<'de>,
    {
        seed.deserialize(self)
    }
    // Tuple variants have an internally tagged representation. They are vectors where Index 0 is
    // the discriminant and index N is field N-1.
    fn tuple_variant<V>(self, _len: usize, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_seq(self.as_vector().iter())
    }
    // Struct variants have an internally tagged representation. They are vectors where Index 0 is
    // the discriminant and index N is field N-1.
    fn struct_variant<V>(
        self,
        _fields: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        let m = self.get_map()?;
        visitor.visit_map(MapAccessor {
            keys: m.keys_vector().iter(),
            vals: m.iter_values(),
        })
    }
}

impl<'de> Deserializer<'de> for crate::Reader<'de> {
    type Error = DeserializationError;
    // Self-describing entry point: dispatches on the stored (type, bitwidth) pair so
    // `deserialize_any` can serve all the forwarded scalar/compound requests below.
    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        use crate::BitWidth::*;
        use crate::FlexBufferType::*;
        match (self.flexbuffer_type(), self.bitwidth()) {
            (Bool, _) => visitor.visit_bool(self.as_bool()),
            (UInt, W8) => visitor.visit_u8(self.as_u8()),
            (UInt, W16) => visitor.visit_u16(self.as_u16()),
            (UInt, W32) => visitor.visit_u32(self.as_u32()),
            (UInt, W64) => visitor.visit_u64(self.as_u64()),
            (Int, W8) => visitor.visit_i8(self.as_i8()),
            (Int, W16) => visitor.visit_i16(self.as_i16()),
            (Int, W32) => visitor.visit_i32(self.as_i32()),
            (Int, W64) => visitor.visit_i64(self.as_i64()),
            (Float, W32) => visitor.visit_f32(self.as_f32()),
            (Float, W64) => visitor.visit_f64(self.as_f64()),
            (Float, _) => Err(Error::InvalidPackedType.into()), // f8 and f16 are not supported.
            (Null, _) => visitor.visit_unit(),
            // Strings and keys borrow directly from the buffer (zero copy).
            (String, _) | (Key, _) => visitor.visit_borrowed_str(self.as_str()),
            (Blob, _) => visitor.visit_borrowed_bytes(self.get_blob()?.0),
            (Map, _) => {
                let m = self.get_map()?;
                visitor.visit_map(MapAccessor {
                    keys: m.keys_vector().iter(),
                    vals: m.iter_values(),
                })
            }
            (ty, _) if ty.is_vector() => visitor.visit_seq(self.as_vector().iter()),
            (ty, bw) => unreachable!("TODO deserialize_any {:?} {:?}.", ty, bw),
        }
    }
    serde::forward_to_deserialize_any! {
        bool i8 i16 i32 i64 u8 u16 u32 u64 f32 f64 str unit unit_struct bytes
        ignored_any map identifier struct tuple tuple_struct seq string
    }
    // NOTE(review): chars are read back as a single byte, so code points above
    // U+00FF would be truncated — confirm the serializer stores chars as u8.
    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_char(self.as_u8() as char)
    }
    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_byte_buf(self.get_blob()?.0.to_vec())
    }
    // A flexbuffer Null maps to `None`; anything else is `Some`.
    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        if self.flexbuffer_type() == FlexBufferType::Null {
            visitor.visit_none()
        } else {
            visitor.visit_some(self)
        }
    }
    fn deserialize_newtype_struct<V>(
        self,
        _name: &'static str,
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        visitor.visit_newtype_struct(self)
    }
    // Enums are encoded either as a bare string (unit variant) or as a single-entry
    // map of variant-name -> payload.
    fn deserialize_enum<V>(
        self,
        _name: &'static str,
        _variants: &'static [&'static str],
        visitor: V,
    ) -> Result<V::Value, Self::Error>
    where
        V: Visitor<'de>,
    {
        let (variant, value) = match self.fxb_type {
            FlexBufferType::String => (self.as_str(), None),
            FlexBufferType::Map => {
                let m = self.get_map()?;
                let variant = m.keys_vector().idx(0).get_key()?;
                let value = Some(m.idx(0));
                (variant, value)
            }
            _ => {
                return Err(Error::UnexpectedFlexbufferType {
                    expected: FlexBufferType::Map,
                    actual: self.fxb_type,
                }
                .into());
            }
        };
        visitor.visit_enum(EnumReader { variant, value })
    }
}
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use super::{Reader, VectorReader};
use std::iter::{DoubleEndedIterator, ExactSizeIterator, FusedIterator, Iterator};

/// Iterates over a flexbuffer vector, typed vector, or map. Yields [Readers](struct.Reader.html).
///
/// If any error occurs, the Reader is defaulted to a Null flexbuffer Reader.
+pub struct ReaderIterator<'de> { + pub(super) reader: VectorReader<'de>, + pub(super) front: usize, + end: usize, +} +impl<'de> ReaderIterator<'de> { + pub(super) fn new(reader: VectorReader<'de>) -> Self { + let end = reader.len(); + ReaderIterator { + reader, + front: 0, + end, + } + } +} +impl<'de> Iterator for ReaderIterator<'de> { + type Item = Reader<'de>; + fn next(&mut self) -> Option<Self::Item> { + if self.front < self.end { + let r = self.reader.idx(self.front); + self.front += 1; + Some(r) + } else { + None + } + } + fn size_hint(&self) -> (usize, Option<usize>) { + let remaining = self.end - self.front; + (remaining, Some(remaining)) + } +} +impl<'de> DoubleEndedIterator for ReaderIterator<'de> { + fn next_back(&mut self) -> Option<Self::Item> { + if self.front < self.end { + self.end -= 1; + Some(self.reader.idx(self.end)) + } else { + None + } + } +} +impl<'de> ExactSizeIterator for ReaderIterator<'de> {} +impl<'de> FusedIterator for ReaderIterator<'de> {} diff --git a/rust/flexbuffers/src/reader/map.rs b/rust/flexbuffers/src/reader/map.rs new file mode 100644 index 00000000..6769ab3e --- /dev/null +++ b/rust/flexbuffers/src/reader/map.rs @@ -0,0 +1,144 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
use super::{deref_offset, unpack_type, Error, Reader, ReaderIterator, VectorReader};
use crate::BitWidth;
use std::cmp::Ordering;
use std::iter::{DoubleEndedIterator, ExactSizeIterator, FusedIterator, Iterator};

/// Allows indexing on a flexbuffer map.
///
/// MapReaders may be indexed with strings or usizes. `index` returns a result type,
/// which may indicate failure due to a missing key or bad data, `idx` returns an Null Reader in
/// cases of error.
#[derive(DebugStub, Default, Clone)]
pub struct MapReader<'de> {
    #[debug_stub = "&[..]"]
    pub(super) buffer: &'de [u8],
    // Start of the values vector; entries are `values_width` bytes apart.
    pub(super) values_address: usize,
    // Start of the (sorted) keys vector; entries are `keys_width` bytes apart.
    pub(super) keys_address: usize,
    pub(super) values_width: BitWidth,
    pub(super) keys_width: BitWidth,
    // Number of key/value pairs.
    pub(super) length: usize,
}

impl<'de> MapReader<'de> {
    /// Returns the number of key/value pairs are in the map.
    pub fn len(&self) -> usize {
        self.length
    }
    /// Returns true if the map has zero key/value pairs.
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }
    // Using &CStr will eagerly compute the length of the key. &str needs length info AND utf8
    // validation. This version is faster than both.
    // Compares the null-terminated key at `key_addr` against `key`, byte by byte,
    // stopping at the stored key's terminator.
    fn lazy_strcmp(&self, key_addr: usize, key: &str) -> Ordering {
        // TODO: Can we know this won't OOB and panic?
        let k = self.buffer[key_addr..].iter().take_while(|&&b| b != b'\0');
        k.cmp(key.as_bytes().iter())
    }
    /// Returns the index of a given key in the map.
    // Binary search over the sorted keys vector; each probe dereferences the key's
    // offset slot before comparing bytes.
    pub fn index_key(&self, key: &str) -> Option<usize> {
        let (mut low, mut high) = (0, self.length);
        while low < high {
            let i = (low + high) / 2;
            let key_offset_address = self.keys_address + i * self.keys_width.n_bytes();
            let key_address =
                deref_offset(self.buffer, key_offset_address, self.keys_width).ok()?;
            match self.lazy_strcmp(key_address, key) {
                Ordering::Equal => return Some(i),
                // `i + 1` when `i == low` guarantees progress and prevents an infinite loop.
                Ordering::Less => low = if i == low { i + 1 } else { i },
                Ordering::Greater => high = i,
            }
        }
        None
    }
    /// Index into a map with a key or usize.
    pub fn index<I: MapReaderIndexer>(&self, i: I) -> Result<Reader<'de>, Error> {
        i.index_map_reader(self)
    }
    /// Index into a map with a key or usize. If any errors occur a Null reader is returned.
    pub fn idx<I: MapReaderIndexer>(&self, i: I) -> Reader<'de> {
        i.index_map_reader(self).unwrap_or_default()
    }
    // Reads the i-th value. The layout is: `length` value slots followed by
    // `length` one-byte packed-type slots.
    fn usize_index(&self, i: usize) -> Result<Reader<'de>, Error> {
        if i >= self.length {
            return Err(Error::IndexOutOfBounds);
        }
        let data_address = self.values_address + self.values_width.n_bytes() * i;
        let type_address = self.values_address + self.values_width.n_bytes() * self.length + i;
        let (fxb_type, width) = self
            .buffer
            .get(type_address)
            .ok_or(Error::FlexbufferOutOfBounds)
            .and_then(|&b| unpack_type(b))?;
        Reader::new(
            &self.buffer,
            data_address,
            fxb_type,
            width,
            self.values_width,
        )
    }
    fn key_index(&self, k: &str) -> Result<Reader<'de>, Error> {
        let i = self.index_key(k).ok_or(Error::KeyNotFound)?;
        self.usize_index(i)
    }
    /// Iterate over the values of the map.
    pub fn iter_values(&self) -> ReaderIterator<'de> {
        ReaderIterator::new(VectorReader {
            reader: Reader {
                buffer: self.buffer,
                fxb_type: crate::FlexBufferType::Map,
                width: self.values_width,
                address: self.values_address,
            },
            length: self.length,
        })
    }
    /// Iterate over the keys of the map.
    pub fn iter_keys(
        &self,
    ) -> impl Iterator<Item = &'de str> + DoubleEndedIterator + ExactSizeIterator + FusedIterator
    {
        self.keys_vector().iter().map(|k| k.as_str())
    }
    /// Returns the map's keys as a `VectorKey` typed vector reader.
    pub fn keys_vector(&self) -> VectorReader<'de> {
        VectorReader {
            reader: Reader {
                buffer: self.buffer,
                fxb_type: crate::FlexBufferType::VectorKey,
                width: self.keys_width,
                address: self.keys_address,
            },
            length: self.length,
        }
    }
}
/// Enables `MapReader::index`/`idx` to accept either a `usize` position or a `&str` key.
pub trait MapReaderIndexer {
    fn index_map_reader<'de>(self, r: &MapReader<'de>) -> Result<Reader<'de>, Error>;
}
impl MapReaderIndexer for usize {
    #[inline]
    fn index_map_reader<'de>(self, r: &MapReader<'de>) -> Result<Reader<'de>, Error> {
        r.usize_index(self)
    }
}
impl MapReaderIndexer for &str {
    #[inline]
    fn index_map_reader<'de>(self, r: &MapReader<'de>) -> Result<Reader<'de>, Error> {
        r.key_index(self)
    }
}
// Copyright 2019 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
use crate::bitwidth::BitWidth;
use crate::flexbuffer_type::FlexBufferType;
use crate::Blob;
use std::convert::{TryFrom, TryInto};
use std::fmt;
use std::ops::Rem;
use std::str::FromStr;
mod de;
mod iter;
mod map;
mod vector;
pub use de::DeserializationError;
pub use iter::ReaderIterator;
pub use map::{MapReader, MapReaderIndexer};
pub use vector::VectorReader;

/// All the possible errors when reading a flexbuffer.
#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)]
pub enum Error {
    /// One of the following data errors occurred:
    ///
    /// * The read flexbuffer had an offset that pointed outside the flexbuffer.
    /// * The 'negative indicies' where length and map keys are stored were out of bounds
    /// * The buffer was too small to contain a flexbuffer root.
    FlexbufferOutOfBounds,
    /// Failed to parse a valid FlexbufferType and Bitwidth from a type byte.
    InvalidPackedType,
    /// Flexbuffer type of the read data does not match function used.
    UnexpectedFlexbufferType {
        expected: FlexBufferType,
        actual: FlexBufferType,
    },
    /// BitWidth type of the read data does not match function used.
    UnexpectedBitWidth {
        expected: BitWidth,
        actual: BitWidth,
    },
    /// Read a flexbuffer offset or length that overflowed usize.
    ReadUsizeOverflowed,
    /// Tried to index a type that's not one of the Flexbuffer vector types.
    CannotIndexAsVector,
    /// Tried to index a Flexbuffer vector or map out of bounds.
    IndexOutOfBounds,
    /// A Map was indexed with a key that it did not contain.
    KeyNotFound,
    /// Failed to parse a Utf8 string.
    /// The Option will be `None` if and only if this Error was deserialized.
    // NOTE: std::str::Utf8Error does not implement Serialize, Deserialize, nor Default. We tell
    // serde to skip the field and default to None. We prefer to have the boxed error so it can be
    // used with std::error::Error::source, though another (worse) option could be to drop that
    // information.
    Utf8Error(#[serde(skip)] Option<Box<std::str::Utf8Error>>),
    /// get_slice failed because the given data buffer is misaligned.
    AlignmentError,
    InvalidRootWidth,
    InvalidMapKeysVectorWidth,
}
impl std::convert::From<std::str::Utf8Error> for Error {
    fn from(e: std::str::Utf8Error) -> Self {
        Self::Utf8Error(Some(Box::new(e)))
    }
}
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::UnexpectedBitWidth { expected, actual } => write!(
                f,
                "Error reading flexbuffer: Expected bitwidth: {:?}, found bitwidth: {:?}",
                expected, actual
            ),
            Self::UnexpectedFlexbufferType { expected, actual } => write!(
                f,
                "Error reading flexbuffer: Expected type: {:?}, found type: {:?}",
                expected, actual
            ),
            // All remaining variants fall back to their Debug representation.
            _ => write!(f, "Error reading flexbuffer: {:?}", self),
        }
    }
}
impl std::error::Error for Error {
    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
        // Only Utf8Error carries an underlying cause (and only when not deserialized).
        if let Self::Utf8Error(Some(e)) = self {
            Some(e)
        } else {
            None
        }
    }
}

// Marker trait tying a scalar type to its typed-vector flavor and byte width, so
// `Reader::get_slice` can check before handing out a direct `&[T]`. Sealed: only the
// ten impls below are valid.
pub trait ReadLE: crate::private::Sealed + std::marker::Sized {
    const VECTOR_TYPE: FlexBufferType;
    const WIDTH: BitWidth;
}
macro_rules! rle {
    ($T: ty, $VECTOR_TYPE: ident, $WIDTH: ident) => {
        impl ReadLE for $T {
            const VECTOR_TYPE: FlexBufferType = FlexBufferType::$VECTOR_TYPE;
            const WIDTH: BitWidth = BitWidth::$WIDTH;
        }
    };
}
rle!(u8, VectorUInt, W8);
rle!(u16, VectorUInt, W16);
rle!(u32, VectorUInt, W32);
rle!(u64, VectorUInt, W64);
rle!(i8, VectorInt, W8);
rle!(i16, VectorInt, W16);
rle!(i32, VectorInt, W32);
rle!(i64, VectorInt, W64);
rle!(f32, VectorFloat, W32);
rle!(f64, VectorFloat, W64);

// Generates an `as_X` that forwards to `get_X` and swallows errors into `T::default()`.
macro_rules! as_default {
    ($as: ident, $get: ident, $T: ty) => {
        pub fn $as(&self) -> $T {
            self.$get().unwrap_or_default()
        }
    };
}

/// `Reader`s allow access to data stored in a Flexbuffer.
///
/// Each reader represents a single address in the buffer so data is read lazily.
/// Start a reader
/// by calling `get_root` on your flexbuffer `&[u8]`.
///
/// - The `get_T` methods return a `Result<T, Error>`. They return an OK value if and only if the
/// flexbuffer type matches `T`. This is analogous to the behavior of Rust's json library, though
/// with Result instead of Option.
/// - The `as_T` methods will try their best to return to a value of type `T`
/// (by casting or even parsing a string if necessary) but ultimately returns `T::default` if it
/// fails. This behavior is analogous to that of flexbuffers C++.
#[derive(DebugStub, Default, Clone)]
pub struct Reader<'de> {
    // The flexbuffer type of the value at `address`.
    fxb_type: FlexBufferType,
    // Byte width of the value's storage slot.
    width: BitWidth,
    // Offset of the value within `buffer`.
    address: usize,
    #[debug_stub = "&[..]"]
    buffer: &'de [u8],
}

// Generates an `as_X` that narrows the full-width `as_u64`/`as_i64` result,
// defaulting to 0 on overflow.
macro_rules! try_cast_fn {
    ($name: ident, $full_width: ident, $Ty: ident) => {
        pub fn $name(&self) -> $Ty {
            self.$full_width().try_into().unwrap_or_default()
        }
    }
}

// Checked subtraction; flexbuffer offsets point backwards, so underflow means corruption.
fn safe_sub(a: usize, b: usize) -> Result<usize, Error> {
    a.checked_sub(b).ok_or(Error::FlexbufferOutOfBounds)
}

// Reads the offset stored at `address` and follows it backwards to the referenced data.
fn deref_offset(buffer: &[u8], address: usize, width: BitWidth) -> Result<usize, Error> {
    let off = read_usize(buffer, address, width);
    safe_sub(address, off)
}

impl<'de> Reader<'de> {
    fn new(
        buffer: &'de [u8],
        mut address: usize,
        mut fxb_type: FlexBufferType,
        width: BitWidth,
        parent_width: BitWidth,
    ) -> Result<Self, Error> {
        if fxb_type.is_reference() {
            address = deref_offset(buffer, address, parent_width)?;
            // Indirects were dereferenced.
            if let Some(t) = fxb_type.to_direct() {
                fxb_type = t;
            }
        }
        Ok(Reader {
            address,
            fxb_type,
            width,
            buffer,
        })
    }
    /// Parses the flexbuffer from the given buffer. Assumes the flexbuffer root is the last byte
    /// of the buffer.
    pub fn get_root(buffer: &'de [u8]) -> Result<Self, Error> {
        let end = buffer.len();
        // Minimum viable flexbuffer: 1 data byte + type byte + root-width byte.
        if end < 3 {
            return Err(Error::FlexbufferOutOfBounds);
        }
        // Last byte is the root width.
        let root_width = BitWidth::from_nbytes(buffer[end - 1]).ok_or(Error::InvalidRootWidth)?;
        // Second last byte is root type.
        let (fxb_type, width) = unpack_type(buffer[end - 2])?;
        // Location of root data. (BitWidth bits before root type)
        let address = safe_sub(end - 2, root_width.n_bytes())?;
        Self::new(buffer, address, fxb_type, width, root_width)
    }
    /// Returns the FlexBufferType of this Reader.
    pub fn flexbuffer_type(&self) -> FlexBufferType {
        self.fxb_type
    }
    /// Returns the bitwidth of this Reader.
    pub fn bitwidth(&self) -> BitWidth {
        self.width
    }
    /// Returns the length of the Flexbuffer. If the type has no length, or if an error occurs,
    /// 0 is returned.
    pub fn length(&self) -> usize {
        if let Some(len) = self.fxb_type.fixed_length_vector_length() {
            len
        } else if self.fxb_type.has_length_slot() && self.address >= self.width.n_bytes() {
            // Length is stored in the slot immediately before the data.
            read_usize(self.buffer, self.address - self.width.n_bytes(), self.width)
        } else {
            0
        }
    }
    /// Returns true if the flexbuffer is aligned to 8 bytes. This guarantees, for valid
    /// flexbuffers, that the data is correctly aligned in memory and slices can be read directly
    /// e.g. with `get_f64s` or `get_i16s`.
    pub fn is_aligned(&self) -> bool {
        (self.buffer.as_ptr() as usize).rem(8) == 0
    }
    as_default!(as_vector, get_vector, VectorReader<'de>);
    as_default!(as_map, get_map, MapReader<'de>);

    fn expect_type(&self, ty: FlexBufferType) -> Result<(), Error> {
        if self.fxb_type == ty {
            Ok(())
        } else {
            Err(Error::UnexpectedFlexbufferType {
                expected: ty,
                actual: self.fxb_type,
            })
        }
    }
    fn expect_bw(&self, bw: BitWidth) -> Result<(), Error> {
        if self.width == bw {
            Ok(())
        } else {
            Err(Error::UnexpectedBitWidth {
                expected: bw,
                actual: self.width,
            })
        }
    }
    /// Directly reads a slice of type `T`where `T` is one of `u8,u16,u32,u64,i8,i16,i32,i64,f32,f64`.
    /// Returns Err if the type, bitwidth, or memory alignment does not match. Since the bitwidth is
    /// dynamic, its better to use a VectorReader unless you know your data and performance is critical.
    #[cfg(target_endian = "little")]
    pub fn get_slice<T: ReadLE>(&self) -> Result<&'de [T], Error> {
        if self.flexbuffer_type().typed_vector_type() != T::VECTOR_TYPE.typed_vector_type() {
            self.expect_type(T::VECTOR_TYPE)?;
        }
        if self.bitwidth().n_bytes() != std::mem::size_of::<T>() {
            self.expect_bw(T::WIDTH)?;
        }
        let end = self.address + self.length() * std::mem::size_of::<T>();
        let slice = &self
            .buffer
            .get(self.address..end)
            .ok_or(Error::FlexbufferOutOfBounds)?;
        // `align_to` is required because the point of this function is to directly hand back a
        // slice of scalars. This can fail because Rust's default allocator is not 16byte aligned
        // (though in practice this only happens for small buffers).
        let (pre, mid, suf) = unsafe { slice.align_to::<T>() };
        if pre.is_empty() && suf.is_empty() {
            Ok(mid)
        } else {
            Err(Error::AlignmentError)
        }
    }

    pub fn get_bool(&self) -> Result<bool, Error> {
        self.expect_type(FlexBufferType::Bool)?;
        // Any nonzero byte in the slot counts as true.
        Ok(
            self.buffer[self.address..self.address + self.width.n_bytes()]
                .iter()
                .any(|&b| b != 0),
        )
    }
    pub fn get_key(&self) -> Result<&'de str, Error> {
        self.expect_type(FlexBufferType::Key)?;
        // Keys are null-terminated; a missing terminator yields an empty key.
        let (length, _) = self.buffer[self.address..]
            .iter()
            .enumerate()
            .find(|(_, &b)| b == b'\0')
            .unwrap_or((0, &0));
        let bytes = &self.buffer[self.address..self.address + length];
        Ok(std::str::from_utf8(bytes)?)
    }
    pub fn get_blob(&self) -> Result<Blob<'de>, Error> {
        self.expect_type(FlexBufferType::Blob)?;
        // NOTE(review): this slice indexing can panic if length overruns the buffer — confirm
        // lengths are validated upstream.
        Ok(Blob(
            &self.buffer[self.address..self.address + self.length()],
        ))
    }
    pub fn as_blob(&self) -> Blob<'de> {
        self.get_blob().unwrap_or(Blob(&[]))
    }
    pub fn get_str(&self) -> Result<&'de str, Error> {
        self.expect_type(FlexBufferType::String)?;
        let bytes = &self.buffer[self.address..self.address + self.length()];
        Ok(std::str::from_utf8(bytes)?)
    }
    // A map stores, before its values, an offset to its keys vector and the keys vector's
    // byte width; this returns (address of that offset slot, keys width).
    fn get_map_info(&self) -> Result<(usize, BitWidth), Error> {
        self.expect_type(FlexBufferType::Map)?;
        if 3 * self.width.n_bytes() >= self.address {
            return Err(Error::FlexbufferOutOfBounds);
        }
        let keys_offset_address = self.address - 3 * self.width.n_bytes();
        let keys_width = {
            let kw_addr = self.address - 2 * self.width.n_bytes();
            let kw = read_usize(self.buffer, kw_addr, self.width);
            BitWidth::from_nbytes(kw).ok_or(Error::InvalidMapKeysVectorWidth)
        }?;
        Ok((keys_offset_address, keys_width))
    }
    pub fn get_map(&self) -> Result<MapReader<'de>, Error> {
        let (keys_offset_address, keys_width) = self.get_map_info()?;
        let keys_address = deref_offset(self.buffer, keys_offset_address, self.width)?;
        // TODO(cneo): Check that vectors length equals keys length.
        Ok(MapReader {
            buffer: self.buffer,
            values_address: self.address,
            values_width: self.width,
            keys_address,
            keys_width,
            length: self.length(),
        })
    }
    /// Tries to read a FlexBufferType::UInt. Returns Err if the type is not a UInt or if the
    /// address is out of bounds.
    pub fn get_u64(&self) -> Result<u64, Error> {
        self.expect_type(FlexBufferType::UInt)?;
        let cursor = self
            .buffer
            .get(self.address..self.address + self.width.n_bytes());
        match self.width {
            BitWidth::W8 => cursor.map(|s| s[0] as u8).map(Into::into),
            BitWidth::W16 => cursor
                .and_then(|s| s.try_into().ok())
                .map(<u16>::from_le_bytes)
                .map(Into::into),
            BitWidth::W32 => cursor
                .and_then(|s| s.try_into().ok())
                .map(<u32>::from_le_bytes)
                .map(Into::into),
            BitWidth::W64 => cursor
                .and_then(|s| s.try_into().ok())
                .map(<u64>::from_le_bytes),
        }
        .ok_or(Error::FlexbufferOutOfBounds)
    }
    /// Tries to read a FlexBufferType::Int. Returns Err if the type is not a UInt or if the
    /// address is out of bounds.
    pub fn get_i64(&self) -> Result<i64, Error> {
        self.expect_type(FlexBufferType::Int)?;
        let cursor = self
            .buffer
            .get(self.address..self.address + self.width.n_bytes());
        match self.width {
            // `as i8` reinterprets the byte as signed before widening.
            BitWidth::W8 => cursor.map(|s| s[0] as i8).map(Into::into),
            BitWidth::W16 => cursor
                .and_then(|s| s.try_into().ok())
                .map(<i16>::from_le_bytes)
                .map(Into::into),
            BitWidth::W32 => cursor
                .and_then(|s| s.try_into().ok())
                .map(<i32>::from_le_bytes)
                .map(Into::into),
            BitWidth::W64 => cursor
                .and_then(|s| s.try_into().ok())
                .map(<i64>::from_le_bytes),
        }
        .ok_or(Error::FlexbufferOutOfBounds)
    }
    /// Tries to read a FlexBufferType::Float. Returns Err if the type is not a UInt, if the
    /// address is out of bounds, or if its a f16 or f8 (not currently supported).
    pub fn get_f64(&self) -> Result<f64, Error> {
        self.expect_type(FlexBufferType::Float)?;
        let cursor = self
            .buffer
            .get(self.address..self.address + self.width.n_bytes());
        match self.width {
            BitWidth::W8 | BitWidth::W16 => return Err(Error::InvalidPackedType),
            BitWidth::W32 => cursor
                .and_then(|s| s.try_into().ok())
                .map(f32_from_le_bytes)
                .map(Into::into),
            BitWidth::W64 => cursor
                .and_then(|s| s.try_into().ok())
                .map(f64_from_le_bytes),
        }
        .ok_or(Error::FlexbufferOutOfBounds)
    }
    pub fn as_bool(&self) -> bool {
        use FlexBufferType::*;
        match self.fxb_type {
            Bool => self.get_bool().unwrap_or_default(),
            UInt => self.as_u64() != 0,
            Int => self.as_i64() != 0,
            // Epsilon comparison: tiny floats count as false.
            Float => self.as_f64().abs() > std::f64::EPSILON,
            String | Key => !self.as_str().is_empty(),
            Null => false,
            Blob => self.length() != 0,
            ty if ty.is_vector() => self.length() != 0,
            _ => unreachable!(),
        }
    }
    /// Returns a u64, casting if necessary. For Maps and Vectors, their length is
    /// returned. If anything fails, 0 is returned.
    pub fn as_u64(&self) -> u64 {
        match self.fxb_type {
            FlexBufferType::UInt => self.get_u64().unwrap_or_default(),
            FlexBufferType::Int => self
                .get_i64()
                .unwrap_or_default()
                .try_into()
                .unwrap_or_default(),
            FlexBufferType::Float => self.get_f64().unwrap_or_default() as u64,
            FlexBufferType::String => {
                // Mirror the C++ behavior: parse numeric strings, else 0.
                if let Ok(s) = self.get_str() {
                    if let Ok(f) = u64::from_str(s) {
                        return f;
                    }
                }
                0
            }
            _ if self.fxb_type.is_vector() => self.length() as u64,
            _ => 0,
        }
    }
    try_cast_fn!(as_u32, as_u64, u32);
    try_cast_fn!(as_u16, as_u64, u16);
    try_cast_fn!(as_u8, as_u64, u8);

    /// Returns an i64, casting if necessary. For Maps and Vectors, their length is
    /// returned. If anything fails, 0 is returned.
+ pub fn as_i64(&self) -> i64 { + match self.fxb_type { + FlexBufferType::Int => self.get_i64().unwrap_or_default(), + FlexBufferType::UInt => self + .get_u64() + .unwrap_or_default() + .try_into() + .unwrap_or_default(), + FlexBufferType::Float => self.get_f64().unwrap_or_default() as i64, + FlexBufferType::String => { + if let Ok(s) = self.get_str() { + if let Ok(f) = i64::from_str(s) { + return f; + } + } + 0 + } + _ if self.fxb_type.is_vector() => self.length() as i64, + _ => 0, + } + } + try_cast_fn!(as_i32, as_i64, i32); + try_cast_fn!(as_i16, as_i64, i16); + try_cast_fn!(as_i8, as_i64, i8); + + /// Returns an f64, casting if necessary. For Maps and Vectors, their length is + /// returned. If anything fails, 0 is returned. + pub fn as_f64(&self) -> f64 { + match self.fxb_type { + FlexBufferType::Int => self.get_i64().unwrap_or_default() as f64, + FlexBufferType::UInt => self.get_u64().unwrap_or_default() as f64, + FlexBufferType::Float => self.get_f64().unwrap_or_default(), + FlexBufferType::String => { + if let Ok(s) = self.get_str() { + if let Ok(f) = f64::from_str(s) { + return f; + } + } + 0.0 + } + _ if self.fxb_type.is_vector() => self.length() as f64, + _ => 0.0, + } + } + pub fn as_f32(&self) -> f32 { + self.as_f64() as f32 + } + + /// Returns empty string if you're not trying to read a string. 
pub fn as_str(&self) -> &'de str {
    match self.fxb_type {
        // Strings and Keys both decode to &str; read failures default to "".
        FlexBufferType::String => self.get_str().unwrap_or_default(),
        FlexBufferType::Key => self.get_key().unwrap_or_default(),
        _ => "",
    }
}
/// Returns a VectorReader over this value, erroring if it is not some kind
/// of vector (expect_type reports the mismatch against `Vector`).
pub fn get_vector(&self) -> Result<VectorReader<'de>, Error> {
    if !self.fxb_type.is_vector() {
        self.expect_type(FlexBufferType::Vector)?;
    };
    Ok(VectorReader {
        reader: self.clone(),
        // Read the length once here so the VectorReader can cache it.
        length: self.length(),
    })
}
}

impl<'de> fmt::Display for Reader<'de> {
    // Renders the flexbuffer value in a JSON-like text form: quoted strings,
    // `[..]` vectors, and `{"k": v, ..}` maps.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use FlexBufferType::*;
        match self.flexbuffer_type() {
            Null => write!(f, "null"),
            UInt => write!(f, "{}", self.as_u64()),
            Int => write!(f, "{}", self.as_i64()),
            Float => write!(f, "{}", self.as_f64()),
            // {:?} quotes and escapes the string contents.
            Key | String => write!(f, "{:?}", self.as_str()),
            Bool => write!(f, "{}", self.as_bool()),
            Blob => write!(f, "blob"),
            Map => {
                write!(f, "{{")?;
                let m = self.as_map();
                let mut pairs = m.iter_keys().zip(m.iter_values());
                // Print the first pair bare, then prefix every later pair
                // with ", " so there is no trailing separator.
                if let Some((k, v)) = pairs.next() {
                    write!(f, "{:?}: {}", k, v)?;
                    for (k, v) in pairs {
                        write!(f, ", {:?}: {}", k, v)?;
                    }
                }
                write!(f, "}}")
            }
            // All remaining vector flavors print as a bracketed list; each
            // element recurses through this Display impl.
            t if t.is_vector() => {
                write!(f, "[")?;
                let mut elems = self.as_vector().iter();
                if let Some(first) = elems.next() {
                    write!(f, "{}", first)?;
                    for e in elems {
                        write!(f, ", {}", e)?;
                    }
                }
                write!(f, "]")
            }
            _ => unreachable!("Display not implemented for {:?}", self),
        }
    }
}

// TODO(cneo): Use <f..>::from_le_bytes when we move past rustc 1.39.
+fn f32_from_le_bytes(bytes: [u8; 4]) -> f32 { + let bits = <u32>::from_le_bytes(bytes); + <f32>::from_bits(bits) +} +fn f64_from_le_bytes(bytes: [u8; 8]) -> f64 { + let bits = <u64>::from_le_bytes(bytes); + <f64>::from_bits(bits) +} + +fn read_usize(buffer: &[u8], address: usize, width: BitWidth) -> usize { + let cursor = &buffer[address..]; + match width { + BitWidth::W8 => cursor[0] as usize, + BitWidth::W16 => cursor + .get(0..2) + .and_then(|s| s.try_into().ok()) + .map(<u16>::from_le_bytes) + .unwrap_or_default() as usize, + BitWidth::W32 => cursor + .get(0..4) + .and_then(|s| s.try_into().ok()) + .map(<u32>::from_le_bytes) + .unwrap_or_default() as usize, + BitWidth::W64 => cursor + .get(0..8) + .and_then(|s| s.try_into().ok()) + .map(<u64>::from_le_bytes) + .unwrap_or_default() as usize, + } +} + +fn unpack_type(ty: u8) -> Result<(FlexBufferType, BitWidth), Error> { + let w = BitWidth::try_from(ty & 3u8).map_err(|_| Error::InvalidPackedType)?; + let t = FlexBufferType::try_from(ty >> 2).map_err(|_| Error::InvalidPackedType)?; + Ok((t, w)) +} diff --git a/rust/flexbuffers/src/reader/vector.rs b/rust/flexbuffers/src/reader/vector.rs new file mode 100644 index 00000000..8ba8fe50 --- /dev/null +++ b/rust/flexbuffers/src/reader/vector.rs @@ -0,0 +1,74 @@ +// Copyright 2019 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 

use super::{unpack_type, Error, Reader, ReaderIterator};
use crate::{BitWidth, FlexBufferType};

#[derive(Default, Clone)]
/// Allows indexing on any flexbuffer vector type, (heterogenous vector, typed vector, or fixed
/// length typed vector).
///
/// VectorReaders may be indexed with usize, `index` returns a result type
/// which may indicate failure due to indexing out of bounds or bad data. `idx` returns a
/// Null Reader in the event of any failure.
pub struct VectorReader<'de> {
    // Reader positioned at this vector's data; shares the backing buffer.
    pub(super) reader: Reader<'de>,
    // Cache the length because read_usize can be slow.
    pub(super) length: usize,
}

impl<'de> VectorReader<'de> {
    /// Returns the number of elements in the vector.
    pub fn len(&self) -> usize {
        self.length
    }
    /// Returns true if there are 0 elements in the vector.
    pub fn is_empty(&self) -> bool {
        self.length == 0
    }
    /// Looks up the (type, width) of element `i`. Typed vectors share a
    /// single element type and the vector's own width; otherwise each
    /// element has a packed-type byte stored after the element data.
    fn get_elem_type(&self, i: usize) -> Result<(FlexBufferType, BitWidth), Error> {
        if let Some(ty) = self.reader.fxb_type.typed_vector_type() {
            Ok((ty, self.reader.width))
        } else {
            // The per-element type bytes begin immediately after `length`
            // elements of `width` bytes each.
            let types_addr = self.reader.address + self.length * self.reader.width.n_bytes();
            self.reader
                .buffer
                .get(types_addr + i)
                .ok_or(Error::FlexbufferOutOfBounds)
                .and_then(|&t| unpack_type(t))
        }
    }
    /// Index into a flexbuffer vector. Any errors are defaulted to Null Readers.
    pub fn idx(&self, i: usize) -> Reader<'de> {
        self.index(i).unwrap_or_default()
    }
    /// Index into a flexbuffer.
    pub fn index(&self, i: usize) -> Result<Reader<'de>, Error> {
        if i >= self.length {
            return Err(Error::IndexOutOfBounds);
        }
        let (fxb_type, bw) = self.get_elem_type(i)?;
        // Elements are laid out contiguously, `width` bytes apiece.
        let data_address = self.reader.address + self.reader.width.n_bytes() * i;
        Reader::new(
            self.reader.buffer,
            data_address,
            fxb_type,
            bw,
            self.reader.width,
        )
    }
    /// Returns an iterator over the vector's elements as Readers.
    pub fn iter(&self) -> ReaderIterator<'de> {
        ReaderIterator::new(self.clone())
    }
}