summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDongHun Kwak <dh0128.kwak@samsung.com>2023-03-21 13:05:27 +0900
committerDongHun Kwak <dh0128.kwak@samsung.com>2023-03-21 13:05:27 +0900
commite1c8b5a4017d4509bc070c1766907e552890d10e (patch)
tree812d36f82e01f9640a29d57ba4e426bacd885a42
downloadrust-ciborium-ll-e1c8b5a4017d4509bc070c1766907e552890d10e.tar.gz
rust-ciborium-ll-e1c8b5a4017d4509bc070c1766907e552890d10e.tar.bz2
rust-ciborium-ll-e1c8b5a4017d4509bc070c1766907e552890d10e.zip
Import ciborium-ll 0.2.0upstream/0.2.0upstream
-rw-r--r--.cargo_vcs_info.json5
-rw-r--r--Cargo.toml46
-rw-r--r--Cargo.toml.orig31
-rw-r--r--README.md131
-rw-r--r--src/dec.rs174
-rw-r--r--src/enc.rs127
-rw-r--r--src/hdr.rs163
-rw-r--r--src/lib.rs487
-rw-r--r--src/seg.rs213
9 files changed, 1377 insertions, 0 deletions
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
new file mode 100644
index 0000000..e3f78ef
--- /dev/null
+++ b/.cargo_vcs_info.json
@@ -0,0 +1,5 @@
+{
+ "git": {
+ "sha1": "e8512abee2f126ae60923be4362c175703550894"
+ }
+}
diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..f70a861
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,46 @@
+# THIS FILE IS AUTOMATICALLY GENERATED BY CARGO
+#
+# When uploading crates to the registry Cargo will automatically
+# "normalize" Cargo.toml files for maximal compatibility
+# with all versions of Cargo and also rewrite `path` dependencies
+# to registry (e.g., crates.io) dependencies.
+#
+# If you are reading this file be aware that the original Cargo.toml
+# will likely look very different (and much more reasonable).
+# See Cargo.toml.orig for the original contents.
+
+[package]
+edition = "2021"
+name = "ciborium-ll"
+version = "0.2.0"
+authors = ["Nathaniel McCallum <npmccallum@profian.com>"]
+description = "Low-level CBOR codec primitives"
+homepage = "https://github.com/enarx/ciborium"
+readme = "README.md"
+keywords = ["cbor"]
+categories = ["data-structures", "embedded", "encoding", "no-std", "parsing"]
+license = "Apache-2.0"
+repository = "https://github.com/enarx/ciborium"
+[dependencies.ciborium-io]
+version = "0.2.0"
+
+[dependencies.half]
+version = "1.6"
+[dev-dependencies.hex]
+version = "0.4"
+
+[features]
+alloc = []
+std = ["alloc"]
+[badges.github]
+repository = "enarx/ciborium"
+workflow = "test"
+
+[badges.is-it-maintained-issue-resolution]
+repository = "enarx/ciborium"
+
+[badges.is-it-maintained-open-issues]
+repository = "enarx/ciborium"
+
+[badges.maintenance]
+status = "actively-developed"
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
new file mode 100644
index 0000000..97dc646
--- /dev/null
+++ b/Cargo.toml.orig
@@ -0,0 +1,31 @@
+[package]
+name = "ciborium-ll"
+version = "0.2.0"
+authors = ["Nathaniel McCallum <npmccallum@profian.com>"]
+license = "Apache-2.0"
+edition = "2021"
+homepage = "https://github.com/enarx/ciborium"
+repository = "https://github.com/enarx/ciborium"
+description = "Low-level CBOR codec primitives"
+readme = "README.md"
+keywords = ["cbor"]
+categories = ["data-structures", "embedded", "encoding", "no-std", "parsing"]
+
+[badges]
+# See https://doc.rust-lang.org/cargo/reference/manifest.html#the-badges-section
+github = { repository = "enarx/ciborium", workflow = "test" }
+#github = { repository = "enarx/ciborium", workflow = "lint" }
+maintenance = { status = "actively-developed" }
+is-it-maintained-issue-resolution = { repository = "enarx/ciborium" }
+is-it-maintained-open-issues = { repository = "enarx/ciborium" }
+
+[dependencies]
+ciborium-io = { path = "../ciborium-io", version = "0.2.0" }
+half = "1.6"
+
+[dev-dependencies]
+hex = "0.4"
+
+[features]
+alloc = []
+std = ["alloc"]
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..2ca6306
--- /dev/null
+++ b/README.md
@@ -0,0 +1,131 @@
+[![Workflow Status](https://github.com/enarx/ciborium/workflows/test/badge.svg)](https://github.com/enarx/ciborium/actions?query=workflow%3A%22test%22)
+[![Average time to resolve an issue](https://isitmaintained.com/badge/resolution/enarx/ciborium.svg)](https://isitmaintained.com/project/enarx/ciborium "Average time to resolve an issue")
+[![Percentage of issues still open](https://isitmaintained.com/badge/open/enarx/ciborium.svg)](https://isitmaintained.com/project/enarx/ciborium "Percentage of issues still open")
+![Maintenance](https://img.shields.io/badge/maintenance-activly--developed-brightgreen.svg)
+
+# ciborium-ll
+
+Low level CBOR parsing tools
+
+This crate contains low-level types for encoding and decoding items in
+CBOR. This crate is usable in both `no_std` and `no_alloc` environments.
+To understand how this crate works, first we will look at the structure
+of a CBOR item on the wire.
+
+## Anatomy of a CBOR Item
+
+This is a brief anatomy of a CBOR item on the wire.
+
+```
++------------+-----------+
+| | |
+| Major | Minor |
+| (3bits) | (5bits) |
+| | |
++------------+-----------+
+^ ^
+| |
++-----+ +-----+
+ | |
+ | |
+ +----------------------------+--------------+
+ | | | |
+ | Prefix | Affix | Suffix |
+ | (1 byte) | (0-8 bytes) | (0+ bytes) |
+ | | | |
+ +------------+---------------+--------------+
+
+ | | |
+ +------------+---------------+--------------+
+ | |
+ v v
+
+ Header Body
+```
+
+This crate (`ciborium-ll`) works by providing the `Decoder` and `Encoder` types
+which provide input and output for a CBOR header (see: `Header`). From
+there, you can either handle the body yourself or use the provided utility
+functions.
+
+For more information on the CBOR format, see
+[RFC 7049](https://tools.ietf.org/html/rfc7049).
+
+## Decoding
+
+In order to decode CBOR, you will create a `Decoder` from a reader. The
+decoder instance will allow you to `Decoder::pull()` `Header` instances
+from the input.
+
+Most CBOR items are fully contained in their headers and therefore have no
+body. These items can be evaluated directly from the `Header` instance.
+
+Bytes and text items have a body but do not contain child items. Since
+both bytes and text values may be segmented, parsing them can be a bit
+tricky. Therefore, we provide helper functions to parse these types. See
+`Decoder::bytes()` and `Decoder::text()` for more details.
+
+Array and map items have a body which contains child items. These can be
+parsed by simply doing `Decoder::pull()` to parse the child items.
+
+### Example
+
+```rust
+use ciborium_ll::{Decoder, Header};
+use ciborium_io::Read as _;
+
+let input = b"\x6dHello, World!";
+let mut decoder = Decoder::from(&input[..]);
+let mut chunks = 0;
+
+match decoder.pull().unwrap() {
+ Header::Text(len) => {
+ let mut segments = decoder.text(len);
+ while let Some(mut segment) = segments.pull().unwrap() {
+ let mut buffer = [0u8; 7];
+ while let Some(chunk) = segment.pull(&mut buffer[..]).unwrap() {
+ match chunk {
+ "Hello, " if chunks == 0 => chunks = 1,
+ "World!" if chunks == 1 => chunks = 2,
+ _ => panic!("received unexpected chunk"),
+ }
+ }
+ }
+ }
+
+ _ => panic!("received unexpected value"),
+}
+
+assert_eq!(chunks, 2);
+```
+
+## Encoding
+
+To encode values to CBOR, create an `Encoder` from a writer. The encoder
+instance provides the `Encoder::push()` method to write a `Header` value
+to the wire. CBOR item bodies can be written directly.
+
+For bytes and text, there are the `Encoder::bytes()` and `Encoder::text()`
+utility functions, respectively, which will properly segment the output
+on the wire for you.
+
+### Example
+
+```rust
+use ciborium_ll::{Encoder, Header};
+use ciborium_io::Write as _;
+
+let mut buffer = [0u8; 19];
+let mut encoder = Encoder::from(&mut buffer[..]);
+
+// Write the structure
+encoder.push(Header::Map(Some(1))).unwrap();
+encoder.push(Header::Positive(7)).unwrap();
+encoder.text("Hello, World!", 7).unwrap();
+
+// Validate our output
+encoder.flush().unwrap();
+assert_eq!(b"\xa1\x07\x7f\x67Hello, \x66World!\xff", &buffer[..]);
+```
+
+License: Apache-2.0
diff --git a/src/dec.rs b/src/dec.rs
new file mode 100644
index 0000000..8329634
--- /dev/null
+++ b/src/dec.rs
@@ -0,0 +1,174 @@
+use super::*;
+
+use ciborium_io::Read;
+
+/// An error that occurred while decoding
+#[derive(Debug)]
+pub enum Error<T> {
+ /// An error occurred while reading bytes
+ ///
+ /// Contains the underlying error returned while reading.
+ Io(T),
+
+ /// An error occurred while parsing bytes
+ ///
+ /// Contains the offset into the stream where the syntax error occurred.
+ Syntax(usize),
+}
+
+impl<T> From<T> for Error<T> {
+    #[inline]
+    fn from(source: T) -> Self {
+        Error::Io(source) // lets `?` lift a reader error into a decoding error
+    }
+}
+
+/// A decoder for deserializing CBOR items
+///
+/// This decoder manages the low-level decoding of CBOR items into `Header`
+/// objects. It also contains utility functions for parsing segmented bytes
+/// and text inputs.
+pub struct Decoder<R: Read> {
+ reader: R, // underlying byte source
+ offset: usize, // bytes consumed so far; starts at zero
+ buffer: Option<Title>, // a single pushed-back title, replayed by the next pull
+}
+
+impl<R: Read> From<R> for Decoder<R> {
+    #[inline]
+    fn from(reader: R) -> Self {
+        Decoder {
+            reader,
+            offset: 0,    // nothing consumed yet
+            buffer: None, // nothing pushed back yet
+        }
+    }
+}
+
+impl<R: Read> Read for Decoder<R> {
+    type Error = R::Error;
+
+    #[inline]
+    fn read_exact(&mut self, data: &mut [u8]) -> Result<(), Self::Error> {
+        // A pushed-back title must be pulled again before raw bytes are read.
+        assert!(self.buffer.is_none());
+        let count = data.len();
+        self.reader.read_exact(data).map(|()| self.offset += count)
+    }
+}
+
+impl<R: Read> Decoder<R> {
+    #[inline]
+    fn pull_title(&mut self) -> Result<Title, Error<R::Error>> {
+        if let Some(title) = self.buffer.take() {
+            self.offset += title.1.as_ref().len() + 1; // undo the rewind done by `push_title`
+            return Ok(title);
+        }
+        // Nothing was pushed back: read the one-byte prefix from the reader.
+        let mut prefix = [0u8; 1];
+        self.read_exact(&mut prefix[..])?;
+        // The top three bits of the prefix select the major type.
+        let major = match prefix[0] >> 5 {
+            0 => Major::Positive,
+            1 => Major::Negative,
+            2 => Major::Bytes,
+            3 => Major::Text,
+            4 => Major::Array,
+            5 => Major::Map,
+            6 => Major::Tag,
+            7 => Major::Other,
+            _ => unreachable!(), // a `u8` shifted right by five is at most 7
+        };
+        // The low five bits hold a small value or announce the affix length.
+        let mut minor = match prefix[0] & 0b00011111 {
+            x if x < 24 => Minor::This(x),
+            24 => Minor::Next1([0; 1]),
+            25 => Minor::Next2([0; 2]),
+            26 => Minor::Next4([0; 4]),
+            27 => Minor::Next8([0; 8]),
+            31 => Minor::More,
+            _ => return Err(Error::Syntax(self.offset - 1)), // 28..=30; offset of the prefix
+        };
+        // Fill in the affix bytes (an empty slice for `This` and `More`).
+        self.read_exact(minor.as_mut())?;
+        Ok(Title(major, minor))
+    }
+
+    #[inline]
+    fn push_title(&mut self, item: Title) {
+        assert!(self.buffer.is_none());
+        self.buffer = Some(item);
+        self.offset -= item.1.as_ref().len() + 1; // rewind by prefix byte plus affix bytes
+    }
+
+    /// Pulls the next header from the input
+    #[inline]
+    pub fn pull(&mut self) -> Result<Header, Error<R::Error>> {
+        let offset = self.offset; // start of this item, reported if the title is invalid
+        self.pull_title()?
+            .try_into()
+            .map_err(|_| Error::Syntax(offset))
+    }
+
+    /// Push a single header into the input buffer
+    ///
+    /// # Panics
+    ///
+    /// This function panics if called while there is already a header in the
+    /// input buffer. You should take care to call this function only after
+    /// pulling a header to ensure there is nothing in the input buffer.
+    #[inline]
+    pub fn push(&mut self, item: Header) {
+        self.push_title(Title::from(item))
+    }
+
+    /// Gets the current byte offset into the stream
+    ///
+    /// The offset starts at zero when the decoder is created. Therefore, if
+    /// bytes were already read from the reader before the decoder was created,
+    /// you must account for this.
+    #[inline]
+    pub fn offset(&self) -> usize {
+        self.offset
+    }
+
+    /// Process an incoming bytes item
+    ///
+    /// In CBOR, bytes can be segmented. The logic for this can be a bit tricky,
+    /// so we encapsulate that logic using this function. This function **MUST**
+    /// be called immediately after first pulling a `Header::Bytes(len)` from
+    /// the wire and `len` must be provided to this function from that value.
+    ///
+    /// The returned `Segments` yields each segment in turn. The buffer used to
+    /// read a segment's contents is supplied when pulling chunks from it; a
+    /// larger buffer results in fewer reads at the cost of memory usage. You
+    /// should consider this trade off when deciding the size of your buffer.
+    #[inline]
+    pub fn bytes(&mut self, len: Option<usize>) -> Segments<R, crate::seg::Bytes> {
+        self.push(Header::Bytes(len)); // hand the header back for `Segments` to pull
+        Segments::new(self, |header| match header {
+            Header::Bytes(len) => Ok(len),
+            _ => Err(()),
+        })
+    }
+
+    /// Process an incoming text item
+    ///
+    /// In CBOR, text can be segmented. The logic for this can be a bit tricky,
+    /// so we encapsulate that logic using this function. This function **MUST**
+    /// be called immediately after first pulling a `Header::Text(len)` from
+    /// the wire and `len` must be provided to this function from that value.
+    ///
+    /// The returned `Segments` yields each segment in turn. The buffer used to
+    /// read a segment's contents is supplied when pulling chunks from it; a
+    /// larger buffer results in fewer reads at the cost of memory usage. You
+    /// should consider this trade off when deciding the size of your buffer.
+    #[inline]
+    pub fn text(&mut self, len: Option<usize>) -> Segments<R, crate::seg::Text> {
+        self.push(Header::Text(len)); // hand the header back for `Segments` to pull
+        Segments::new(self, |header| match header {
+            Header::Text(len) => Ok(len),
+            _ => Err(()),
+        })
+    }
+}
diff --git a/src/enc.rs b/src/enc.rs
new file mode 100644
index 0000000..909728f
--- /dev/null
+++ b/src/enc.rs
@@ -0,0 +1,127 @@
+use super::*;
+
+use ciborium_io::Write;
+
+/// An encoder for serializing CBOR items
+///
+/// This structure wraps a writer and provides convenience functions for
+/// writing `Header` objects, as well as bytes and text items, to the wire.
+pub struct Encoder<W: Write>(W);
+
+impl<W: Write> From<W> for Encoder<W> {
+    #[inline]
+    fn from(writer: W) -> Self {
+        Encoder(writer) // the encoder simply wraps the writer
+    }
+}
+
+impl<W: Write> Write for Encoder<W> {
+    type Error = W::Error;
+
+    fn write_all(&mut self, data: &[u8]) -> Result<(), Self::Error> {
+        W::write_all(&mut self.0, data) // forwarded unchanged to the wrapped writer
+    }
+
+    fn flush(&mut self) -> Result<(), Self::Error> {
+        W::flush(&mut self.0) // forwarded unchanged to the wrapped writer
+    }
+}
+
+impl<W: Write> Encoder<W> {
+    /// Push a `Header` to the wire
+    #[inline]
+    pub fn push(&mut self, header: Header) -> Result<(), W::Error> {
+        let title = Title::from(header);
+
+        let major = match title.0 {
+            Major::Positive => 0,
+            Major::Negative => 1,
+            Major::Bytes => 2,
+            Major::Text => 3,
+            Major::Array => 4,
+            Major::Map => 5,
+            Major::Tag => 6,
+            Major::Other => 7,
+        };
+
+        let minor = match title.1 {
+            Minor::This(x) => x,
+            Minor::Next1(..) => 24,
+            Minor::Next2(..) => 25,
+            Minor::Next4(..) => 26,
+            Minor::Next8(..) => 27,
+            Minor::More => 31,
+        };
+        // Prefix byte (major in the top three bits), then any affix bytes.
+        self.0.write_all(&[major << 5 | minor])?;
+        self.0.write_all(title.1.as_ref())
+    }
+
+    /// Serialize a byte slice as CBOR
+    ///
+    /// Optionally, segment the output into `segment` size segments. Note that
+    /// if `segment == Some(0)` it will be silently upgraded to `Some(1)`. This
+    /// minimum value is highly inefficient and should not be relied upon.
+    #[inline]
+    pub fn bytes(
+        &mut self,
+        value: &[u8],
+        segment: impl Into<Option<usize>>,
+    ) -> Result<(), W::Error> {
+        let max = segment.into().unwrap_or_else(|| value.len());
+        let max = core::cmp::max(max, 1);
+
+        if max >= value.len() {
+            self.push(Header::Bytes(Some(value.len())))?;
+            self.write_all(value)?;
+        } else {
+            self.push(Header::Bytes(None))?;
+
+            for chunk in value.chunks(max) {
+                self.push(Header::Bytes(Some(chunk.len())))?;
+                self.write_all(chunk)?;
+            }
+
+            self.push(Header::Break)?;
+        }
+
+        Ok(())
+    }
+
+    /// Serialize a string slice as CBOR
+    ///
+    /// Optionally, segment the output into `segment` size segments. Note that
+    /// since care is taken to ensure that each segment is itself a valid UTF-8
+    /// string, if `segment` contains a value of less than 4, it will be
+    /// silently upgraded to 4. This minimum value is highly inefficient and
+    /// should not be relied upon.
+    #[inline]
+    pub fn text(&mut self, value: &str, segment: impl Into<Option<usize>>) -> Result<(), W::Error> {
+        let max = segment.into().unwrap_or_else(|| value.len());
+        let max = core::cmp::max(max, 4);
+
+        if max >= value.len() {
+            self.push(Header::Text(Some(value.len())))?;
+            self.write_all(value.as_bytes())?;
+        } else {
+            self.push(Header::Text(None))?;
+
+            let mut rest = value;
+            while !rest.is_empty() {
+                let mut len = core::cmp::min(rest.len(), max);
+                while !rest.is_char_boundary(len) {
+                    len -= 1; // back off to a character boundary without re-validating
+                }
+
+                let (prefix, suffix) = rest.split_at(len);
+                self.push(Header::Text(Some(prefix.len())))?;
+                self.write_all(prefix.as_bytes())?;
+                rest = suffix;
+            }
+
+            self.push(Header::Break)?;
+        }
+
+        Ok(())
+    }
+}
diff --git a/src/hdr.rs b/src/hdr.rs
new file mode 100644
index 0000000..dec1788
--- /dev/null
+++ b/src/hdr.rs
@@ -0,0 +1,163 @@
+use super::*;
+
+use half::f16;
+
+/// A semantic representation of a CBOR item header
+///
+/// This structure represents the valid values of a CBOR item header and is
+/// used extensively when serializing or deserializing CBOR items. Note well
+/// that this structure **DOES NOT** represent the body (i.e. suffix) of the
+/// CBOR item. You must parse the body yourself based on the contents of the
+/// `Header`. However, utility functions are provided for this (see:
+/// `Decoder::bytes()` and `Decoder::text()`).
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum Header {
+ /// A positive integer
+ Positive(u64),
+
+ /// A negative integer
+ ///
+ /// Note well that this value has all bits inverted from a normal signed
+ /// integer. For example, to convert the `u64` to a `i128` you would do
+ /// this: `neg as i128 ^ !0`.
+ Negative(u64),
+
+ /// A floating point value
+ Float(f64),
+
+ /// A "simple" value
+ Simple(u8),
+
+ /// A tag
+ Tag(u64),
+
+ /// The "break" value
+ ///
+ /// This value is used to terminate indefinite length arrays and maps,
+ /// as well as segmented byte or text items.
+ Break,
+
+ /// A bytes item
+ ///
+ /// The value contained in this variant indicates the length of the bytes
+ /// which follow or, if `None`, segmented bytes input.
+ ///
+ /// A best practice is to call `Decoder::bytes()` immediately after
+ /// first pulling a bytes item header since this utility function
+ /// encapsulates all the logic needed to handle segmentation.
+ Bytes(Option<usize>),
+
+ /// A text item
+ ///
+ /// The value contained in this variant indicates the length of the text
+ /// which follows (in bytes) or, if `None`, segmented text input.
+ ///
+ /// A best practice is to call `Decoder::text()` immediately after
+ /// first pulling a text item header since this utility function
+ /// encapsulates all the logic needed to handle segmentation.
+ Text(Option<usize>),
+
+ /// An array item
+ ///
+ /// The value contained in this variant indicates the length of the array
+ /// which follows (in items) or, if `None`, an indefinite length array
+ /// terminated by a "break" value.
+ Array(Option<usize>),
+
+ /// A map item
+ ///
+ /// The value contained in this variant indicates the length of the map
+ /// which follows (in item pairs) or, if `None`, an indefinite length map
+ /// terminated by a "break" value.
+ Map(Option<usize>),
+}
+
+impl TryFrom<Title> for Header {
+ type Error = InvalidError;
+
+ fn try_from(title: Title) -> Result<Self, Self::Error> {
+ let opt = |minor| { // numeric value carried by the minor; `None` for `More`
+ Some(match minor {
+ Minor::This(x) => x.into(),
+ Minor::Next1(x) => u8::from_be_bytes(x).into(),
+ Minor::Next2(x) => u16::from_be_bytes(x).into(),
+ Minor::Next4(x) => u32::from_be_bytes(x).into(),
+ Minor::Next8(x) => u64::from_be_bytes(x),
+ Minor::More => return None,
+ })
+ };
+
+ let int = |m| opt(m).ok_or(InvalidError(())); // integers and tags must carry a value
+
+ let len = |m| { // a length may be absent (`None`) but must fit in `usize`
+ opt(m)
+ .map(usize::try_from)
+ .transpose()
+ .or(Err(InvalidError(())))
+ };
+
+ Ok(match title {
+ Title(Major::Positive, minor) => Self::Positive(int(minor)?),
+ Title(Major::Negative, minor) => Self::Negative(int(minor)?),
+ Title(Major::Bytes, minor) => Self::Bytes(len(minor)?),
+ Title(Major::Text, minor) => Self::Text(len(minor)?),
+ Title(Major::Array, minor) => Self::Array(len(minor)?),
+ Title(Major::Map, minor) => Self::Map(len(minor)?),
+ Title(Major::Tag, minor) => Self::Tag(int(minor)?),
+ // `Major::Other`: break, simple values, and floats (widened to `f64`).
+ Title(Major::Other, Minor::More) => Self::Break,
+ Title(Major::Other, Minor::This(x)) => Self::Simple(x),
+ Title(Major::Other, Minor::Next1(x)) => Self::Simple(x[0]),
+ Title(Major::Other, Minor::Next2(x)) => Self::Float(f16::from_be_bytes(x).into()),
+ Title(Major::Other, Minor::Next4(x)) => Self::Float(f32::from_be_bytes(x).into()),
+ Title(Major::Other, Minor::Next8(x)) => Self::Float(f64::from_be_bytes(x)),
+ })
+ }
+}
+
+impl From<Header> for Title {
+    fn from(header: Header) -> Self {
+        // Selects the shortest minor encoding able to hold `value`.
+        let int = |value: u64| match value {
+            0..=23 => Minor::This(value as u8),
+            24..=0xff => Minor::Next1([value as u8]),
+            0x100..=0xffff => Minor::Next2((value as u16).to_be_bytes()),
+            0x1_0000..=0xffff_ffff => Minor::Next4((value as u32).to_be_bytes()),
+            _ => Minor::Next8(value.to_be_bytes()),
+        };
+
+        // A missing length is encoded as `Minor::More`.
+        let len = |l: Option<usize>| l.map_or(Minor::More, |n| int(n as u64));
+
+        let (major, minor) = match header {
+            Header::Positive(x) => (Major::Positive, int(x)),
+            Header::Negative(x) => (Major::Negative, int(x)),
+            Header::Bytes(x) => (Major::Bytes, len(x)),
+            Header::Text(x) => (Major::Text, len(x)),
+            Header::Array(x) => (Major::Array, len(x)),
+            Header::Map(x) => (Major::Map, len(x)),
+            Header::Tag(x) => (Major::Tag, int(x)),
+
+            Header::Break => (Major::Other, Minor::More),
+
+            Header::Simple(x) if x <= 23 => (Major::Other, Minor::This(x)),
+            Header::Simple(x) => (Major::Other, Minor::Next1([x])),
+
+            Header::Float(n64) => {
+                let n16 = f16::from_f64(n64);
+                let n32 = n64 as f32;
+                // Use the narrowest float width that round-trips bit-for-bit.
+                let minor = if f64::from(n16).to_bits() == n64.to_bits() {
+                    Minor::Next2(n16.to_be_bytes())
+                } else if f64::from(n32).to_bits() == n64.to_bits() {
+                    Minor::Next4(n32.to_be_bytes())
+                } else {
+                    Minor::Next8(n64.to_be_bytes())
+                };
+                (Major::Other, minor)
+            }
+        };
+
+        Title(major, minor)
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..8a1fe90
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: Apache-2.0
+
+//! Low level CBOR parsing tools
+//!
+//! This crate contains low-level types for encoding and decoding items in
+//! CBOR. This crate is usable in both `no_std` and `no_alloc` environments.
+//! To understand how this crate works, first we will look at the structure
+//! of a CBOR item on the wire.
+//!
+//! # Anatomy of a CBOR Item
+//!
+//! This is a brief anatomy of a CBOR item on the wire.
+//!
+//! ```text
+//! +------------+-----------+
+//! | | |
+//! | Major | Minor |
+//! | (3bits) | (5bits) |
+//! | | |
+//! +------------+-----------+
+//! ^ ^
+//! | |
+//! +-----+ +-----+
+//! | |
+//! | |
+//! +----------------------------+--------------+
+//! | | | |
+//! | Prefix | Affix | Suffix |
+//! | (1 byte) | (0-8 bytes) | (0+ bytes) |
+//! | | | |
+//! +------------+---------------+--------------+
+//!
+//! | | |
+//! +------------+---------------+--------------+
+//! | |
+//! v v
+//!
+//! Header Body
+//! ```
+//!
+//! This crate (`ciborium-ll`) works by providing the `Decoder` and `Encoder` types
+//! which provide input and output for a CBOR header (see: `Header`). From
+//! there, you can either handle the body yourself or use the provided utility
+//! functions.
+//!
+//! For more information on the CBOR format, see
+//! [RFC 7049](https://tools.ietf.org/html/rfc7049).
+//!
+//! # Decoding
+//!
+//! In order to decode CBOR, you will create a `Decoder` from a reader. The
+//! decoder instance will allow you to `Decoder::pull()` `Header` instances
+//! from the input.
+//!
+//! Most CBOR items are fully contained in their headers and therefore have no
+//! body. These items can be evaluated directly from the `Header` instance.
+//!
+//! Bytes and text items have a body but do not contain child items. Since
+//! both bytes and text values may be segmented, parsing them can be a bit
+//! tricky. Therefore, we provide helper functions to parse these types. See
+//! `Decoder::bytes()` and `Decoder::text()` for more details.
+//!
+//! Array and map items have a body which contains child items. These can be
+//! parsed by simply doing `Decoder::pull()` to parse the child items.
+//!
+//! ## Example
+//!
+//! ```rust
+//! use ciborium_ll::{Decoder, Header};
+//! use ciborium_io::Read as _;
+//!
+//! let input = b"\x6dHello, World!";
+//! let mut decoder = Decoder::from(&input[..]);
+//! let mut chunks = 0;
+//!
+//! match decoder.pull().unwrap() {
+//! Header::Text(len) => {
+//! let mut segments = decoder.text(len);
+//! while let Some(mut segment) = segments.pull().unwrap() {
+//! let mut buffer = [0u8; 7];
+//! while let Some(chunk) = segment.pull(&mut buffer[..]).unwrap() {
+//! match chunk {
+//! "Hello, " if chunks == 0 => chunks = 1,
+//! "World!" if chunks == 1 => chunks = 2,
+//! _ => panic!("received unexpected chunk"),
+//! }
+//! }
+//! }
+//! }
+//!
+//! _ => panic!("received unexpected value"),
+//! }
+//!
+//! assert_eq!(chunks, 2);
+//! ```
+//!
+//! # Encoding
+//!
+//! To encode values to CBOR, create an `Encoder` from a writer. The encoder
+//! instance provides the `Encoder::push()` method to write a `Header` value
+//! to the wire. CBOR item bodies can be written directly.
+//!
+//! For bytes and text, there are the `Encoder::bytes()` and `Encoder::text()`
+//! utility functions, respectively, which will properly segment the output
+//! on the wire for you.
+//!
+//! ## Example
+//!
+//! ```rust
+//! use ciborium_ll::{Encoder, Header};
+//! use ciborium_io::Write as _;
+//!
+//! let mut buffer = [0u8; 19];
+//! let mut encoder = Encoder::from(&mut buffer[..]);
+//!
+//! // Write the structure
+//! encoder.push(Header::Map(Some(1))).unwrap();
+//! encoder.push(Header::Positive(7)).unwrap();
+//! encoder.text("Hello, World!", 7).unwrap();
+//!
+//! // Validate our output
+//! encoder.flush().unwrap();
+//! assert_eq!(b"\xa1\x07\x7f\x67Hello, \x66World!\xff", &buffer[..]);
+//! ```
+
+#![cfg_attr(not(feature = "std"), no_std)]
+#![deny(missing_docs)]
+#![deny(clippy::all)]
+#![deny(clippy::cargo)]
+
+#[cfg(feature = "alloc")]
+extern crate alloc;
+
+mod dec;
+mod enc;
+mod hdr;
+mod seg;
+
+pub use dec::*;
+pub use enc::*;
+pub use hdr::*;
+pub use seg::{Segment, Segments};
+
+/// Simple value constants
+pub mod simple {
+ #![allow(missing_docs)]
+
+ pub const FALSE: u8 = 20; // `Header::Simple(FALSE)` is the single byte 0xf4
+ pub const TRUE: u8 = 21; // `Header::Simple(TRUE)` is the single byte 0xf5
+ pub const NULL: u8 = 22; // `Header::Simple(NULL)` is the single byte 0xf6
+ pub const UNDEFINED: u8 = 23; // `Header::Simple(UNDEFINED)` is the single byte 0xf7
+}
+
+/// Tag constants
+pub mod tag {
+ #![allow(missing_docs)]
+
+ pub const BIGPOS: u64 = 2; // presumably the RFC 7049 positive bignum tag; confirm
+ pub const BIGNEG: u64 = 3; // presumably the RFC 7049 negative bignum tag; confirm
+}
+
+#[derive(Debug)]
+struct InvalidError(()); // carries no detail; returned when a `Title` is not a valid `Header`
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum Major {
+ Positive, // major type 0
+ Negative, // major type 1
+ Bytes, // major type 2
+ Text, // major type 3
+ Array, // major type 4
+ Map, // major type 5
+ Tag, // major type 6
+ Other, // major type 7
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+enum Minor {
+ This(u8), // value below 24, stored directly in the prefix byte
+ Next1([u8; 1]), // minor 24: one affix byte follows the prefix
+ Next2([u8; 2]), // minor 25: two affix bytes follow the prefix
+ Next4([u8; 4]), // minor 26: four affix bytes follow the prefix
+ Next8([u8; 8]), // minor 27: eight affix bytes follow the prefix
+ More, // minor 31: no affix bytes
+}
+
+impl AsRef<[u8]> for Minor {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        // Only the `NextN` variants carry affix bytes after the prefix.
+        match self {
+            Minor::This(..) | Minor::More => &[],
+            Minor::Next1(affix) => &affix[..],
+            Minor::Next2(affix) => &affix[..],
+            Minor::Next4(affix) => &affix[..],
+            Minor::Next8(affix) => &affix[..],
+        }
+    }
+}
+
+impl AsMut<[u8]> for Minor {
+    #[inline]
+    fn as_mut(&mut self) -> &mut [u8] {
+        // Variants without affix bytes expose an empty slice.
+        match self {
+            Minor::This(..) | Minor::More => &mut [],
+            Minor::Next1(affix) => &mut affix[..],
+            Minor::Next2(affix) => &mut affix[..],
+            Minor::Next4(affix) => &mut affix[..],
+            Minor::Next8(affix) => &mut affix[..],
+        }
+    }
+}
+
+#[derive(Copy, Clone, Debug, PartialEq, Eq)]
+struct Title(pub Major, pub Minor); // raw header: major type plus minor (with any affix bytes)
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ macro_rules! neg {
+ ($i:expr) => {
+ Header::Negative((($i as i128) ^ !0) as u64)
+ };
+ }
+
+ #[allow(clippy::excessive_precision)]
+ #[test]
+ fn leaf() {
+ use core::f64::{INFINITY, NAN};
+
+ let data = &[
+ (Header::Positive(0), "00", true),
+ (Header::Positive(1), "01", true),
+ (Header::Positive(10), "0a", true),
+ (Header::Positive(23), "17", true),
+ (Header::Positive(24), "1818", true),
+ (Header::Positive(25), "1819", true),
+ (Header::Positive(100), "1864", true),
+ (Header::Positive(1000), "1903e8", true),
+ (Header::Positive(1000000), "1a000f4240", true),
+ (Header::Positive(1000000000000), "1b000000e8d4a51000", true),
+ (
+ Header::Positive(18446744073709551615),
+ "1bffffffffffffffff",
+ true,
+ ),
+ (neg!(-18446744073709551616), "3bffffffffffffffff", true),
+ (neg!(-1), "20", true),
+ (neg!(-10), "29", true),
+ (neg!(-100), "3863", true),
+ (neg!(-1000), "3903e7", true),
+ (Header::Float(0.0), "f90000", true),
+ (Header::Float(-0.0), "f98000", true),
+ (Header::Float(1.0), "f93c00", true),
+ (Header::Float(1.1), "fb3ff199999999999a", true),
+ (Header::Float(1.5), "f93e00", true),
+ (Header::Float(65504.0), "f97bff", true),
+ (Header::Float(100000.0), "fa47c35000", true),
+ (Header::Float(3.4028234663852886e+38), "fa7f7fffff", true),
+ (Header::Float(1.0e+300), "fb7e37e43c8800759c", true),
+ (Header::Float(5.960464477539063e-8), "f90001", true),
+ (Header::Float(0.00006103515625), "f90400", true),
+ (Header::Float(-4.0), "f9c400", true),
+ (Header::Float(-4.1), "fbc010666666666666", true),
+ (Header::Float(INFINITY), "f97c00", true),
+ (Header::Float(NAN), "f97e00", true),
+ (Header::Float(-INFINITY), "f9fc00", true),
+ (Header::Float(INFINITY), "fa7f800000", false),
+ (Header::Float(NAN), "fa7fc00000", false),
+ (Header::Float(-INFINITY), "faff800000", false),
+ (Header::Float(INFINITY), "fb7ff0000000000000", false),
+ (Header::Float(NAN), "fb7ff8000000000000", false),
+ (Header::Float(-INFINITY), "fbfff0000000000000", false),
+ (Header::Simple(simple::FALSE), "f4", true),
+ (Header::Simple(simple::TRUE), "f5", true),
+ (Header::Simple(simple::NULL), "f6", true),
+ (Header::Simple(simple::UNDEFINED), "f7", true),
+ (Header::Simple(16), "f0", true),
+ (Header::Simple(24), "f818", true),
+ (Header::Simple(255), "f8ff", true),
+ (Header::Tag(0), "c0", true),
+ (Header::Tag(1), "c1", true),
+ (Header::Tag(23), "d7", true),
+ (Header::Tag(24), "d818", true),
+ (Header::Tag(32), "d820", true),
+ (Header::Bytes(Some(0)), "40", true),
+ (Header::Bytes(Some(4)), "44", true),
+ (Header::Text(Some(0)), "60", true),
+ (Header::Text(Some(4)), "64", true),
+ ];
+
+ for (header, bytes, encode) in data.iter().cloned() {
+ let bytes = hex::decode(bytes).unwrap();
+
+ let mut decoder = Decoder::from(&bytes[..]);
+ match (header, decoder.pull().unwrap()) {
+ // NaN equality...
+ (Header::Float(l), Header::Float(r)) if l.is_nan() && r.is_nan() => (),
+
+ // Everything else...
+ (l, r) => assert_eq!(l, r),
+ }
+
+ if encode {
+ let mut buffer = [0u8; 1024];
+ let mut writer = &mut buffer[..];
+ let mut encoder = Encoder::from(&mut writer);
+ encoder.push(header).unwrap();
+
+ let len = writer.len();
+ assert_eq!(&bytes[..], &buffer[..1024 - len]);
+ }
+ }
+ }
+
+ #[test]
+ fn node() {
+ let data: &[(&str, &[Header])] = &[
+ ("80", &[Header::Array(Some(0))]),
+ (
+ "83010203",
+ &[
+ Header::Array(Some(3)),
+ Header::Positive(1),
+ Header::Positive(2),
+ Header::Positive(3),
+ ],
+ ),
+ (
+ "98190102030405060708090a0b0c0d0e0f101112131415161718181819",
+ &[
+ Header::Array(Some(25)),
+ Header::Positive(1),
+ Header::Positive(2),
+ Header::Positive(3),
+ Header::Positive(4),
+ Header::Positive(5),
+ Header::Positive(6),
+ Header::Positive(7),
+ Header::Positive(8),
+ Header::Positive(9),
+ Header::Positive(10),
+ Header::Positive(11),
+ Header::Positive(12),
+ Header::Positive(13),
+ Header::Positive(14),
+ Header::Positive(15),
+ Header::Positive(16),
+ Header::Positive(17),
+ Header::Positive(18),
+ Header::Positive(19),
+ Header::Positive(20),
+ Header::Positive(21),
+ Header::Positive(22),
+ Header::Positive(23),
+ Header::Positive(24),
+ Header::Positive(25),
+ ],
+ ),
+ ("a0", &[Header::Map(Some(0))]),
+ (
+ "a201020304",
+ &[
+ Header::Map(Some(2)),
+ Header::Positive(1),
+ Header::Positive(2),
+ Header::Positive(3),
+ Header::Positive(4),
+ ],
+ ),
+ ("9fff", &[Header::Array(None), Header::Break]),
+ (
+ "9f018202039f0405ffff",
+ &[
+ Header::Array(None),
+ Header::Positive(1),
+ Header::Array(Some(2)),
+ Header::Positive(2),
+ Header::Positive(3),
+ Header::Array(None),
+ Header::Positive(4),
+ Header::Positive(5),
+ Header::Break,
+ Header::Break,
+ ],
+ ),
+ (
+ "9f01820203820405ff",
+ &[
+ Header::Array(None),
+ Header::Positive(1),
+ Header::Array(Some(2)),
+ Header::Positive(2),
+ Header::Positive(3),
+ Header::Array(Some(2)),
+ Header::Positive(4),
+ Header::Positive(5),
+ Header::Break,
+ ],
+ ),
+ (
+ "83018202039f0405ff",
+ &[
+ Header::Array(Some(3)),
+ Header::Positive(1),
+ Header::Array(Some(2)),
+ Header::Positive(2),
+ Header::Positive(3),
+ Header::Array(None),
+ Header::Positive(4),
+ Header::Positive(5),
+ Header::Break,
+ ],
+ ),
+ (
+ "83019f0203ff820405",
+ &[
+ Header::Array(Some(3)),
+ Header::Positive(1),
+ Header::Array(None),
+ Header::Positive(2),
+ Header::Positive(3),
+ Header::Break,
+ Header::Array(Some(2)),
+ Header::Positive(4),
+ Header::Positive(5),
+ ],
+ ),
+ (
+ "9f0102030405060708090a0b0c0d0e0f101112131415161718181819ff",
+ &[
+ Header::Array(None),
+ Header::Positive(1),
+ Header::Positive(2),
+ Header::Positive(3),
+ Header::Positive(4),
+ Header::Positive(5),
+ Header::Positive(6),
+ Header::Positive(7),
+ Header::Positive(8),
+ Header::Positive(9),
+ Header::Positive(10),
+ Header::Positive(11),
+ Header::Positive(12),
+ Header::Positive(13),
+ Header::Positive(14),
+ Header::Positive(15),
+ Header::Positive(16),
+ Header::Positive(17),
+ Header::Positive(18),
+ Header::Positive(19),
+ Header::Positive(20),
+ Header::Positive(21),
+ Header::Positive(22),
+ Header::Positive(23),
+ Header::Positive(24),
+ Header::Positive(25),
+ Header::Break,
+ ],
+ ),
+ ];
+
+ for (bytes, headers) in data {
+ let bytes = hex::decode(bytes).unwrap();
+
+ // Test decoding
+ let mut decoder = Decoder::from(&bytes[..]);
+ for header in headers.iter().cloned() {
+ assert_eq!(header, decoder.pull().unwrap());
+ }
+
+ // Test encoding
+ let mut buffer = [0u8; 1024];
+ let mut writer = &mut buffer[..];
+ let mut encoder = Encoder::from(&mut writer);
+
+ for header in headers.iter().cloned() {
+ encoder.push(header).unwrap();
+ }
+
+ let len = writer.len();
+ assert_eq!(&bytes[..], &buffer[..1024 - len]);
+ }
+ }
+}
diff --git a/src/seg.rs b/src/seg.rs
new file mode 100644
index 0000000..d75ebae
--- /dev/null
+++ b/src/seg.rs
@@ -0,0 +1,213 @@
+use super::*;
+
+use ciborium_io::Read;
+
+use core::marker::PhantomData;
+
+/// A parser for incoming segments
+///
+/// A parser turns the raw bytes of one chunk of a segment into a borrowed
+/// item. An implementation may hold back some bytes of a chunk and report
+/// how many through `saved()`; the caller is then expected to leave that
+/// much room at the front of the next slice it passes to `parse()`.
+pub trait Parser: Default {
+ /// The type of item that is parsed
+ ///
+ /// The bound is `?Sized`, so unsized types such as `[u8]` or `str` are
+ /// permitted.
+ type Item: ?Sized;
+
+ /// The parsing error that may occur
+ type Error;
+
+ /// The main parsing function
+ ///
+ /// This function processes the incoming bytes and returns the item,
+ /// borrowed from `bytes` for the lifetime `'a`.
+ ///
+ /// One important detail that **MUST NOT** be overlooked is that the
+ /// parser may save data from a previous parsing attempt. The number of
+ /// bytes saved is indicated by the `Parser::saved()` function. The saved
+ /// bytes will be copied into the beginning of the `bytes` array before
+ /// processing. Therefore, two requirements should be met.
+ ///
+ /// First, the incoming byte slice should be larger than the saved bytes.
+ ///
+ /// Second, the incoming byte slice should contain new bytes only after
+ /// the saved byte prefix.
+ ///
+ /// If both criteria are met, this allows the parser to prepend its saved
+ /// bytes without any additional allocation.
+ ///
+ /// # Errors
+ ///
+ /// Returns `Self::Error` when the bytes cannot be parsed into an item.
+ fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error>;
+
+ /// Indicates the number of saved bytes in the parser
+ ///
+ /// The default implementation returns `0`, which suits parsers that
+ /// never carry bytes over from one `parse()` call to the next.
+ fn saved(&self) -> usize {
+ 0
+ }
+}
+
+/// A parser that yields raw bytes
+///
+/// The input slice is handed back exactly as it was received. Nothing is
+/// ever held back between calls, so the default `Parser::saved()` value of
+/// zero applies.
+#[derive(Default)]
+pub struct Bytes(());
+
+impl Parser for Bytes {
+    type Item = [u8];
+    type Error = core::convert::Infallible;
+
+    /// Returns `bytes` itself as a shared slice; this cannot fail.
+    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a [u8], Self::Error> {
+        // Downgrade the exclusive borrow to a shared one for the caller.
+        let passthrough: &'a [u8] = bytes;
+        Ok(passthrough)
+    }
+}
+
+/// A text parser
+///
+/// This parser converts the input bytes to a `str`. When a chunk boundary
+/// falls in the middle of a UTF-8 character, the trailing bytes that do not
+/// yet decode are kept inside the parser and prepended to the next chunk.
+#[derive(Default)]
+pub struct Text {
+    // Number of bytes at the front of `buffer` that are waiting to be
+    // prepended to the next chunk.
+    stored: usize,
+    // Holds the undecodable tail of the previous chunk. Three bytes suffice:
+    // a UTF-8 character is at most four bytes, so an incomplete one is at
+    // most three.
+    buffer: [u8; 3],
+}
+
+impl Parser for Text {
+    type Item = str;
+    type Error = core::str::Utf8Error;
+
+    /// Decodes `bytes` as UTF-8 and returns the longest valid prefix.
+    ///
+    /// The first `saved()` bytes of `bytes` are overwritten with the bytes
+    /// kept from the previous call; new input must follow that prefix. If
+    /// `bytes` is not longer than the saved prefix, no progress is possible
+    /// and an empty string is returned with the saved bytes left intact.
+    ///
+    /// After a fully valid chunk no bytes remain saved. If the chunk ends in
+    /// at most three bytes that do not decode, those bytes are saved for the
+    /// next call and only the valid prefix is returned.
+    ///
+    /// # Errors
+    ///
+    /// Returns the `Utf8Error` when more than three bytes follow the valid
+    /// prefix, since that cannot be a character split by chunking.
+    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
+        // If we cannot advance, return nothing.
+        if bytes.len() <= self.stored {
+            return Ok("");
+        }
+
+        // Copy the previously undecodable bytes into place.
+        bytes[..self.stored].copy_from_slice(&self.buffer[..self.stored]);
+
+        Ok(match core::str::from_utf8(bytes) {
+            Ok(s) => {
+                // Everything was consumed, including the saved prefix. Forget
+                // it so that `saved()` stops reporting stale bytes and the
+                // next call does not overwrite fresh input with them.
+                self.stored = 0;
+                s
+            }
+            Err(e) => {
+                let valid_len = e.valid_up_to();
+                let invalid_len = bytes.len() - valid_len;
+
+                // A split character leaves at most `buffer.len()` bytes
+                // behind; anything longer is a genuine syntax error.
+                if invalid_len > self.buffer.len() {
+                    return Err(e);
+                }
+
+                // Otherwise, store the undecoded bytes for the next cycle.
+                self.buffer[..invalid_len].copy_from_slice(&bytes[valid_len..]);
+                self.stored = invalid_len;
+
+                // Decode the valid part of the string.
+                core::str::from_utf8(&bytes[..valid_len])
+                    .expect("bytes before `valid_up_to()` are valid UTF-8")
+            }
+        })
+    }
+
+    /// Number of bytes currently held back for the next `parse()` call.
+    fn saved(&self) -> usize {
+        self.stored
+    }
+}
+
+/// A single CBOR segment
+///
+/// One bytes or text segment as it appears on the wire. Its contents are
+/// handed out as parsed chunks, each limited by the size of the scratch
+/// buffer supplied to `pull()`.
+pub struct Segment<'r, R: Read, P: Parser> {
+    // Decoder the segment bytes are read from.
+    reader: &'r mut Decoder<R>,
+    // Bytes of this segment that have not been read from `reader` yet.
+    unread: usize,
+    // Offset reported in `Error::Syntax` for this segment.
+    offset: usize,
+    // Parser applied to every chunk; may carry bytes between chunks.
+    parser: P,
+}
+
+impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
+    /// Number of bytes still to be processed
+    ///
+    /// This is the sum of the bytes not yet read and the bytes the parser
+    /// is holding back from an earlier chunk.
+    #[inline]
+    pub fn left(&self) -> usize {
+        let carried = self.parser.saved();
+        self.unread + carried
+    }
+
+    /// Reads and parses the next chunk of the segment into `buffer`
+    ///
+    /// The front of `buffer` is reserved for the bytes the parser saved from
+    /// the previous chunk; fresh bytes are read in after that prefix and the
+    /// whole window is passed to the parser.
+    ///
+    /// Returns `Ok(None)` when all chunks have been read.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::Syntax` carrying this segment's offset when the
+    /// parser rejects a chunk, or when the segment is exhausted while the
+    /// parser still holds saved bytes. Errors from reading are propagated.
+    #[inline]
+    pub fn pull<'a>(
+        &mut self,
+        buffer: &'a mut [u8],
+    ) -> Result<Option<&'a P::Item>, Error<R::Error>> {
+        let carried = self.parser.saved();
+
+        // Nothing left on the wire: either we are done, or the parser is
+        // stuck with bytes that can never be completed.
+        if self.unread == 0 {
+            return if carried == 0 {
+                Ok(None)
+            } else {
+                Err(Error::Syntax(self.offset))
+            };
+        }
+
+        // The window covers the carried prefix plus as many fresh bytes as
+        // fit in the caller's buffer.
+        let size = buffer.len().min(carried + self.unread);
+        let chunk = &mut buffer[..size];
+
+        // Fill only the part after the carried prefix with new data.
+        let fresh = &mut chunk[carried.min(size)..];
+        self.reader.read_exact(fresh)?;
+        self.unread -= fresh.len();
+
+        match self.parser.parse(chunk) {
+            Ok(item) => Ok(Some(item)),
+            Err(_) => Err(Error::Syntax(self.offset)),
+        }
+    }
+}
+
+/// A sequence of CBOR segments
+///
+/// CBOR allows for bytes or text items to be segmented. This type represents
+/// the state of that segmented input stream.
+pub struct Segments<'r, R: Read, P: Parser> {
+    // Decoder the segment headers and bodies are pulled from.
+    reader: &'r mut Decoder<R>,
+    // Set once the stream has concluded; `pull()` then always yields `None`.
+    finish: bool,
+    // Current depth of indefinite-length containers that have been opened.
+    nested: usize,
+    // Ties the parser type to the stream without storing a parser.
+    parser: PhantomData<P>,
+    // Classifies a header: `Ok(Some(len))` is a segment of `len` bytes,
+    // `Ok(None)` opens an indefinite-length container, `Err(())` means the
+    // header is not acceptable here.
+    unwrap: fn(Header) -> Result<Option<usize>, ()>,
+}
+
+impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
+    /// Creates a segment stream over `decoder`
+    ///
+    /// `unwrap` is applied to every header that is not a terminating break
+    /// and decides how it is treated: a segment of known length, the start
+    /// of an indefinite-length container, or a syntax error.
+    #[inline]
+    pub(crate) fn new(
+        decoder: &'r mut Decoder<R>,
+        unwrap: fn(Header) -> Result<Option<usize>, ()>,
+    ) -> Self {
+        Self {
+            reader: decoder,
+            finish: false,
+            nested: 0,
+            parser: PhantomData,
+            unwrap,
+        }
+    }
+
+    /// Gets the next segment in the stream
+    ///
+    /// Returns `Ok(None)` at the conclusion of the stream. Once that has
+    /// happened, further calls keep returning `Ok(None)` and read nothing
+    /// more from the decoder.
+    ///
+    /// # Errors
+    ///
+    /// Returns `Error::Syntax` with the header's offset when a header is
+    /// rejected by the classifier. Errors from pulling headers are
+    /// propagated.
+    #[inline]
+    pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
+        while !self.finish {
+            let offset = self.reader.offset();
+            match self.reader.pull()? {
+                // The break closing the outermost container ends the stream.
+                // Record that, so a later call cannot consume headers that
+                // belong to whatever follows this item.
+                Header::Break if self.nested == 1 => {
+                    self.finish = true;
+                    return Ok(None);
+                }
+                Header::Break if self.nested > 1 => self.nested -= 1,
+                header => match (self.unwrap)(header) {
+                    Err(..) => return Err(Error::Syntax(offset)),
+                    Ok(None) => self.nested += 1,
+                    Ok(Some(len)) => {
+                        // A segment found outside any container is the only
+                        // segment of the item.
+                        self.finish = self.nested == 0;
+                        return Ok(Some(Segment {
+                            reader: self.reader,
+                            unread: len,
+                            offset,
+                            parser: P::default(),
+                        }));
+                    }
+                },
+            }
+        }
+
+        Ok(None)
+    }
+}