// ciborium_ll/seg.rs
use super::*;
use ciborium_io::Read;
use core::marker::PhantomData;
/// An incremental parser applied to the chunks of a segment
pub trait Parser: Default {
    /// The (possibly unsized) type a successful parse borrows out of the
    /// input buffer
    type Item: ?Sized;

    /// The error reported when the input cannot be parsed
    type Error;

    /// Parses one chunk of input and returns the resulting item.
    ///
    /// A parser is allowed to hold on to bytes from an earlier call; the
    /// count of such bytes is reported by `Parser::saved()`. On the next
    /// call those bytes are written over the *start* of `bytes` before any
    /// processing happens. Callers **MUST** therefore lay out `bytes` as
    /// follows:
    ///
    /// 1. `bytes` is longer than the number of saved bytes.
    /// 2. Fresh input begins only after a prefix of `Parser::saved()` bytes;
    ///    the contents of that prefix are overwritten.
    ///
    /// Honouring both rules lets the parser prepend its saved bytes in
    /// place, with no additional allocation.
    fn parse<'buf>(&mut self, bytes: &'buf mut [u8]) -> Result<&'buf Self::Item, Self::Error>;

    /// Reports how many bytes the parser is carrying over to the next call
    /// of `parse`.
    ///
    /// The default implementation is for parsers that never carry anything
    /// over and always reports zero.
    fn saved(&self) -> usize {
        0
    }
}
/// A pass-through parser for byte strings
///
/// The input chunk is handed back untouched, so nothing is ever carried
/// over between calls and `saved()` keeps its default of zero.
#[derive(Default)]
pub struct Bytes(());

impl Parser for Bytes {
    type Item = [u8];
    type Error = core::convert::Infallible;

    /// Returns `bytes` unchanged; this cannot fail.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a Self::Item, Self::Error> {
        // Give up the exclusive borrow: the caller only receives a shared view.
        let passthrough: &'a [u8] = bytes;
        Ok(passthrough)
    }
}
/// A text parser
///
/// This parser converts the input bytes to a `str`. When a chunk ends in the
/// middle of a multi-byte UTF-8 character, the incomplete tail is saved and
/// prepended to the next chunk, so characters split across chunk boundaries
/// still decode. Bytes that can never become valid UTF-8 are rejected right
/// away.
#[derive(Default)]
pub struct Text {
    /// Number of bytes of `buffer` carried over from the previous chunk
    stored: usize,
    /// Holds an incomplete trailing UTF-8 sequence; three bytes suffice
    /// because a complete sequence is at most four bytes long
    buffer: [u8; 3],
}

impl Parser for Text {
    type Item = str;
    type Error = core::str::Utf8Error;

    /// Decodes `bytes` as UTF-8 after writing the saved tail over its prefix.
    ///
    /// Returns the longest valid prefix as a `str`. An incomplete trailing
    /// character is saved for the next call (see `saved()`).
    ///
    /// # Errors
    ///
    /// Returns the `Utf8Error` when the input contains a byte sequence that
    /// is malformed rather than merely truncated.
    fn parse<'a>(&mut self, bytes: &'a mut [u8]) -> Result<&'a str, Self::Error> {
        // If we cannot advance, return nothing.
        if bytes.len() <= self.stored {
            return Ok("");
        }

        // Copy the previously saved tail into place.
        bytes[..self.stored].copy_from_slice(&self.buffer[..self.stored]);

        let err = match core::str::from_utf8(bytes) {
            Ok(text) => {
                self.stored = 0;
                return Ok(text);
            }
            Err(err) => err,
        };

        let valid_len = err.valid_up_to();
        let invalid_len = bytes.len() - valid_len;

        // `error_len()` is `Some` when an unexpected byte was encountered,
        // i.e. the data is malformed and more input cannot repair it. Only a
        // sequence cut off by the end of the input (`None`) may be carried
        // over. The length check additionally guards the copy below.
        if err.error_len().is_some() || invalid_len > self.buffer.len() {
            return Err(err);
        }

        // Store the incomplete tail for the next read cycle.
        self.buffer[..invalid_len].copy_from_slice(&bytes[valid_len..]);
        self.stored = invalid_len;

        // Decode the valid part of the string. This prefix was just accepted
        // by the validator, so the error path is not expected to be taken;
        // returning the `Result` avoids a panic path.
        core::str::from_utf8(&bytes[..valid_len])
    }

    /// Number of trailing bytes carried over from the previous `parse` call
    fn saved(&self) -> usize {
        self.stored
    }
}
/// A CBOR segment
///
/// This type represents a single bytes or text segment on the wire. It can be
/// read out in parsed chunks based on the size of the input scratch buffer.
pub struct Segment<'r, R: Read, P: Parser> {
reader: &'r mut Decoder<R>,
unread: usize,
offset: usize,
parser: P,
}
impl<'r, R: Read, P: Parser> Segment<'r, R, P> {
/// Gets the number of unprocessed bytes
#[inline]
pub fn left(&self) -> usize {
self.unread + self.parser.saved()
}
/// Gets the next parsed chunk within the segment
///
/// Returns `Ok(None)` when all chunks have been read.
#[inline]
pub fn pull<'a>(
&mut self,
buffer: &'a mut [u8],
) -> Result<Option<&'a P::Item>, Error<R::Error>> {
use core::cmp::min;
let prev = self.parser.saved();
match self.unread {
0 if prev == 0 => return Ok(None),
0 => return Err(Error::Syntax(self.offset)),
_ => (),
}
// Determine how many bytes to read.
let size = min(buffer.len(), prev + self.unread);
let full = &mut buffer[..size];
let next = &mut full[min(size, prev)..];
// Read additional bytes.
self.reader.read_exact(next)?;
self.unread -= next.len();
self.parser
.parse(full)
.or(Err(Error::Syntax(self.offset)))
.map(Some)
}
}
/// A sequence of CBOR segments
///
/// CBOR allows for bytes or text items to be segmented. This type represents
/// the state of that segmented input stream.
pub struct Segments<'r, R: Read, P: Parser> {
reader: &'r mut Decoder<R>,
finish: bool,
nested: usize,
parser: PhantomData<P>,
unwrap: fn(Header) -> Result<Option<usize>, ()>,
}
impl<'r, R: Read, P: Parser> Segments<'r, R, P> {
#[inline]
pub(crate) fn new(
decoder: &'r mut Decoder<R>,
unwrap: fn(Header) -> Result<Option<usize>, ()>,
) -> Self {
Self {
reader: decoder,
finish: false,
nested: 0,
parser: PhantomData,
unwrap,
}
}
/// Gets the next segment in the stream
///
/// Returns `Ok(None)` at the conclusion of the stream.
#[inline]
pub fn pull(&mut self) -> Result<Option<Segment<R, P>>, Error<R::Error>> {
while !self.finish {
let offset = self.reader.offset();
match self.reader.pull()? {
Header::Break if self.nested == 1 => return Ok(None),
Header::Break if self.nested > 1 => self.nested -= 1,
header => match (self.unwrap)(header) {
Err(..) => return Err(Error::Syntax(offset)),
Ok(None) => self.nested += 1,
Ok(Some(len)) => {
self.finish = self.nested == 0;
return Ok(Some(Segment {
reader: self.reader,
unread: len,
offset,
parser: P::default(),
}));
}
},
}
}
Ok(None)
}
}