arbitrary/
unstructured.rs

1// Copyright © 2019 The Rust Fuzz Project Developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Wrappers around raw, unstructured bytes.
10
11use crate::{Arbitrary, Error, Result};
12use std::marker::PhantomData;
13use std::ops::ControlFlow;
14use std::{mem, ops};
15
16/// A source of unstructured data.
17///
18/// An `Unstructured` helps `Arbitrary` implementations interpret raw data
19/// (typically provided by a fuzzer) as a "DNA string" that describes how to
20/// construct the `Arbitrary` type. The goal is that a small change to the "DNA
21/// string" (the raw data wrapped by an `Unstructured`) results in a small
22/// change to the generated `Arbitrary` instance. This helps a fuzzer
23/// efficiently explore the `Arbitrary`'s input space.
24///
25/// `Unstructured` is deterministic: given the same raw data, the same series of
26/// API calls will return the same results (modulo system resource constraints,
27/// like running out of memory). However, `Unstructured` does not guarantee
28/// anything beyond that: it makes not guarantee that it will yield bytes from
29/// the underlying data in any particular order.
30///
31/// You shouldn't generally need to use an `Unstructured` unless you are writing
32/// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly,
33/// you should just be passing it through to nested `Arbitrary::arbitrary`
34/// calls.
35///
36/// # Example
37///
38/// Imagine you were writing a color conversion crate. You might want to write
39/// fuzz tests that take a random RGB color and assert various properties, run
40/// functions and make sure nothing panics, etc.
41///
42/// Below is what translating the fuzzer's raw input into an `Unstructured` and
43/// using that to generate an arbitrary RGB color might look like:
44///
45/// ```
46/// # #[cfg(feature = "derive")] fn foo() {
47/// use arbitrary::{Arbitrary, Unstructured};
48///
49/// /// An RGB color.
50/// #[derive(Arbitrary)]
51/// pub struct Rgb {
52///     r: u8,
53///     g: u8,
54///     b: u8,
55/// }
56///
57/// // Get the raw bytes from the fuzzer.
58/// #   let get_input_from_fuzzer = || &[];
59/// let raw_data: &[u8] = get_input_from_fuzzer();
60///
61/// // Wrap it in an `Unstructured`.
62/// let mut unstructured = Unstructured::new(raw_data);
63///
64/// // Generate an `Rgb` color and run our checks.
65/// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) {
66/// #   let run_my_color_conversion_checks = |_| {};
67///     run_my_color_conversion_checks(rgb);
68/// }
69/// # }
70/// ```
71#[derive(Debug)]
72pub struct Unstructured<'a> {
73    data: &'a [u8],
74}
75
76impl<'a> Unstructured<'a> {
77    /// Create a new `Unstructured` from the given raw data.
78    ///
79    /// # Example
80    ///
81    /// ```
82    /// use arbitrary::Unstructured;
83    ///
84    /// let u = Unstructured::new(&[1, 2, 3, 4]);
85    /// ```
86    pub fn new(data: &'a [u8]) -> Self {
87        Unstructured { data }
88    }
89
90    /// Get the number of remaining bytes of underlying data that are still
91    /// available.
92    ///
93    /// # Example
94    ///
95    /// ```
96    /// use arbitrary::{Arbitrary, Unstructured};
97    ///
98    /// let mut u = Unstructured::new(&[1, 2, 3]);
99    ///
100    /// // Initially have three bytes of data.
101    /// assert_eq!(u.len(), 3);
102    ///
103    /// // Generating a `bool` consumes one byte from the underlying data, so
104    /// // we are left with two bytes afterwards.
105    /// let _ = bool::arbitrary(&mut u);
106    /// assert_eq!(u.len(), 2);
107    /// ```
108    #[inline]
109    pub fn len(&self) -> usize {
110        self.data.len()
111    }
112
113    /// Is the underlying unstructured data exhausted?
114    ///
115    /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`.
116    ///
117    /// # Example
118    ///
119    /// ```
120    /// use arbitrary::{Arbitrary, Unstructured};
121    ///
122    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
123    ///
124    /// // Initially, we are not empty.
125    /// assert!(!u.is_empty());
126    ///
127    /// // Generating a `u32` consumes all four bytes of the underlying data, so
128    /// // we become empty afterwards.
129    /// let _ = u32::arbitrary(&mut u);
130    /// assert!(u.is_empty());
131    /// ```
132    #[inline]
133    pub fn is_empty(&self) -> bool {
134        self.len() == 0
135    }
136
137    /// Generate an arbitrary instance of `A`.
138    ///
139    /// This is simply a helper method that is equivalent to `<A as
140    /// Arbitrary>::arbitrary(self)`. This helper is a little bit more concise,
141    /// and can be used in situations where Rust's type inference will figure
142    /// out what `A` should be.
143    ///
144    /// # Example
145    ///
146    /// ```
147    /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> {
148    /// use arbitrary::{Arbitrary, Unstructured};
149    ///
150    /// #[derive(Arbitrary)]
151    /// struct MyType {
152    ///     // ...
153    /// }
154    ///
155    /// fn do_stuff(value: MyType) {
156    /// #   let _ = value;
157    ///     // ...
158    /// }
159    ///
160    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
161    ///
162    /// // Rust's type inference can figure out that `value` should be of type
163    /// // `MyType` here:
164    /// let value = u.arbitrary()?;
165    /// do_stuff(value);
166    /// # Ok(()) }
167    /// ```
168    pub fn arbitrary<A>(&mut self) -> Result<A>
169    where
170        A: Arbitrary<'a>,
171    {
172        <A as Arbitrary<'a>>::arbitrary(self)
173    }
174
175    /// Get the number of elements to insert when building up a collection of
176    /// arbitrary `ElementType`s.
177    ///
178    /// This uses the [`<ElementType as
179    /// Arbitrary>::size_hint`][crate::Arbitrary::size_hint] method to smartly
180    /// choose a length such that we most likely have enough underlying bytes to
181    /// construct that many arbitrary `ElementType`s.
182    ///
183    /// This should only be called within an `Arbitrary` implementation.
184    ///
185    /// # Example
186    ///
187    /// ```
188    /// use arbitrary::{Arbitrary, Result, Unstructured};
189    /// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
190    /// # impl<T> MyCollection<T> {
191    /// #     pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } }
192    /// #     pub fn insert(&mut self, element: T) {}
193    /// # }
194    ///
195    /// impl<'a, T> Arbitrary<'a> for MyCollection<T>
196    /// where
197    ///     T: Arbitrary<'a>,
198    /// {
199    ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
200    ///         // Get the number of `T`s we should insert into our collection.
201    ///         let len = u.arbitrary_len::<T>()?;
202    ///
203    ///         // And then create a collection of that length!
204    ///         let mut my_collection = MyCollection::with_capacity(len);
205    ///         for _ in 0..len {
206    ///             let element = T::arbitrary(u)?;
207    ///             my_collection.insert(element);
208    ///         }
209    ///
210    ///         Ok(my_collection)
211    ///     }
212    /// }
213    /// ```
214    pub fn arbitrary_len<ElementType>(&mut self) -> Result<usize>
215    where
216        ElementType: Arbitrary<'a>,
217    {
218        let byte_size = self.arbitrary_byte_size()?;
219        let (lower, upper) = <ElementType as Arbitrary>::size_hint(0);
220        let elem_size = upper.unwrap_or(lower * 2);
221        let elem_size = std::cmp::max(1, elem_size);
222        Ok(byte_size / elem_size)
223    }
224
225    fn arbitrary_byte_size(&mut self) -> Result<usize> {
226        if self.data.is_empty() {
227            Ok(0)
228        } else if self.data.len() == 1 {
229            self.data = &[];
230            Ok(0)
231        } else {
232            // Take lengths from the end of the data, since the `libFuzzer` folks
233            // found that this lets fuzzers more efficiently explore the input
234            // space.
235            //
236            // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97
237
238            // We only consume as many bytes as necessary to cover the entire
239            // range of the byte string.
240            // Note: We cast to u64 so we don't overflow when checking u32::MAX + 4 on 32-bit archs
241            let len = if self.data.len() as u64 <= u8::MAX as u64 + 1 {
242                let bytes = 1;
243                let max_size = self.data.len() - bytes;
244                let (rest, for_size) = self.data.split_at(max_size);
245                self.data = rest;
246                Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize
247            } else if self.data.len() as u64 <= u16::MAX as u64 + 2 {
248                let bytes = 2;
249                let max_size = self.data.len() - bytes;
250                let (rest, for_size) = self.data.split_at(max_size);
251                self.data = rest;
252                Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize
253            } else if self.data.len() as u64 <= u32::MAX as u64 + 4 {
254                let bytes = 4;
255                let max_size = self.data.len() - bytes;
256                let (rest, for_size) = self.data.split_at(max_size);
257                self.data = rest;
258                Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize
259            } else {
260                let bytes = 8;
261                let max_size = self.data.len() - bytes;
262                let (rest, for_size) = self.data.split_at(max_size);
263                self.data = rest;
264                Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize
265            };
266
267            Ok(len)
268        }
269    }
270
271    /// Generate an integer within the given range.
272    ///
273    /// Do not use this to generate the size of a collection. Use
274    /// `arbitrary_len` instead.
275    ///
276    /// The probability distribution of the return value is not necessarily uniform.
277    ///
278    /// Returns `range.start()`, not an error,
279    /// if this `Unstructured` [is empty][Unstructured::is_empty].
280    ///
281    /// # Panics
282    ///
283    /// Panics if `range.start > range.end`. That is, the given range must be
284    /// non-empty.
285    ///
286    /// # Example
287    ///
288    /// ```
289    /// # fn foo() -> arbitrary::Result<()> {
290    /// use arbitrary::{Arbitrary, Unstructured};
291    ///
292    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
293    ///
294    /// let x: i32 = u.int_in_range(-5_000..=-1_000)?;
295    ///
296    /// assert!(-5_000 <= x);
297    /// assert!(x <= -1_000);
298    /// # Ok(()) }
299    /// ```
300    pub fn int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T>
301    where
302        T: Int,
303    {
304        let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?;
305        self.data = &self.data[bytes_consumed..];
306        Ok(result)
307    }
308
309    fn int_in_range_impl<T>(
310        range: ops::RangeInclusive<T>,
311        mut bytes: impl Iterator<Item = u8>,
312    ) -> Result<(T, usize)>
313    where
314        T: Int,
315    {
316        let start = *range.start();
317        let end = *range.end();
318        assert!(
319            start <= end,
320            "`arbitrary::Unstructured::int_in_range` requires a non-empty range"
321        );
322
323        // When there is only one possible choice, don't waste any entropy from
324        // the underlying data.
325        if start == end {
326            return Ok((start, 0));
327        }
328
329        // From here on out we work with the unsigned representation. All of the
330        // operations performed below work out just as well whether or not `T`
331        // is a signed or unsigned integer.
332        let start = start.to_unsigned();
333        let end = end.to_unsigned();
334
335        let delta = end.wrapping_sub(start);
336        debug_assert_ne!(delta, T::Unsigned::ZERO);
337
338        // Compute an arbitrary integer offset from the start of the range. We
339        // do this by consuming `size_of(T)` bytes from the input to create an
340        // arbitrary integer and then clamping that int into our range bounds
341        // with a modulo operation.
342        let mut arbitrary_int = T::Unsigned::ZERO;
343        let mut bytes_consumed: usize = 0;
344
345        while (bytes_consumed < mem::size_of::<T>())
346            && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO
347        {
348            let byte = match bytes.next() {
349                None => break,
350                Some(b) => b,
351            };
352            bytes_consumed += 1;
353
354            // Combine this byte into our arbitrary integer, but avoid
355            // overflowing the shift for `u8` and `i8`.
356            arbitrary_int = if mem::size_of::<T>() == 1 {
357                T::Unsigned::from_u8(byte)
358            } else {
359                (arbitrary_int << 8) | T::Unsigned::from_u8(byte)
360            };
361        }
362
363        let offset = if delta == T::Unsigned::MAX {
364            arbitrary_int
365        } else {
366            arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap())
367        };
368
369        // Finally, we add `start` to our offset from `start` to get the result
370        // actual value within the range.
371        let result = start.wrapping_add(offset);
372
373        // And convert back to our maybe-signed representation.
374        let result = T::from_unsigned(result);
375        debug_assert!(*range.start() <= result);
376        debug_assert!(result <= *range.end());
377
378        Ok((result, bytes_consumed))
379    }
380
381    /// Choose one of the given choices.
382    ///
383    /// This should only be used inside of `Arbitrary` implementations.
384    ///
385    /// The probability distribution of choices is not necessarily uniform.
386    ///
387    /// Returns the first choice, not an error,
388    /// if this `Unstructured` [is empty][Unstructured::is_empty].
389    ///
390    /// Returns an error if no choices are provided.
391    ///
392    /// # Examples
393    ///
394    /// Selecting from an array of choices:
395    ///
396    /// ```
397    /// use arbitrary::Unstructured;
398    ///
399    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
400    /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
401    ///
402    /// let choice = u.choose(&choices).unwrap();
403    ///
404    /// println!("chose {}", choice);
405    /// ```
406    ///
407    /// An error is returned if no choices are provided:
408    ///
409    /// ```
410    /// use arbitrary::Unstructured;
411    ///
412    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
413    /// let choices: [char; 0] = [];
414    ///
415    /// let result = u.choose(&choices);
416    ///
417    /// assert!(result.is_err());
418    /// ```
419    pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> {
420        let idx = self.choose_index(choices.len())?;
421        Ok(&choices[idx])
422    }
423
424    /// Choose one of the given iterator choices.
425    ///
426    /// This should only be used inside of `Arbitrary` implementations.
427    ///
428    /// The probability distribution of choices is not necessarily uniform.
429    ///
430    /// Returns the first choice, not an error,
431    /// if this `Unstructured` [is empty][Unstructured::is_empty].
432    ///
433    /// Returns an error if no choices are provided.
434    ///
435    /// # Examples
436    ///
437    /// Selecting a random item from a set:
438    ///
439    /// ```
440    /// use std::collections::BTreeSet;
441    /// use arbitrary::Unstructured;
442    ///
443    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
444    /// let set = BTreeSet::from(['a', 'b', 'c']);
445    ///
446    /// let choice = u.choose_iter(set.iter()).unwrap();
447    ///
448    /// println!("chose {}", choice);
449    /// ```
450    pub fn choose_iter<T, I>(&mut self, choices: I) -> Result<T>
451    where
452        I: IntoIterator<Item = T>,
453        I::IntoIter: ExactSizeIterator,
454    {
455        let mut choices = choices.into_iter();
456        let idx = self.choose_index(choices.len())?;
457        let choice = choices
458            .nth(idx)
459            .expect("ExactSizeIterator should have correct len");
460        Ok(choice)
461    }
462
463    /// Choose a value in `0..len`.
464    ///
465    /// The probability distribution of return values is not necessarily uniform.
466    ///
467    /// Returns zero, not an error, if this `Unstructured` [is empty][Unstructured::is_empty].
468    ///
469    /// Returns an error if the `len` is zero.
470    ///
471    /// # Examples
472    ///
473    /// Using Fisher–Yates shuffle shuffle to generate an arbitrary permutation.
474    ///
475    /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle
476    ///
477    /// ```
478    /// use arbitrary::Unstructured;
479    ///
480    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
481    /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
482    /// let mut to_permute = &mut permutation[..];
483    /// while to_permute.len() > 1 {
484    ///     let idx = u.choose_index(to_permute.len()).unwrap();
485    ///     to_permute.swap(0, idx);
486    ///     to_permute = &mut to_permute[1..];
487    /// }
488    ///
489    /// println!("permutation: {:?}", permutation);
490    /// ```
491    ///
492    /// An error is returned if the length is zero:
493    ///
494    /// ```
495    /// use arbitrary::Unstructured;
496    ///
497    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
498    /// let array: [i32; 0] = [];
499    ///
500    /// let result = u.choose_index(array.len());
501    ///
502    /// assert!(result.is_err());
503    /// ```
504    pub fn choose_index(&mut self, len: usize) -> Result<usize> {
505        if len == 0 {
506            return Err(Error::EmptyChoose);
507        }
508        let idx = self.int_in_range(0..=len - 1)?;
509        Ok(idx)
510    }
511
512    /// Generate a boolean which is true with probability approximately the given ratio.
513    ///
514    /// Returns true, not an error, if this `Unstructured` [is empty][Unstructured::is_empty].
515    ///
516    /// # Panics
517    ///
518    /// Panics when the numerator and denominator do not meet these constraints:
519    ///
520    /// * `0 < numerator <= denominator`
521    ///
522    /// # Example
523    ///
524    /// Generate a boolean that is `true` five sevenths of the time:
525    ///
526    /// ```
527    /// # fn foo() -> arbitrary::Result<()> {
528    /// use arbitrary::Unstructured;
529    ///
530    /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0];
531    /// let mut u = Unstructured::new(&my_data);
532    ///
533    /// if u.ratio(5, 7)? {
534    ///     // Take this branch approximately 5/7 of the time.
535    /// }
536    /// # Ok(())
537    /// # }
538    /// ```
539    pub fn ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool>
540    where
541        T: Int,
542    {
543        assert!(T::ZERO < numerator);
544        assert!(numerator <= denominator);
545        let x = self.int_in_range(T::ONE..=denominator)?;
546        Ok(x <= numerator)
547    }
548
549    /// Fill a `buffer` with bytes from the underlying raw data.
550    ///
551    /// This should only be called within an `Arbitrary` implementation. This is
552    /// a very low-level operation. You should generally prefer calling nested
553    /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
554    /// `String::arbitrary` over using this method directly.
555    ///
556    /// If this `Unstructured` does not have enough underlying data to fill the
557    /// whole `buffer`, it pads the buffer out with zeros.
558    ///
559    /// # Example
560    ///
561    /// ```
562    /// use arbitrary::Unstructured;
563    ///
564    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
565    ///
566    /// let mut buf = [0; 2];
567    ///
568    /// assert!(u.fill_buffer(&mut buf).is_ok());
569    /// assert_eq!(buf, [1, 2]);
570    ///
571    /// assert!(u.fill_buffer(&mut buf).is_ok());
572    /// assert_eq!(buf, [3, 4]);
573    ///
574    /// assert!(u.fill_buffer(&mut buf).is_ok());
575    /// assert_eq!(buf, [0, 0]);
576    /// ```
577    pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> {
578        let n = std::cmp::min(buffer.len(), self.data.len());
579        buffer[..n].copy_from_slice(&self.data[..n]);
580        for byte in buffer[n..].iter_mut() {
581            *byte = 0;
582        }
583        self.data = &self.data[n..];
584        Ok(())
585    }
586
587    /// Provide `size` bytes from the underlying raw data.
588    ///
589    /// This should only be called within an `Arbitrary` implementation. This is
590    /// a very low-level operation. You should generally prefer calling nested
591    /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
592    /// `String::arbitrary` over using this method directly.
593    ///
594    /// # Example
595    ///
596    /// ```
597    /// use arbitrary::Unstructured;
598    ///
599    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
600    ///
601    /// assert!(u.bytes(2).unwrap() == &[1, 2]);
602    /// assert!(u.bytes(2).unwrap() == &[3, 4]);
603    /// ```
604    pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> {
605        if self.data.len() < size {
606            return Err(Error::NotEnoughData);
607        }
608
609        let (for_buf, rest) = self.data.split_at(size);
610        self.data = rest;
611        Ok(for_buf)
612    }
613
614    /// Peek at `size` number of bytes of the underlying raw input.
615    ///
616    /// Does not consume the bytes, only peeks at them.
617    ///
618    /// Returns `None` if there are not `size` bytes left in the underlying raw
619    /// input.
620    ///
621    /// # Example
622    ///
623    /// ```
624    /// use arbitrary::Unstructured;
625    ///
626    /// let u = Unstructured::new(&[1, 2, 3]);
627    ///
628    /// assert_eq!(u.peek_bytes(0).unwrap(), []);
629    /// assert_eq!(u.peek_bytes(1).unwrap(), [1]);
630    /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]);
631    /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]);
632    ///
633    /// assert!(u.peek_bytes(4).is_none());
634    /// ```
635    pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> {
636        self.data.get(..size)
637    }
638
639    /// Consume all of the rest of the remaining underlying bytes.
640    ///
641    /// Returns a slice of all the remaining, unconsumed bytes.
642    ///
643    /// # Example
644    ///
645    /// ```
646    /// use arbitrary::Unstructured;
647    ///
648    /// let mut u = Unstructured::new(&[1, 2, 3]);
649    ///
650    /// let mut remaining = u.take_rest();
651    ///
652    /// assert_eq!(remaining, [1, 2, 3]);
653    /// ```
654    pub fn take_rest(mut self) -> &'a [u8] {
655        mem::take(&mut self.data)
656    }
657
658    /// Provide an iterator over elements for constructing a collection
659    ///
660    /// This is useful for implementing [`Arbitrary::arbitrary`] on collections
661    /// since the implementation is simply `u.arbitrary_iter()?.collect()`
662    pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>(
663        &'b mut self,
664    ) -> Result<ArbitraryIter<'a, 'b, ElementType>> {
665        Ok(ArbitraryIter {
666            u: &mut *self,
667            _marker: PhantomData,
668        })
669    }
670
671    /// Provide an iterator over elements for constructing a collection from
672    /// all the remaining bytes.
673    ///
674    /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections
675    /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()`
676    pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>(
677        self,
678    ) -> Result<ArbitraryTakeRestIter<'a, ElementType>> {
679        Ok(ArbitraryTakeRestIter {
680            u: self,
681            _marker: PhantomData,
682        })
683    }
684
685    /// Call the given function an arbitrary number of times.
686    ///
687    /// The function is given this `Unstructured` so that it can continue to
688    /// generate arbitrary data and structures.
689    ///
690    /// You may optionaly specify minimum and maximum bounds on the number of
691    /// times the function is called.
692    ///
693    /// You may break out of the loop early by returning
694    /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return
695    /// `Ok(std::ops::ControlFlow::Continue)`.
696    ///
697    /// # Panics
698    ///
699    /// Panics if `min > max`.
700    ///
701    /// # Example
702    ///
703    /// Call a closure that generates an arbitrary type inside a context an
704    /// arbitrary number of times:
705    ///
706    /// ```
707    /// use arbitrary::{Result, Unstructured};
708    /// use std::ops::ControlFlow;
709    ///
710    /// enum Type {
711    ///     /// A boolean type.
712    ///     Bool,
713    ///
714    ///     /// An integer type.
715    ///     Int,
716    ///
717    ///     /// A list of the `i`th type in this type's context.
718    ///     List(usize),
719    /// }
720    ///
721    /// fn arbitrary_types_context(u: &mut Unstructured) -> Result<Vec<Type>> {
722    ///     let mut context = vec![];
723    ///
724    ///     u.arbitrary_loop(Some(10), Some(20), |u| {
725    ///         let num_choices = if context.is_empty() {
726    ///             2
727    ///         } else {
728    ///             3
729    ///         };
730    ///         let ty = match u.int_in_range::<u8>(1..=num_choices)? {
731    ///             1 => Type::Bool,
732    ///             2 => Type::Int,
733    ///             3 => Type::List(u.int_in_range(0..=context.len() - 1)?),
734    ///             _ => unreachable!(),
735    ///         };
736    ///         context.push(ty);
737    ///         Ok(ControlFlow::Continue(()))
738    ///     })?;
739    ///
740    ///     // The number of loop iterations are constrained by the min/max
741    ///     // bounds that we provided.
742    ///     assert!(context.len() >= 10);
743    ///     assert!(context.len() <= 20);
744    ///
745    ///     Ok(context)
746    /// }
747    /// ```
748    pub fn arbitrary_loop(
749        &mut self,
750        min: Option<u32>,
751        max: Option<u32>,
752        mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>,
753    ) -> Result<()> {
754        let min = min.unwrap_or(0);
755        let max = max.unwrap_or(u32::MAX);
756
757        for _ in 0..self.int_in_range(min..=max)? {
758            match f(self)? {
759                ControlFlow::Continue(_) => continue,
760                ControlFlow::Break(_) => break,
761            }
762        }
763
764        Ok(())
765    }
766}
767
768/// Utility iterator produced by [`Unstructured::arbitrary_iter`]
769pub struct ArbitraryIter<'a, 'b, ElementType> {
770    u: &'b mut Unstructured<'a>,
771    _marker: PhantomData<ElementType>,
772}
773
774impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, '_, ElementType> {
775    type Item = Result<ElementType>;
776    fn next(&mut self) -> Option<Result<ElementType>> {
777        let keep_going = self.u.arbitrary().unwrap_or(false);
778        if keep_going {
779            Some(Arbitrary::arbitrary(self.u))
780        } else {
781            None
782        }
783    }
784}
785
786/// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`]
787pub struct ArbitraryTakeRestIter<'a, ElementType> {
788    u: Unstructured<'a>,
789    _marker: PhantomData<ElementType>,
790}
791
792impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> {
793    type Item = Result<ElementType>;
794    fn next(&mut self) -> Option<Result<ElementType>> {
795        let keep_going = self.u.arbitrary().unwrap_or(false);
796        if keep_going {
797            Some(Arbitrary::arbitrary(&mut self.u))
798        } else {
799            None
800        }
801    }
802}
803
804/// A trait that is implemented for all of the primitive integers:
805///
806/// * `u8`
807/// * `u16`
808/// * `u32`
809/// * `u64`
810/// * `u128`
811/// * `usize`
812/// * `i8`
813/// * `i16`
814/// * `i32`
815/// * `i64`
816/// * `i128`
817/// * `isize`
818///
819/// Don't implement this trait yourself.
820pub trait Int:
821    Copy
822    + std::fmt::Debug
823    + PartialOrd
824    + Ord
825    + ops::Sub<Self, Output = Self>
826    + ops::Rem<Self, Output = Self>
827    + ops::Shr<Self, Output = Self>
828    + ops::Shl<usize, Output = Self>
829    + ops::BitOr<Self, Output = Self>
830{
831    #[doc(hidden)]
832    type Unsigned: Int;
833
834    #[doc(hidden)]
835    const ZERO: Self;
836
837    #[doc(hidden)]
838    const ONE: Self;
839
840    #[doc(hidden)]
841    const MAX: Self;
842
843    #[doc(hidden)]
844    fn from_u8(b: u8) -> Self;
845
846    #[doc(hidden)]
847    fn from_usize(u: usize) -> Self;
848
849    #[doc(hidden)]
850    fn checked_add(self, rhs: Self) -> Option<Self>;
851
852    #[doc(hidden)]
853    fn wrapping_add(self, rhs: Self) -> Self;
854
855    #[doc(hidden)]
856    fn wrapping_sub(self, rhs: Self) -> Self;
857
858    #[doc(hidden)]
859    fn to_unsigned(self) -> Self::Unsigned;
860
861    #[doc(hidden)]
862    fn from_unsigned(unsigned: Self::Unsigned) -> Self;
863}
864
865macro_rules! impl_int {
866    ( $( $ty:ty : $unsigned_ty: ty ; )* ) => {
867        $(
868            impl Int for $ty {
869                type Unsigned = $unsigned_ty;
870
871                const ZERO: Self = 0;
872
873                const ONE: Self = 1;
874
875                const MAX: Self = Self::MAX;
876
877                fn from_u8(b: u8) -> Self {
878                    b as Self
879                }
880
881                fn from_usize(u: usize) -> Self {
882                    u as Self
883                }
884
885                fn checked_add(self, rhs: Self) -> Option<Self> {
886                    <$ty>::checked_add(self, rhs)
887                }
888
889                fn wrapping_add(self, rhs: Self) -> Self {
890                    <$ty>::wrapping_add(self, rhs)
891                }
892
893                fn wrapping_sub(self, rhs: Self) -> Self {
894                    <$ty>::wrapping_sub(self, rhs)
895                }
896
897                fn to_unsigned(self) -> Self::Unsigned {
898                    self as $unsigned_ty
899                }
900
901                fn from_unsigned(unsigned: $unsigned_ty) -> Self {
902                    unsigned as Self
903                }
904            }
905        )*
906    }
907}
908
909impl_int! {
910    u8: u8;
911    u16: u16;
912    u32: u32;
913    u64: u64;
914    u128: u128;
915    usize: usize;
916    i8: u8;
917    i16: u16;
918    i32: u32;
919    i64: u64;
920    i128: u128;
921    isize: usize;
922}
923
924#[cfg(test)]
925mod tests {
926    use super::*;
927
928    #[test]
929    fn test_byte_size() {
930        let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
931        // Should take one byte off the end
932        assert_eq!(u.arbitrary_byte_size().unwrap(), 6);
933        assert_eq!(u.len(), 9);
934        let mut v = vec![0; 260];
935        v.push(1);
936        v.push(4);
937        let mut u = Unstructured::new(&v);
938        // Should read two bytes off the end
939        assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104);
940        assert_eq!(u.len(), 260);
941    }
942
943    #[test]
944    fn int_in_range_of_one() {
945        let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
946        let x = u.int_in_range(0..=0).unwrap();
947        assert_eq!(x, 0);
948        let choice = *u.choose(&[42]).unwrap();
949        assert_eq!(choice, 42)
950    }
951
952    #[test]
953    fn int_in_range_uses_minimal_amount_of_bytes() {
954        let mut u = Unstructured::new(&[1, 2]);
955        assert_eq!(1, u.int_in_range::<u8>(0..=u8::MAX).unwrap());
956        assert_eq!(u.len(), 1);
957
958        let mut u = Unstructured::new(&[1, 2]);
959        assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32).unwrap());
960        assert_eq!(u.len(), 1);
961
962        let mut u = Unstructured::new(&[1]);
963        assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32 + 1).unwrap());
964        assert!(u.is_empty());
965    }
966
967    #[test]
968    fn int_in_range_in_bounds() {
969        for input in u8::MIN..=u8::MAX {
970            let input = [input];
971
972            let mut u = Unstructured::new(&input);
973            let x = u.int_in_range(1..=u8::MAX).unwrap();
974            assert_ne!(x, 0);
975
976            let mut u = Unstructured::new(&input);
977            let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
978            assert_ne!(x, u8::MAX);
979        }
980    }
981
982    #[test]
983    fn int_in_range_covers_unsigned_range() {
984        // Test that we generate all values within the range given to
985        // `int_in_range`.
986
987        let mut full = [false; u8::MAX as usize + 1];
988        let mut no_zero = [false; u8::MAX as usize];
989        let mut no_max = [false; u8::MAX as usize];
990        let mut narrow = [false; 10];
991
992        for input in u8::MIN..=u8::MAX {
993            let input = [input];
994
995            let mut u = Unstructured::new(&input);
996            let x = u.int_in_range(0..=u8::MAX).unwrap();
997            full[x as usize] = true;
998
999            let mut u = Unstructured::new(&input);
1000            let x = u.int_in_range(1..=u8::MAX).unwrap();
1001            no_zero[x as usize - 1] = true;
1002
1003            let mut u = Unstructured::new(&input);
1004            let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
1005            no_max[x as usize] = true;
1006
1007            let mut u = Unstructured::new(&input);
1008            let x = u.int_in_range(100..=109).unwrap();
1009            narrow[x as usize - 100] = true;
1010        }
1011
1012        for (i, covered) in full.iter().enumerate() {
1013            assert!(covered, "full[{}] should have been generated", i);
1014        }
1015        for (i, covered) in no_zero.iter().enumerate() {
1016            assert!(covered, "no_zero[{}] should have been generated", i);
1017        }
1018        for (i, covered) in no_max.iter().enumerate() {
1019            assert!(covered, "no_max[{}] should have been generated", i);
1020        }
1021        for (i, covered) in narrow.iter().enumerate() {
1022            assert!(covered, "narrow[{}] should have been generated", i);
1023        }
1024    }
1025
1026    #[test]
1027    fn int_in_range_covers_signed_range() {
1028        // Test that we generate all values within the range given to
1029        // `int_in_range`.
1030
1031        let mut full = [false; u8::MAX as usize + 1];
1032        let mut no_min = [false; u8::MAX as usize];
1033        let mut no_max = [false; u8::MAX as usize];
1034        let mut narrow = [false; 21];
1035
1036        let abs_i8_min: isize = 128;
1037
1038        for input in 0..=u8::MAX {
1039            let input = [input];
1040
1041            let mut u = Unstructured::new(&input);
1042            let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap();
1043            full[(x as isize + abs_i8_min) as usize] = true;
1044
1045            let mut u = Unstructured::new(&input);
1046            let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap();
1047            no_min[(x as isize + abs_i8_min - 1) as usize] = true;
1048
1049            let mut u = Unstructured::new(&input);
1050            let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap();
1051            no_max[(x as isize + abs_i8_min) as usize] = true;
1052
1053            let mut u = Unstructured::new(&input);
1054            let x = u.int_in_range(-10..=10).unwrap();
1055            narrow[(x as isize + 10) as usize] = true;
1056        }
1057
1058        for (i, covered) in full.iter().enumerate() {
1059            assert!(covered, "full[{}] should have been generated", i);
1060        }
1061        for (i, covered) in no_min.iter().enumerate() {
1062            assert!(covered, "no_min[{}] should have been generated", i);
1063        }
1064        for (i, covered) in no_max.iter().enumerate() {
1065            assert!(covered, "no_max[{}] should have been generated", i);
1066        }
1067        for (i, covered) in narrow.iter().enumerate() {
1068            assert!(covered, "narrow[{}] should have been generated", i);
1069        }
1070    }
1071}