arbitrary/
unstructured.rs

1// Copyright © 2019 The Rust Fuzz Project Developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Wrappers around raw, unstructured bytes.
10
11use crate::{Arbitrary, Error, Result};
12use std::marker::PhantomData;
13use std::ops::ControlFlow;
14use std::{mem, ops};
15
16/// A source of unstructured data.
17///
18/// An `Unstructured` helps `Arbitrary` implementations interpret raw data
19/// (typically provided by a fuzzer) as a "DNA string" that describes how to
20/// construct the `Arbitrary` type. The goal is that a small change to the "DNA
21/// string" (the raw data wrapped by an `Unstructured`) results in a small
22/// change to the generated `Arbitrary` instance. This helps a fuzzer
23/// efficiently explore the `Arbitrary`'s input space.
24///
25/// `Unstructured` is deterministic: given the same raw data, the same series of
26/// API calls will return the same results (modulo system resource constraints,
27/// like running out of memory). However, `Unstructured` does not guarantee
28/// anything beyond that: it makes not guarantee that it will yield bytes from
29/// the underlying data in any particular order.
30///
31/// You shouldn't generally need to use an `Unstructured` unless you are writing
32/// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly,
33/// you should just be passing it through to nested `Arbitrary::arbitrary`
34/// calls.
35///
36/// # Example
37///
38/// Imagine you were writing a color conversion crate. You might want to write
39/// fuzz tests that take a random RGB color and assert various properties, run
40/// functions and make sure nothing panics, etc.
41///
42/// Below is what translating the fuzzer's raw input into an `Unstructured` and
43/// using that to generate an arbitrary RGB color might look like:
44///
45/// ```
46/// # #[cfg(feature = "derive")] fn foo() {
47/// use arbitrary::{Arbitrary, Unstructured};
48///
49/// /// An RGB color.
50/// #[derive(Arbitrary)]
51/// pub struct Rgb {
52///     r: u8,
53///     g: u8,
54///     b: u8,
55/// }
56///
57/// // Get the raw bytes from the fuzzer.
58/// #   let get_input_from_fuzzer = || &[];
59/// let raw_data: &[u8] = get_input_from_fuzzer();
60///
61/// // Wrap it in an `Unstructured`.
62/// let mut unstructured = Unstructured::new(raw_data);
63///
64/// // Generate an `Rgb` color and run our checks.
65/// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) {
66/// #   let run_my_color_conversion_checks = |_| {};
67///     run_my_color_conversion_checks(rgb);
68/// }
69/// # }
70/// ```
71#[derive(Debug)]
72pub struct Unstructured<'a> {
73    data: &'a [u8],
74}
75
76impl<'a> Unstructured<'a> {
77    /// Create a new `Unstructured` from the given raw data.
78    ///
79    /// # Example
80    ///
81    /// ```
82    /// use arbitrary::Unstructured;
83    ///
84    /// let u = Unstructured::new(&[1, 2, 3, 4]);
85    /// ```
86    pub fn new(data: &'a [u8]) -> Self {
87        Unstructured { data }
88    }
89
90    /// Get the number of remaining bytes of underlying data that are still
91    /// available.
92    ///
93    /// # Example
94    ///
95    /// ```
96    /// use arbitrary::{Arbitrary, Unstructured};
97    ///
98    /// let mut u = Unstructured::new(&[1, 2, 3]);
99    ///
100    /// // Initially have three bytes of data.
101    /// assert_eq!(u.len(), 3);
102    ///
103    /// // Generating a `bool` consumes one byte from the underlying data, so
104    /// // we are left with two bytes afterwards.
105    /// let _ = bool::arbitrary(&mut u);
106    /// assert_eq!(u.len(), 2);
107    /// ```
108    #[inline]
109    pub fn len(&self) -> usize {
110        self.data.len()
111    }
112
113    /// Is the underlying unstructured data exhausted?
114    ///
115    /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`.
116    ///
117    /// # Example
118    ///
119    /// ```
120    /// use arbitrary::{Arbitrary, Unstructured};
121    ///
122    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
123    ///
124    /// // Initially, we are not empty.
125    /// assert!(!u.is_empty());
126    ///
127    /// // Generating a `u32` consumes all four bytes of the underlying data, so
128    /// // we become empty afterwards.
129    /// let _ = u32::arbitrary(&mut u);
130    /// assert!(u.is_empty());
131    /// ```
132    #[inline]
133    pub fn is_empty(&self) -> bool {
134        self.len() == 0
135    }
136
137    /// Generate an arbitrary instance of `A`.
138    ///
139    /// This is simply a helper method that is equivalent to `<A as
140    /// Arbitrary>::arbitrary(self)`. This helper is a little bit more concise,
141    /// and can be used in situations where Rust's type inference will figure
142    /// out what `A` should be.
143    ///
144    /// # Example
145    ///
146    /// ```
147    /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> {
148    /// use arbitrary::{Arbitrary, Unstructured};
149    ///
150    /// #[derive(Arbitrary)]
151    /// struct MyType {
152    ///     // ...
153    /// }
154    ///
155    /// fn do_stuff(value: MyType) {
156    /// #   let _ = value;
157    ///     // ...
158    /// }
159    ///
160    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
161    ///
162    /// // Rust's type inference can figure out that `value` should be of type
163    /// // `MyType` here:
164    /// let value = u.arbitrary()?;
165    /// do_stuff(value);
166    /// # Ok(()) }
167    /// ```
168    pub fn arbitrary<A>(&mut self) -> Result<A>
169    where
170        A: Arbitrary<'a>,
171    {
172        <A as Arbitrary<'a>>::arbitrary(self)
173    }
174
175    /// Get the number of elements to insert when building up a collection of
176    /// arbitrary `ElementType`s.
177    ///
178    /// This uses the [`<ElementType as
179    /// Arbitrary>::size_hint`][crate::Arbitrary::size_hint] method to smartly
180    /// choose a length such that we most likely have enough underlying bytes to
181    /// construct that many arbitrary `ElementType`s.
182    ///
183    /// This should only be called within an `Arbitrary` implementation.
184    ///
185    /// # Example
186    ///
187    /// ```
188    /// use arbitrary::{Arbitrary, Result, Unstructured};
189    /// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
190    /// # impl<T> MyCollection<T> {
191    /// #     pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } }
192    /// #     pub fn insert(&mut self, element: T) {}
193    /// # }
194    ///
195    /// impl<'a, T> Arbitrary<'a> for MyCollection<T>
196    /// where
197    ///     T: Arbitrary<'a>,
198    /// {
199    ///     fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
200    ///         // Get the number of `T`s we should insert into our collection.
201    ///         let len = u.arbitrary_len::<T>()?;
202    ///
203    ///         // And then create a collection of that length!
204    ///         let mut my_collection = MyCollection::with_capacity(len);
205    ///         for _ in 0..len {
206    ///             let element = T::arbitrary(u)?;
207    ///             my_collection.insert(element);
208    ///         }
209    ///
210    ///         Ok(my_collection)
211    ///     }
212    /// }
213    /// ```
214    pub fn arbitrary_len<ElementType>(&mut self) -> Result<usize>
215    where
216        ElementType: Arbitrary<'a>,
217    {
218        let byte_size = self.arbitrary_byte_size()?;
219        let (lower, upper) = <ElementType as Arbitrary>::size_hint(0);
220        let elem_size = upper.unwrap_or(lower * 2);
221        let elem_size = std::cmp::max(1, elem_size);
222        Ok(byte_size / elem_size)
223    }
224
225    fn arbitrary_byte_size(&mut self) -> Result<usize> {
226        if self.data.is_empty() {
227            Ok(0)
228        } else if self.data.len() == 1 {
229            self.data = &[];
230            Ok(0)
231        } else {
232            // Take lengths from the end of the data, since the `libFuzzer` folks
233            // found that this lets fuzzers more efficiently explore the input
234            // space.
235            //
236            // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97
237
238            // We only consume as many bytes as necessary to cover the entire
239            // range of the byte string.
240            // Note: We cast to u64 so we don't overflow when checking u32::MAX + 4 on 32-bit archs
241            let len = if self.data.len() as u64 <= u8::MAX as u64 + 1 {
242                let bytes = 1;
243                let max_size = self.data.len() - bytes;
244                let (rest, for_size) = self.data.split_at(max_size);
245                self.data = rest;
246                Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize
247            } else if self.data.len() as u64 <= u16::MAX as u64 + 2 {
248                let bytes = 2;
249                let max_size = self.data.len() - bytes;
250                let (rest, for_size) = self.data.split_at(max_size);
251                self.data = rest;
252                Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize
253            } else if self.data.len() as u64 <= u32::MAX as u64 + 4 {
254                let bytes = 4;
255                let max_size = self.data.len() - bytes;
256                let (rest, for_size) = self.data.split_at(max_size);
257                self.data = rest;
258                Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize
259            } else {
260                let bytes = 8;
261                let max_size = self.data.len() - bytes;
262                let (rest, for_size) = self.data.split_at(max_size);
263                self.data = rest;
264                Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize
265            };
266
267            Ok(len)
268        }
269    }
270
271    /// Generate an integer within the given range.
272    ///
273    /// Do not use this to generate the size of a collection. Use
274    /// `arbitrary_len` instead.
275    ///
276    /// # Panics
277    ///
278    /// Panics if `range.start > range.end`. That is, the given range must be
279    /// non-empty.
280    ///
281    /// # Example
282    ///
283    /// ```
284    /// # fn foo() -> arbitrary::Result<()> {
285    /// use arbitrary::{Arbitrary, Unstructured};
286    ///
287    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
288    ///
289    /// let x: i32 = u.int_in_range(-5_000..=-1_000)?;
290    ///
291    /// assert!(-5_000 <= x);
292    /// assert!(x <= -1_000);
293    /// # Ok(()) }
294    /// ```
295    pub fn int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T>
296    where
297        T: Int,
298    {
299        let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?;
300        self.data = &self.data[bytes_consumed..];
301        Ok(result)
302    }
303
304    fn int_in_range_impl<T>(
305        range: ops::RangeInclusive<T>,
306        mut bytes: impl Iterator<Item = u8>,
307    ) -> Result<(T, usize)>
308    where
309        T: Int,
310    {
311        let start = *range.start();
312        let end = *range.end();
313        assert!(
314            start <= end,
315            "`arbitrary::Unstructured::int_in_range` requires a non-empty range"
316        );
317
318        // When there is only one possible choice, don't waste any entropy from
319        // the underlying data.
320        if start == end {
321            return Ok((start, 0));
322        }
323
324        // From here on out we work with the unsigned representation. All of the
325        // operations performed below work out just as well whether or not `T`
326        // is a signed or unsigned integer.
327        let start = start.to_unsigned();
328        let end = end.to_unsigned();
329
330        let delta = end.wrapping_sub(start);
331        debug_assert_ne!(delta, T::Unsigned::ZERO);
332
333        // Compute an arbitrary integer offset from the start of the range. We
334        // do this by consuming `size_of(T)` bytes from the input to create an
335        // arbitrary integer and then clamping that int into our range bounds
336        // with a modulo operation.
337        let mut arbitrary_int = T::Unsigned::ZERO;
338        let mut bytes_consumed: usize = 0;
339
340        while (bytes_consumed < mem::size_of::<T>())
341            && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO
342        {
343            let byte = match bytes.next() {
344                None => break,
345                Some(b) => b,
346            };
347            bytes_consumed += 1;
348
349            // Combine this byte into our arbitrary integer, but avoid
350            // overflowing the shift for `u8` and `i8`.
351            arbitrary_int = if mem::size_of::<T>() == 1 {
352                T::Unsigned::from_u8(byte)
353            } else {
354                (arbitrary_int << 8) | T::Unsigned::from_u8(byte)
355            };
356        }
357
358        let offset = if delta == T::Unsigned::MAX {
359            arbitrary_int
360        } else {
361            arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap())
362        };
363
364        // Finally, we add `start` to our offset from `start` to get the result
365        // actual value within the range.
366        let result = start.wrapping_add(offset);
367
368        // And convert back to our maybe-signed representation.
369        let result = T::from_unsigned(result);
370        debug_assert!(*range.start() <= result);
371        debug_assert!(result <= *range.end());
372
373        Ok((result, bytes_consumed))
374    }
375
376    /// Choose one of the given choices.
377    ///
378    /// This should only be used inside of `Arbitrary` implementations.
379    ///
380    /// Returns an error if there is not enough underlying data to make a
381    /// choice or if no choices are provided.
382    ///
383    /// # Examples
384    ///
385    /// Selecting from an array of choices:
386    ///
387    /// ```
388    /// use arbitrary::Unstructured;
389    ///
390    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
391    /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
392    ///
393    /// let choice = u.choose(&choices).unwrap();
394    ///
395    /// println!("chose {}", choice);
396    /// ```
397    ///
398    /// An error is returned if no choices are provided:
399    ///
400    /// ```
401    /// use arbitrary::Unstructured;
402    ///
403    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
404    /// let choices: [char; 0] = [];
405    ///
406    /// let result = u.choose(&choices);
407    ///
408    /// assert!(result.is_err());
409    /// ```
410    pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> {
411        let idx = self.choose_index(choices.len())?;
412        Ok(&choices[idx])
413    }
414
415    /// Choose one of the given iterator choices.
416    ///
417    /// This should only be used inside of `Arbitrary` implementations.
418    ///
419    /// Returns an error if there is not enough underlying data to make a
420    /// choice or if no choices are provided.
421    ///
422    /// # Examples
423    ///
424    /// Selecting a random item from a set:
425    ///
426    /// ```
427    /// use std::collections::BTreeSet;
428    /// use arbitrary::Unstructured;
429    ///
430    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
431    /// let set = BTreeSet::from(['a', 'b', 'c']);
432    ///
433    /// let choice = u.choose_iter(set.iter()).unwrap();
434    ///
435    /// println!("chose {}", choice);
436    /// ```
437    pub fn choose_iter<T, I>(&mut self, choices: I) -> Result<T>
438    where
439        I: IntoIterator<Item = T>,
440        I::IntoIter: ExactSizeIterator,
441    {
442        let mut choices = choices.into_iter();
443        let idx = self.choose_index(choices.len())?;
444        let choice = choices
445            .nth(idx)
446            .expect("ExactSizeIterator should have correct len");
447        Ok(choice)
448    }
449
450    /// Choose a value in `0..len`.
451    ///
452    /// Returns an error if the `len` is zero.
453    ///
454    /// # Examples
455    ///
456    /// Using Fisher–Yates shuffle shuffle to gerate an arbitrary permutation.
457    ///
458    /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle
459    ///
460    /// ```
461    /// use arbitrary::Unstructured;
462    ///
463    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
464    /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
465    /// let mut to_permute = &mut permutation[..];
466    /// while to_permute.len() > 1 {
467    ///     let idx = u.choose_index(to_permute.len()).unwrap();
468    ///     to_permute.swap(0, idx);
469    ///     to_permute = &mut to_permute[1..];
470    /// }
471    ///
472    /// println!("permutation: {:?}", permutation);
473    /// ```
474    ///
475    /// An error is returned if the length is zero:
476    ///
477    /// ```
478    /// use arbitrary::Unstructured;
479    ///
480    /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
481    /// let array: [i32; 0] = [];
482    ///
483    /// let result = u.choose_index(array.len());
484    ///
485    /// assert!(result.is_err());
486    /// ```
487    pub fn choose_index(&mut self, len: usize) -> Result<usize> {
488        if len == 0 {
489            return Err(Error::EmptyChoose);
490        }
491        let idx = self.int_in_range(0..=len - 1)?;
492        Ok(idx)
493    }
494
495    /// Generate a boolean according to the given ratio.
496    ///
497    /// # Panics
498    ///
499    /// Panics when the numerator and denominator do not meet these constraints:
500    ///
501    /// * `0 < numerator <= denominator`
502    ///
503    /// # Example
504    ///
505    /// Generate a boolean that is `true` five sevenths of the time:
506    ///
507    /// ```
508    /// # fn foo() -> arbitrary::Result<()> {
509    /// use arbitrary::Unstructured;
510    ///
511    /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0];
512    /// let mut u = Unstructured::new(&my_data);
513    ///
514    /// if u.ratio(5, 7)? {
515    ///     // Take this branch 5/7 of the time.
516    /// }
517    /// # Ok(())
518    /// # }
519    /// ```
520    pub fn ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool>
521    where
522        T: Int,
523    {
524        assert!(T::ZERO < numerator);
525        assert!(numerator <= denominator);
526        let x = self.int_in_range(T::ONE..=denominator)?;
527        Ok(x <= numerator)
528    }
529
530    /// Fill a `buffer` with bytes from the underlying raw data.
531    ///
532    /// This should only be called within an `Arbitrary` implementation. This is
533    /// a very low-level operation. You should generally prefer calling nested
534    /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
535    /// `String::arbitrary` over using this method directly.
536    ///
537    /// If this `Unstructured` does not have enough underlying data to fill the
538    /// whole `buffer`, it pads the buffer out with zeros.
539    ///
540    /// # Example
541    ///
542    /// ```
543    /// use arbitrary::Unstructured;
544    ///
545    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
546    ///
547    /// let mut buf = [0; 2];
548    ///
549    /// assert!(u.fill_buffer(&mut buf).is_ok());
550    /// assert_eq!(buf, [1, 2]);
551    ///
552    /// assert!(u.fill_buffer(&mut buf).is_ok());
553    /// assert_eq!(buf, [3, 4]);
554    ///
555    /// assert!(u.fill_buffer(&mut buf).is_ok());
556    /// assert_eq!(buf, [0, 0]);
557    /// ```
558    pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> {
559        let n = std::cmp::min(buffer.len(), self.data.len());
560        buffer[..n].copy_from_slice(&self.data[..n]);
561        for byte in buffer[n..].iter_mut() {
562            *byte = 0;
563        }
564        self.data = &self.data[n..];
565        Ok(())
566    }
567
568    /// Provide `size` bytes from the underlying raw data.
569    ///
570    /// This should only be called within an `Arbitrary` implementation. This is
571    /// a very low-level operation. You should generally prefer calling nested
572    /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
573    /// `String::arbitrary` over using this method directly.
574    ///
575    /// # Example
576    ///
577    /// ```
578    /// use arbitrary::Unstructured;
579    ///
580    /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
581    ///
582    /// assert!(u.bytes(2).unwrap() == &[1, 2]);
583    /// assert!(u.bytes(2).unwrap() == &[3, 4]);
584    /// ```
585    pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> {
586        if self.data.len() < size {
587            return Err(Error::NotEnoughData);
588        }
589
590        let (for_buf, rest) = self.data.split_at(size);
591        self.data = rest;
592        Ok(for_buf)
593    }
594
595    /// Peek at `size` number of bytes of the underlying raw input.
596    ///
597    /// Does not consume the bytes, only peeks at them.
598    ///
599    /// Returns `None` if there are not `size` bytes left in the underlying raw
600    /// input.
601    ///
602    /// # Example
603    ///
604    /// ```
605    /// use arbitrary::Unstructured;
606    ///
607    /// let u = Unstructured::new(&[1, 2, 3]);
608    ///
609    /// assert_eq!(u.peek_bytes(0).unwrap(), []);
610    /// assert_eq!(u.peek_bytes(1).unwrap(), [1]);
611    /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]);
612    /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]);
613    ///
614    /// assert!(u.peek_bytes(4).is_none());
615    /// ```
616    pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> {
617        self.data.get(..size)
618    }
619
620    /// Consume all of the rest of the remaining underlying bytes.
621    ///
622    /// Returns a slice of all the remaining, unconsumed bytes.
623    ///
624    /// # Example
625    ///
626    /// ```
627    /// use arbitrary::Unstructured;
628    ///
629    /// let mut u = Unstructured::new(&[1, 2, 3]);
630    ///
631    /// let mut remaining = u.take_rest();
632    ///
633    /// assert_eq!(remaining, [1, 2, 3]);
634    /// ```
635    pub fn take_rest(mut self) -> &'a [u8] {
636        mem::take(&mut self.data)
637    }
638
639    /// Provide an iterator over elements for constructing a collection
640    ///
641    /// This is useful for implementing [`Arbitrary::arbitrary`] on collections
642    /// since the implementation is simply `u.arbitrary_iter()?.collect()`
643    pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>(
644        &'b mut self,
645    ) -> Result<ArbitraryIter<'a, 'b, ElementType>> {
646        Ok(ArbitraryIter {
647            u: &mut *self,
648            _marker: PhantomData,
649        })
650    }
651
652    /// Provide an iterator over elements for constructing a collection from
653    /// all the remaining bytes.
654    ///
655    /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections
656    /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()`
657    pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>(
658        self,
659    ) -> Result<ArbitraryTakeRestIter<'a, ElementType>> {
660        Ok(ArbitraryTakeRestIter {
661            u: self,
662            _marker: PhantomData,
663        })
664    }
665
666    /// Call the given function an arbitrary number of times.
667    ///
668    /// The function is given this `Unstructured` so that it can continue to
669    /// generate arbitrary data and structures.
670    ///
671    /// You may optionaly specify minimum and maximum bounds on the number of
672    /// times the function is called.
673    ///
674    /// You may break out of the loop early by returning
675    /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return
676    /// `Ok(std::ops::ControlFlow::Continue)`.
677    ///
678    /// # Panics
679    ///
680    /// Panics if `min > max`.
681    ///
682    /// # Example
683    ///
684    /// Call a closure that generates an arbitrary type inside a context an
685    /// arbitrary number of times:
686    ///
687    /// ```
688    /// use arbitrary::{Result, Unstructured};
689    /// use std::ops::ControlFlow;
690    ///
691    /// enum Type {
692    ///     /// A boolean type.
693    ///     Bool,
694    ///
695    ///     /// An integer type.
696    ///     Int,
697    ///
698    ///     /// A list of the `i`th type in this type's context.
699    ///     List(usize),
700    /// }
701    ///
702    /// fn arbitrary_types_context(u: &mut Unstructured) -> Result<Vec<Type>> {
703    ///     let mut context = vec![];
704    ///
705    ///     u.arbitrary_loop(Some(10), Some(20), |u| {
706    ///         let num_choices = if context.is_empty() {
707    ///             2
708    ///         } else {
709    ///             3
710    ///         };
711    ///         let ty = match u.int_in_range::<u8>(1..=num_choices)? {
712    ///             1 => Type::Bool,
713    ///             2 => Type::Int,
714    ///             3 => Type::List(u.int_in_range(0..=context.len() - 1)?),
715    ///             _ => unreachable!(),
716    ///         };
717    ///         context.push(ty);
718    ///         Ok(ControlFlow::Continue(()))
719    ///     })?;
720    ///
721    ///     // The number of loop iterations are constrained by the min/max
722    ///     // bounds that we provided.
723    ///     assert!(context.len() >= 10);
724    ///     assert!(context.len() <= 20);
725    ///
726    ///     Ok(context)
727    /// }
728    /// ```
729    pub fn arbitrary_loop(
730        &mut self,
731        min: Option<u32>,
732        max: Option<u32>,
733        mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>,
734    ) -> Result<()> {
735        let min = min.unwrap_or(0);
736        let max = max.unwrap_or(u32::MAX);
737
738        for _ in 0..self.int_in_range(min..=max)? {
739            match f(self)? {
740                ControlFlow::Continue(_) => continue,
741                ControlFlow::Break(_) => break,
742            }
743        }
744
745        Ok(())
746    }
747}
748
749/// Utility iterator produced by [`Unstructured::arbitrary_iter`]
750pub struct ArbitraryIter<'a, 'b, ElementType> {
751    u: &'b mut Unstructured<'a>,
752    _marker: PhantomData<ElementType>,
753}
754
755impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, ElementType> {
756    type Item = Result<ElementType>;
757    fn next(&mut self) -> Option<Result<ElementType>> {
758        let keep_going = self.u.arbitrary().unwrap_or(false);
759        if keep_going {
760            Some(Arbitrary::arbitrary(self.u))
761        } else {
762            None
763        }
764    }
765}
766
767/// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`]
768pub struct ArbitraryTakeRestIter<'a, ElementType> {
769    u: Unstructured<'a>,
770    _marker: PhantomData<ElementType>,
771}
772
773impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> {
774    type Item = Result<ElementType>;
775    fn next(&mut self) -> Option<Result<ElementType>> {
776        let keep_going = self.u.arbitrary().unwrap_or(false);
777        if keep_going {
778            Some(Arbitrary::arbitrary(&mut self.u))
779        } else {
780            None
781        }
782    }
783}
784
785/// A trait that is implemented for all of the primitive integers:
786///
787/// * `u8`
788/// * `u16`
789/// * `u32`
790/// * `u64`
791/// * `u128`
792/// * `usize`
793/// * `i8`
794/// * `i16`
795/// * `i32`
796/// * `i64`
797/// * `i128`
798/// * `isize`
799///
800/// Don't implement this trait yourself.
801pub trait Int:
802    Copy
803    + std::fmt::Debug
804    + PartialOrd
805    + Ord
806    + ops::Sub<Self, Output = Self>
807    + ops::Rem<Self, Output = Self>
808    + ops::Shr<Self, Output = Self>
809    + ops::Shl<usize, Output = Self>
810    + ops::BitOr<Self, Output = Self>
811{
812    #[doc(hidden)]
813    type Unsigned: Int;
814
815    #[doc(hidden)]
816    const ZERO: Self;
817
818    #[doc(hidden)]
819    const ONE: Self;
820
821    #[doc(hidden)]
822    const MAX: Self;
823
824    #[doc(hidden)]
825    fn from_u8(b: u8) -> Self;
826
827    #[doc(hidden)]
828    fn from_usize(u: usize) -> Self;
829
830    #[doc(hidden)]
831    fn checked_add(self, rhs: Self) -> Option<Self>;
832
833    #[doc(hidden)]
834    fn wrapping_add(self, rhs: Self) -> Self;
835
836    #[doc(hidden)]
837    fn wrapping_sub(self, rhs: Self) -> Self;
838
839    #[doc(hidden)]
840    fn to_unsigned(self) -> Self::Unsigned;
841
842    #[doc(hidden)]
843    fn from_unsigned(unsigned: Self::Unsigned) -> Self;
844}
845
846macro_rules! impl_int {
847    ( $( $ty:ty : $unsigned_ty: ty ; )* ) => {
848        $(
849            impl Int for $ty {
850                type Unsigned = $unsigned_ty;
851
852                const ZERO: Self = 0;
853
854                const ONE: Self = 1;
855
856                const MAX: Self = Self::MAX;
857
858                fn from_u8(b: u8) -> Self {
859                    b as Self
860                }
861
862                fn from_usize(u: usize) -> Self {
863                    u as Self
864                }
865
866                fn checked_add(self, rhs: Self) -> Option<Self> {
867                    <$ty>::checked_add(self, rhs)
868                }
869
870                fn wrapping_add(self, rhs: Self) -> Self {
871                    <$ty>::wrapping_add(self, rhs)
872                }
873
874                fn wrapping_sub(self, rhs: Self) -> Self {
875                    <$ty>::wrapping_sub(self, rhs)
876                }
877
878                fn to_unsigned(self) -> Self::Unsigned {
879                    self as $unsigned_ty
880                }
881
882                fn from_unsigned(unsigned: $unsigned_ty) -> Self {
883                    unsigned as Self
884                }
885            }
886        )*
887    }
888}
889
890impl_int! {
891    u8: u8;
892    u16: u16;
893    u32: u32;
894    u64: u64;
895    u128: u128;
896    usize: usize;
897    i8: u8;
898    i16: u16;
899    i32: u32;
900    i64: u64;
901    i128: u128;
902    isize: usize;
903}
904
905#[cfg(test)]
906mod tests {
907    use super::*;
908
909    #[test]
910    fn test_byte_size() {
911        let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
912        // Should take one byte off the end
913        assert_eq!(u.arbitrary_byte_size().unwrap(), 6);
914        assert_eq!(u.len(), 9);
915        let mut v = vec![0; 260];
916        v.push(1);
917        v.push(4);
918        let mut u = Unstructured::new(&v);
919        // Should read two bytes off the end
920        assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104);
921        assert_eq!(u.len(), 260);
922    }
923
924    #[test]
925    fn int_in_range_of_one() {
926        let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
927        let x = u.int_in_range(0..=0).unwrap();
928        assert_eq!(x, 0);
929        let choice = *u.choose(&[42]).unwrap();
930        assert_eq!(choice, 42)
931    }
932
933    #[test]
934    fn int_in_range_uses_minimal_amount_of_bytes() {
935        let mut u = Unstructured::new(&[1, 2]);
936        assert_eq!(1, u.int_in_range::<u8>(0..=u8::MAX).unwrap());
937        assert_eq!(u.len(), 1);
938
939        let mut u = Unstructured::new(&[1, 2]);
940        assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32).unwrap());
941        assert_eq!(u.len(), 1);
942
943        let mut u = Unstructured::new(&[1]);
944        assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32 + 1).unwrap());
945        assert!(u.is_empty());
946    }
947
948    #[test]
949    fn int_in_range_in_bounds() {
950        for input in u8::MIN..=u8::MAX {
951            let input = [input];
952
953            let mut u = Unstructured::new(&input);
954            let x = u.int_in_range(1..=u8::MAX).unwrap();
955            assert_ne!(x, 0);
956
957            let mut u = Unstructured::new(&input);
958            let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
959            assert_ne!(x, u8::MAX);
960        }
961    }
962
963    #[test]
964    fn int_in_range_covers_unsigned_range() {
965        // Test that we generate all values within the range given to
966        // `int_in_range`.
967
968        let mut full = [false; u8::MAX as usize + 1];
969        let mut no_zero = [false; u8::MAX as usize];
970        let mut no_max = [false; u8::MAX as usize];
971        let mut narrow = [false; 10];
972
973        for input in u8::MIN..=u8::MAX {
974            let input = [input];
975
976            let mut u = Unstructured::new(&input);
977            let x = u.int_in_range(0..=u8::MAX).unwrap();
978            full[x as usize] = true;
979
980            let mut u = Unstructured::new(&input);
981            let x = u.int_in_range(1..=u8::MAX).unwrap();
982            no_zero[x as usize - 1] = true;
983
984            let mut u = Unstructured::new(&input);
985            let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
986            no_max[x as usize] = true;
987
988            let mut u = Unstructured::new(&input);
989            let x = u.int_in_range(100..=109).unwrap();
990            narrow[x as usize - 100] = true;
991        }
992
993        for (i, covered) in full.iter().enumerate() {
994            assert!(covered, "full[{}] should have been generated", i);
995        }
996        for (i, covered) in no_zero.iter().enumerate() {
997            assert!(covered, "no_zero[{}] should have been generated", i);
998        }
999        for (i, covered) in no_max.iter().enumerate() {
1000            assert!(covered, "no_max[{}] should have been generated", i);
1001        }
1002        for (i, covered) in narrow.iter().enumerate() {
1003            assert!(covered, "narrow[{}] should have been generated", i);
1004        }
1005    }
1006
1007    #[test]
1008    fn int_in_range_covers_signed_range() {
1009        // Test that we generate all values within the range given to
1010        // `int_in_range`.
1011
1012        let mut full = [false; u8::MAX as usize + 1];
1013        let mut no_min = [false; u8::MAX as usize];
1014        let mut no_max = [false; u8::MAX as usize];
1015        let mut narrow = [false; 21];
1016
1017        let abs_i8_min: isize = 128;
1018
1019        for input in 0..=u8::MAX {
1020            let input = [input];
1021
1022            let mut u = Unstructured::new(&input);
1023            let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap();
1024            full[(x as isize + abs_i8_min) as usize] = true;
1025
1026            let mut u = Unstructured::new(&input);
1027            let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap();
1028            no_min[(x as isize + abs_i8_min - 1) as usize] = true;
1029
1030            let mut u = Unstructured::new(&input);
1031            let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap();
1032            no_max[(x as isize + abs_i8_min) as usize] = true;
1033
1034            let mut u = Unstructured::new(&input);
1035            let x = u.int_in_range(-10..=10).unwrap();
1036            narrow[(x as isize + 10) as usize] = true;
1037        }
1038
1039        for (i, covered) in full.iter().enumerate() {
1040            assert!(covered, "full[{}] should have been generated", i);
1041        }
1042        for (i, covered) in no_min.iter().enumerate() {
1043            assert!(covered, "no_min[{}] should have been generated", i);
1044        }
1045        for (i, covered) in no_max.iter().enumerate() {
1046            assert!(covered, "no_max[{}] should have been generated", i);
1047        }
1048        for (i, covered) in narrow.iter().enumerate() {
1049            assert!(covered, "narrow[{}] should have been generated", i);
1050        }
1051    }
1052}