arbitrary/unstructured.rs
1// Copyright © 2019 The Rust Fuzz Project Developers.
2//
3// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
4// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
5// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
6// option. This file may not be copied, modified, or distributed
7// except according to those terms.
8
9//! Wrappers around raw, unstructured bytes.
10
11use crate::{Arbitrary, Error, Result};
12use std::marker::PhantomData;
13use std::ops::ControlFlow;
14use std::{mem, ops};
15
16/// A source of unstructured data.
17///
18/// An `Unstructured` helps `Arbitrary` implementations interpret raw data
19/// (typically provided by a fuzzer) as a "DNA string" that describes how to
20/// construct the `Arbitrary` type. The goal is that a small change to the "DNA
21/// string" (the raw data wrapped by an `Unstructured`) results in a small
22/// change to the generated `Arbitrary` instance. This helps a fuzzer
23/// efficiently explore the `Arbitrary`'s input space.
24///
25/// `Unstructured` is deterministic: given the same raw data, the same series of
26/// API calls will return the same results (modulo system resource constraints,
27/// like running out of memory). However, `Unstructured` does not guarantee
28/// anything beyond that: it makes not guarantee that it will yield bytes from
29/// the underlying data in any particular order.
30///
31/// You shouldn't generally need to use an `Unstructured` unless you are writing
32/// a custom `Arbitrary` implementation by hand, instead of deriving it. Mostly,
33/// you should just be passing it through to nested `Arbitrary::arbitrary`
34/// calls.
35///
36/// # Example
37///
38/// Imagine you were writing a color conversion crate. You might want to write
39/// fuzz tests that take a random RGB color and assert various properties, run
40/// functions and make sure nothing panics, etc.
41///
42/// Below is what translating the fuzzer's raw input into an `Unstructured` and
43/// using that to generate an arbitrary RGB color might look like:
44///
45/// ```
46/// # #[cfg(feature = "derive")] fn foo() {
47/// use arbitrary::{Arbitrary, Unstructured};
48///
49/// /// An RGB color.
50/// #[derive(Arbitrary)]
51/// pub struct Rgb {
52/// r: u8,
53/// g: u8,
54/// b: u8,
55/// }
56///
57/// // Get the raw bytes from the fuzzer.
58/// # let get_input_from_fuzzer = || &[];
59/// let raw_data: &[u8] = get_input_from_fuzzer();
60///
61/// // Wrap it in an `Unstructured`.
62/// let mut unstructured = Unstructured::new(raw_data);
63///
64/// // Generate an `Rgb` color and run our checks.
65/// if let Ok(rgb) = Rgb::arbitrary(&mut unstructured) {
66/// # let run_my_color_conversion_checks = |_| {};
67/// run_my_color_conversion_checks(rgb);
68/// }
69/// # }
70/// ```
71#[derive(Debug)]
72pub struct Unstructured<'a> {
73 data: &'a [u8],
74}
75
76impl<'a> Unstructured<'a> {
77 /// Create a new `Unstructured` from the given raw data.
78 ///
79 /// # Example
80 ///
81 /// ```
82 /// use arbitrary::Unstructured;
83 ///
84 /// let u = Unstructured::new(&[1, 2, 3, 4]);
85 /// ```
86 pub fn new(data: &'a [u8]) -> Self {
87 Unstructured { data }
88 }
89
90 /// Get the number of remaining bytes of underlying data that are still
91 /// available.
92 ///
93 /// # Example
94 ///
95 /// ```
96 /// use arbitrary::{Arbitrary, Unstructured};
97 ///
98 /// let mut u = Unstructured::new(&[1, 2, 3]);
99 ///
100 /// // Initially have three bytes of data.
101 /// assert_eq!(u.len(), 3);
102 ///
103 /// // Generating a `bool` consumes one byte from the underlying data, so
104 /// // we are left with two bytes afterwards.
105 /// let _ = bool::arbitrary(&mut u);
106 /// assert_eq!(u.len(), 2);
107 /// ```
108 #[inline]
109 pub fn len(&self) -> usize {
110 self.data.len()
111 }
112
113 /// Is the underlying unstructured data exhausted?
114 ///
115 /// `unstructured.is_empty()` is the same as `unstructured.len() == 0`.
116 ///
117 /// # Example
118 ///
119 /// ```
120 /// use arbitrary::{Arbitrary, Unstructured};
121 ///
122 /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
123 ///
124 /// // Initially, we are not empty.
125 /// assert!(!u.is_empty());
126 ///
127 /// // Generating a `u32` consumes all four bytes of the underlying data, so
128 /// // we become empty afterwards.
129 /// let _ = u32::arbitrary(&mut u);
130 /// assert!(u.is_empty());
131 /// ```
132 #[inline]
133 pub fn is_empty(&self) -> bool {
134 self.len() == 0
135 }
136
137 /// Generate an arbitrary instance of `A`.
138 ///
139 /// This is simply a helper method that is equivalent to `<A as
140 /// Arbitrary>::arbitrary(self)`. This helper is a little bit more concise,
141 /// and can be used in situations where Rust's type inference will figure
142 /// out what `A` should be.
143 ///
144 /// # Example
145 ///
146 /// ```
147 /// # #[cfg(feature="derive")] fn foo() -> arbitrary::Result<()> {
148 /// use arbitrary::{Arbitrary, Unstructured};
149 ///
150 /// #[derive(Arbitrary)]
151 /// struct MyType {
152 /// // ...
153 /// }
154 ///
155 /// fn do_stuff(value: MyType) {
156 /// # let _ = value;
157 /// // ...
158 /// }
159 ///
160 /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
161 ///
162 /// // Rust's type inference can figure out that `value` should be of type
163 /// // `MyType` here:
164 /// let value = u.arbitrary()?;
165 /// do_stuff(value);
166 /// # Ok(()) }
167 /// ```
168 pub fn arbitrary<A>(&mut self) -> Result<A>
169 where
170 A: Arbitrary<'a>,
171 {
172 <A as Arbitrary<'a>>::arbitrary(self)
173 }
174
175 /// Get the number of elements to insert when building up a collection of
176 /// arbitrary `ElementType`s.
177 ///
178 /// This uses the [`<ElementType as
179 /// Arbitrary>::size_hint`][crate::Arbitrary::size_hint] method to smartly
180 /// choose a length such that we most likely have enough underlying bytes to
181 /// construct that many arbitrary `ElementType`s.
182 ///
183 /// This should only be called within an `Arbitrary` implementation.
184 ///
185 /// # Example
186 ///
187 /// ```
188 /// use arbitrary::{Arbitrary, Result, Unstructured};
189 /// # pub struct MyCollection<T> { _t: std::marker::PhantomData<T> }
190 /// # impl<T> MyCollection<T> {
191 /// # pub fn with_capacity(capacity: usize) -> Self { MyCollection { _t: std::marker::PhantomData } }
192 /// # pub fn insert(&mut self, element: T) {}
193 /// # }
194 ///
195 /// impl<'a, T> Arbitrary<'a> for MyCollection<T>
196 /// where
197 /// T: Arbitrary<'a>,
198 /// {
199 /// fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
200 /// // Get the number of `T`s we should insert into our collection.
201 /// let len = u.arbitrary_len::<T>()?;
202 ///
203 /// // And then create a collection of that length!
204 /// let mut my_collection = MyCollection::with_capacity(len);
205 /// for _ in 0..len {
206 /// let element = T::arbitrary(u)?;
207 /// my_collection.insert(element);
208 /// }
209 ///
210 /// Ok(my_collection)
211 /// }
212 /// }
213 /// ```
214 pub fn arbitrary_len<ElementType>(&mut self) -> Result<usize>
215 where
216 ElementType: Arbitrary<'a>,
217 {
218 let byte_size = self.arbitrary_byte_size()?;
219 let (lower, upper) = <ElementType as Arbitrary>::size_hint(0);
220 let elem_size = upper.unwrap_or(lower * 2);
221 let elem_size = std::cmp::max(1, elem_size);
222 Ok(byte_size / elem_size)
223 }
224
225 fn arbitrary_byte_size(&mut self) -> Result<usize> {
226 if self.data.is_empty() {
227 Ok(0)
228 } else if self.data.len() == 1 {
229 self.data = &[];
230 Ok(0)
231 } else {
232 // Take lengths from the end of the data, since the `libFuzzer` folks
233 // found that this lets fuzzers more efficiently explore the input
234 // space.
235 //
236 // https://github.com/rust-fuzz/libfuzzer-sys/blob/0c450753/libfuzzer/utils/FuzzedDataProvider.h#L92-L97
237
238 // We only consume as many bytes as necessary to cover the entire
239 // range of the byte string.
240 // Note: We cast to u64 so we don't overflow when checking u32::MAX + 4 on 32-bit archs
241 let len = if self.data.len() as u64 <= u8::MAX as u64 + 1 {
242 let bytes = 1;
243 let max_size = self.data.len() - bytes;
244 let (rest, for_size) = self.data.split_at(max_size);
245 self.data = rest;
246 Self::int_in_range_impl(0..=max_size as u8, for_size.iter().copied())?.0 as usize
247 } else if self.data.len() as u64 <= u16::MAX as u64 + 2 {
248 let bytes = 2;
249 let max_size = self.data.len() - bytes;
250 let (rest, for_size) = self.data.split_at(max_size);
251 self.data = rest;
252 Self::int_in_range_impl(0..=max_size as u16, for_size.iter().copied())?.0 as usize
253 } else if self.data.len() as u64 <= u32::MAX as u64 + 4 {
254 let bytes = 4;
255 let max_size = self.data.len() - bytes;
256 let (rest, for_size) = self.data.split_at(max_size);
257 self.data = rest;
258 Self::int_in_range_impl(0..=max_size as u32, for_size.iter().copied())?.0 as usize
259 } else {
260 let bytes = 8;
261 let max_size = self.data.len() - bytes;
262 let (rest, for_size) = self.data.split_at(max_size);
263 self.data = rest;
264 Self::int_in_range_impl(0..=max_size as u64, for_size.iter().copied())?.0 as usize
265 };
266
267 Ok(len)
268 }
269 }
270
271 /// Generate an integer within the given range.
272 ///
273 /// Do not use this to generate the size of a collection. Use
274 /// `arbitrary_len` instead.
275 ///
276 /// The probability distribution of the return value is not necessarily uniform.
277 ///
278 /// Returns `range.start()`, not an error,
279 /// if this `Unstructured` [is empty][Unstructured::is_empty].
280 ///
281 /// # Panics
282 ///
283 /// Panics if `range.start > range.end`. That is, the given range must be
284 /// non-empty.
285 ///
286 /// # Example
287 ///
288 /// ```
289 /// # fn foo() -> arbitrary::Result<()> {
290 /// use arbitrary::{Arbitrary, Unstructured};
291 ///
292 /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
293 ///
294 /// let x: i32 = u.int_in_range(-5_000..=-1_000)?;
295 ///
296 /// assert!(-5_000 <= x);
297 /// assert!(x <= -1_000);
298 /// # Ok(()) }
299 /// ```
300 pub fn int_in_range<T>(&mut self, range: ops::RangeInclusive<T>) -> Result<T>
301 where
302 T: Int,
303 {
304 let (result, bytes_consumed) = Self::int_in_range_impl(range, self.data.iter().cloned())?;
305 self.data = &self.data[bytes_consumed..];
306 Ok(result)
307 }
308
309 fn int_in_range_impl<T>(
310 range: ops::RangeInclusive<T>,
311 mut bytes: impl Iterator<Item = u8>,
312 ) -> Result<(T, usize)>
313 where
314 T: Int,
315 {
316 let start = *range.start();
317 let end = *range.end();
318 assert!(
319 start <= end,
320 "`arbitrary::Unstructured::int_in_range` requires a non-empty range"
321 );
322
323 // When there is only one possible choice, don't waste any entropy from
324 // the underlying data.
325 if start == end {
326 return Ok((start, 0));
327 }
328
329 // From here on out we work with the unsigned representation. All of the
330 // operations performed below work out just as well whether or not `T`
331 // is a signed or unsigned integer.
332 let start = start.to_unsigned();
333 let end = end.to_unsigned();
334
335 let delta = end.wrapping_sub(start);
336 debug_assert_ne!(delta, T::Unsigned::ZERO);
337
338 // Compute an arbitrary integer offset from the start of the range. We
339 // do this by consuming `size_of(T)` bytes from the input to create an
340 // arbitrary integer and then clamping that int into our range bounds
341 // with a modulo operation.
342 let mut arbitrary_int = T::Unsigned::ZERO;
343 let mut bytes_consumed: usize = 0;
344
345 while (bytes_consumed < mem::size_of::<T>())
346 && (delta >> T::Unsigned::from_usize(bytes_consumed * 8)) > T::Unsigned::ZERO
347 {
348 let byte = match bytes.next() {
349 None => break,
350 Some(b) => b,
351 };
352 bytes_consumed += 1;
353
354 // Combine this byte into our arbitrary integer, but avoid
355 // overflowing the shift for `u8` and `i8`.
356 arbitrary_int = if mem::size_of::<T>() == 1 {
357 T::Unsigned::from_u8(byte)
358 } else {
359 (arbitrary_int << 8) | T::Unsigned::from_u8(byte)
360 };
361 }
362
363 let offset = if delta == T::Unsigned::MAX {
364 arbitrary_int
365 } else {
366 arbitrary_int % (delta.checked_add(T::Unsigned::ONE).unwrap())
367 };
368
369 // Finally, we add `start` to our offset from `start` to get the result
370 // actual value within the range.
371 let result = start.wrapping_add(offset);
372
373 // And convert back to our maybe-signed representation.
374 let result = T::from_unsigned(result);
375 debug_assert!(*range.start() <= result);
376 debug_assert!(result <= *range.end());
377
378 Ok((result, bytes_consumed))
379 }
380
381 /// Choose one of the given choices.
382 ///
383 /// This should only be used inside of `Arbitrary` implementations.
384 ///
385 /// The probability distribution of choices is not necessarily uniform.
386 ///
387 /// Returns the first choice, not an error,
388 /// if this `Unstructured` [is empty][Unstructured::is_empty].
389 ///
390 /// Returns an error if no choices are provided.
391 ///
392 /// # Examples
393 ///
394 /// Selecting from an array of choices:
395 ///
396 /// ```
397 /// use arbitrary::Unstructured;
398 ///
399 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
400 /// let choices = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
401 ///
402 /// let choice = u.choose(&choices).unwrap();
403 ///
404 /// println!("chose {}", choice);
405 /// ```
406 ///
407 /// An error is returned if no choices are provided:
408 ///
409 /// ```
410 /// use arbitrary::Unstructured;
411 ///
412 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
413 /// let choices: [char; 0] = [];
414 ///
415 /// let result = u.choose(&choices);
416 ///
417 /// assert!(result.is_err());
418 /// ```
419 pub fn choose<'b, T>(&mut self, choices: &'b [T]) -> Result<&'b T> {
420 let idx = self.choose_index(choices.len())?;
421 Ok(&choices[idx])
422 }
423
424 /// Choose one of the given iterator choices.
425 ///
426 /// This should only be used inside of `Arbitrary` implementations.
427 ///
428 /// The probability distribution of choices is not necessarily uniform.
429 ///
430 /// Returns the first choice, not an error,
431 /// if this `Unstructured` [is empty][Unstructured::is_empty].
432 ///
433 /// Returns an error if no choices are provided.
434 ///
435 /// # Examples
436 ///
437 /// Selecting a random item from a set:
438 ///
439 /// ```
440 /// use std::collections::BTreeSet;
441 /// use arbitrary::Unstructured;
442 ///
443 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
444 /// let set = BTreeSet::from(['a', 'b', 'c']);
445 ///
446 /// let choice = u.choose_iter(set.iter()).unwrap();
447 ///
448 /// println!("chose {}", choice);
449 /// ```
450 pub fn choose_iter<T, I>(&mut self, choices: I) -> Result<T>
451 where
452 I: IntoIterator<Item = T>,
453 I::IntoIter: ExactSizeIterator,
454 {
455 let mut choices = choices.into_iter();
456 let idx = self.choose_index(choices.len())?;
457 let choice = choices
458 .nth(idx)
459 .expect("ExactSizeIterator should have correct len");
460 Ok(choice)
461 }
462
463 /// Choose a value in `0..len`.
464 ///
465 /// The probability distribution of return values is not necessarily uniform.
466 ///
467 /// Returns zero, not an error, if this `Unstructured` [is empty][Unstructured::is_empty].
468 ///
469 /// Returns an error if the `len` is zero.
470 ///
471 /// # Examples
472 ///
473 /// Using Fisher–Yates shuffle shuffle to generate an arbitrary permutation.
474 ///
475 /// [Fisher–Yates shuffle]: https://en.wikipedia.org/wiki/Fisher–Yates_shuffle
476 ///
477 /// ```
478 /// use arbitrary::Unstructured;
479 ///
480 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
481 /// let mut permutation = ['a', 'b', 'c', 'd', 'e', 'f', 'g'];
482 /// let mut to_permute = &mut permutation[..];
483 /// while to_permute.len() > 1 {
484 /// let idx = u.choose_index(to_permute.len()).unwrap();
485 /// to_permute.swap(0, idx);
486 /// to_permute = &mut to_permute[1..];
487 /// }
488 ///
489 /// println!("permutation: {:?}", permutation);
490 /// ```
491 ///
492 /// An error is returned if the length is zero:
493 ///
494 /// ```
495 /// use arbitrary::Unstructured;
496 ///
497 /// let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 0]);
498 /// let array: [i32; 0] = [];
499 ///
500 /// let result = u.choose_index(array.len());
501 ///
502 /// assert!(result.is_err());
503 /// ```
504 pub fn choose_index(&mut self, len: usize) -> Result<usize> {
505 if len == 0 {
506 return Err(Error::EmptyChoose);
507 }
508 let idx = self.int_in_range(0..=len - 1)?;
509 Ok(idx)
510 }
511
512 /// Generate a boolean which is true with probability approximately the given ratio.
513 ///
514 /// Returns true, not an error, if this `Unstructured` [is empty][Unstructured::is_empty].
515 ///
516 /// # Panics
517 ///
518 /// Panics when the numerator and denominator do not meet these constraints:
519 ///
520 /// * `0 < numerator <= denominator`
521 ///
522 /// # Example
523 ///
524 /// Generate a boolean that is `true` five sevenths of the time:
525 ///
526 /// ```
527 /// # fn foo() -> arbitrary::Result<()> {
528 /// use arbitrary::Unstructured;
529 ///
530 /// # let my_data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0];
531 /// let mut u = Unstructured::new(&my_data);
532 ///
533 /// if u.ratio(5, 7)? {
534 /// // Take this branch approximately 5/7 of the time.
535 /// }
536 /// # Ok(())
537 /// # }
538 /// ```
539 pub fn ratio<T>(&mut self, numerator: T, denominator: T) -> Result<bool>
540 where
541 T: Int,
542 {
543 assert!(T::ZERO < numerator);
544 assert!(numerator <= denominator);
545 let x = self.int_in_range(T::ONE..=denominator)?;
546 Ok(x <= numerator)
547 }
548
549 /// Fill a `buffer` with bytes from the underlying raw data.
550 ///
551 /// This should only be called within an `Arbitrary` implementation. This is
552 /// a very low-level operation. You should generally prefer calling nested
553 /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
554 /// `String::arbitrary` over using this method directly.
555 ///
556 /// If this `Unstructured` does not have enough underlying data to fill the
557 /// whole `buffer`, it pads the buffer out with zeros.
558 ///
559 /// # Example
560 ///
561 /// ```
562 /// use arbitrary::Unstructured;
563 ///
564 /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
565 ///
566 /// let mut buf = [0; 2];
567 ///
568 /// assert!(u.fill_buffer(&mut buf).is_ok());
569 /// assert_eq!(buf, [1, 2]);
570 ///
571 /// assert!(u.fill_buffer(&mut buf).is_ok());
572 /// assert_eq!(buf, [3, 4]);
573 ///
574 /// assert!(u.fill_buffer(&mut buf).is_ok());
575 /// assert_eq!(buf, [0, 0]);
576 /// ```
577 pub fn fill_buffer(&mut self, buffer: &mut [u8]) -> Result<()> {
578 let n = std::cmp::min(buffer.len(), self.data.len());
579 buffer[..n].copy_from_slice(&self.data[..n]);
580 for byte in buffer[n..].iter_mut() {
581 *byte = 0;
582 }
583 self.data = &self.data[n..];
584 Ok(())
585 }
586
587 /// Provide `size` bytes from the underlying raw data.
588 ///
589 /// This should only be called within an `Arbitrary` implementation. This is
590 /// a very low-level operation. You should generally prefer calling nested
591 /// `Arbitrary` implementations like `<Vec<u8>>::arbitrary` and
592 /// `String::arbitrary` over using this method directly.
593 ///
594 /// # Example
595 ///
596 /// ```
597 /// use arbitrary::Unstructured;
598 ///
599 /// let mut u = Unstructured::new(&[1, 2, 3, 4]);
600 ///
601 /// assert!(u.bytes(2).unwrap() == &[1, 2]);
602 /// assert!(u.bytes(2).unwrap() == &[3, 4]);
603 /// ```
604 pub fn bytes(&mut self, size: usize) -> Result<&'a [u8]> {
605 if self.data.len() < size {
606 return Err(Error::NotEnoughData);
607 }
608
609 let (for_buf, rest) = self.data.split_at(size);
610 self.data = rest;
611 Ok(for_buf)
612 }
613
614 /// Peek at `size` number of bytes of the underlying raw input.
615 ///
616 /// Does not consume the bytes, only peeks at them.
617 ///
618 /// Returns `None` if there are not `size` bytes left in the underlying raw
619 /// input.
620 ///
621 /// # Example
622 ///
623 /// ```
624 /// use arbitrary::Unstructured;
625 ///
626 /// let u = Unstructured::new(&[1, 2, 3]);
627 ///
628 /// assert_eq!(u.peek_bytes(0).unwrap(), []);
629 /// assert_eq!(u.peek_bytes(1).unwrap(), [1]);
630 /// assert_eq!(u.peek_bytes(2).unwrap(), [1, 2]);
631 /// assert_eq!(u.peek_bytes(3).unwrap(), [1, 2, 3]);
632 ///
633 /// assert!(u.peek_bytes(4).is_none());
634 /// ```
635 pub fn peek_bytes(&self, size: usize) -> Option<&'a [u8]> {
636 self.data.get(..size)
637 }
638
639 /// Consume all of the rest of the remaining underlying bytes.
640 ///
641 /// Returns a slice of all the remaining, unconsumed bytes.
642 ///
643 /// # Example
644 ///
645 /// ```
646 /// use arbitrary::Unstructured;
647 ///
648 /// let mut u = Unstructured::new(&[1, 2, 3]);
649 ///
650 /// let mut remaining = u.take_rest();
651 ///
652 /// assert_eq!(remaining, [1, 2, 3]);
653 /// ```
654 pub fn take_rest(mut self) -> &'a [u8] {
655 mem::take(&mut self.data)
656 }
657
658 /// Provide an iterator over elements for constructing a collection
659 ///
660 /// This is useful for implementing [`Arbitrary::arbitrary`] on collections
661 /// since the implementation is simply `u.arbitrary_iter()?.collect()`
662 pub fn arbitrary_iter<'b, ElementType: Arbitrary<'a>>(
663 &'b mut self,
664 ) -> Result<ArbitraryIter<'a, 'b, ElementType>> {
665 Ok(ArbitraryIter {
666 u: &mut *self,
667 _marker: PhantomData,
668 })
669 }
670
671 /// Provide an iterator over elements for constructing a collection from
672 /// all the remaining bytes.
673 ///
674 /// This is useful for implementing [`Arbitrary::arbitrary_take_rest`] on collections
675 /// since the implementation is simply `u.arbitrary_take_rest_iter()?.collect()`
676 pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>(
677 self,
678 ) -> Result<ArbitraryTakeRestIter<'a, ElementType>> {
679 Ok(ArbitraryTakeRestIter {
680 u: self,
681 _marker: PhantomData,
682 })
683 }
684
685 /// Call the given function an arbitrary number of times.
686 ///
687 /// The function is given this `Unstructured` so that it can continue to
688 /// generate arbitrary data and structures.
689 ///
690 /// You may optionaly specify minimum and maximum bounds on the number of
691 /// times the function is called.
692 ///
693 /// You may break out of the loop early by returning
694 /// `Ok(std::ops::ControlFlow::Break)`. To continue the loop, return
695 /// `Ok(std::ops::ControlFlow::Continue)`.
696 ///
697 /// # Panics
698 ///
699 /// Panics if `min > max`.
700 ///
701 /// # Example
702 ///
703 /// Call a closure that generates an arbitrary type inside a context an
704 /// arbitrary number of times:
705 ///
706 /// ```
707 /// use arbitrary::{Result, Unstructured};
708 /// use std::ops::ControlFlow;
709 ///
710 /// enum Type {
711 /// /// A boolean type.
712 /// Bool,
713 ///
714 /// /// An integer type.
715 /// Int,
716 ///
717 /// /// A list of the `i`th type in this type's context.
718 /// List(usize),
719 /// }
720 ///
721 /// fn arbitrary_types_context(u: &mut Unstructured) -> Result<Vec<Type>> {
722 /// let mut context = vec![];
723 ///
724 /// u.arbitrary_loop(Some(10), Some(20), |u| {
725 /// let num_choices = if context.is_empty() {
726 /// 2
727 /// } else {
728 /// 3
729 /// };
730 /// let ty = match u.int_in_range::<u8>(1..=num_choices)? {
731 /// 1 => Type::Bool,
732 /// 2 => Type::Int,
733 /// 3 => Type::List(u.int_in_range(0..=context.len() - 1)?),
734 /// _ => unreachable!(),
735 /// };
736 /// context.push(ty);
737 /// Ok(ControlFlow::Continue(()))
738 /// })?;
739 ///
740 /// // The number of loop iterations are constrained by the min/max
741 /// // bounds that we provided.
742 /// assert!(context.len() >= 10);
743 /// assert!(context.len() <= 20);
744 ///
745 /// Ok(context)
746 /// }
747 /// ```
748 pub fn arbitrary_loop(
749 &mut self,
750 min: Option<u32>,
751 max: Option<u32>,
752 mut f: impl FnMut(&mut Self) -> Result<ControlFlow<(), ()>>,
753 ) -> Result<()> {
754 let min = min.unwrap_or(0);
755 let max = max.unwrap_or(u32::MAX);
756
757 for _ in 0..self.int_in_range(min..=max)? {
758 match f(self)? {
759 ControlFlow::Continue(_) => continue,
760 ControlFlow::Break(_) => break,
761 }
762 }
763
764 Ok(())
765 }
766}
767
768/// Utility iterator produced by [`Unstructured::arbitrary_iter`]
769pub struct ArbitraryIter<'a, 'b, ElementType> {
770 u: &'b mut Unstructured<'a>,
771 _marker: PhantomData<ElementType>,
772}
773
774impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, '_, ElementType> {
775 type Item = Result<ElementType>;
776 fn next(&mut self) -> Option<Result<ElementType>> {
777 let keep_going = self.u.arbitrary().unwrap_or(false);
778 if keep_going {
779 Some(Arbitrary::arbitrary(self.u))
780 } else {
781 None
782 }
783 }
784}
785
786/// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`]
787pub struct ArbitraryTakeRestIter<'a, ElementType> {
788 u: Unstructured<'a>,
789 _marker: PhantomData<ElementType>,
790}
791
792impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> {
793 type Item = Result<ElementType>;
794 fn next(&mut self) -> Option<Result<ElementType>> {
795 let keep_going = self.u.arbitrary().unwrap_or(false);
796 if keep_going {
797 Some(Arbitrary::arbitrary(&mut self.u))
798 } else {
799 None
800 }
801 }
802}
803
804/// A trait that is implemented for all of the primitive integers:
805///
806/// * `u8`
807/// * `u16`
808/// * `u32`
809/// * `u64`
810/// * `u128`
811/// * `usize`
812/// * `i8`
813/// * `i16`
814/// * `i32`
815/// * `i64`
816/// * `i128`
817/// * `isize`
818///
819/// Don't implement this trait yourself.
820pub trait Int:
821 Copy
822 + std::fmt::Debug
823 + PartialOrd
824 + Ord
825 + ops::Sub<Self, Output = Self>
826 + ops::Rem<Self, Output = Self>
827 + ops::Shr<Self, Output = Self>
828 + ops::Shl<usize, Output = Self>
829 + ops::BitOr<Self, Output = Self>
830{
831 #[doc(hidden)]
832 type Unsigned: Int;
833
834 #[doc(hidden)]
835 const ZERO: Self;
836
837 #[doc(hidden)]
838 const ONE: Self;
839
840 #[doc(hidden)]
841 const MAX: Self;
842
843 #[doc(hidden)]
844 fn from_u8(b: u8) -> Self;
845
846 #[doc(hidden)]
847 fn from_usize(u: usize) -> Self;
848
849 #[doc(hidden)]
850 fn checked_add(self, rhs: Self) -> Option<Self>;
851
852 #[doc(hidden)]
853 fn wrapping_add(self, rhs: Self) -> Self;
854
855 #[doc(hidden)]
856 fn wrapping_sub(self, rhs: Self) -> Self;
857
858 #[doc(hidden)]
859 fn to_unsigned(self) -> Self::Unsigned;
860
861 #[doc(hidden)]
862 fn from_unsigned(unsigned: Self::Unsigned) -> Self;
863}
864
865macro_rules! impl_int {
866 ( $( $ty:ty : $unsigned_ty: ty ; )* ) => {
867 $(
868 impl Int for $ty {
869 type Unsigned = $unsigned_ty;
870
871 const ZERO: Self = 0;
872
873 const ONE: Self = 1;
874
875 const MAX: Self = Self::MAX;
876
877 fn from_u8(b: u8) -> Self {
878 b as Self
879 }
880
881 fn from_usize(u: usize) -> Self {
882 u as Self
883 }
884
885 fn checked_add(self, rhs: Self) -> Option<Self> {
886 <$ty>::checked_add(self, rhs)
887 }
888
889 fn wrapping_add(self, rhs: Self) -> Self {
890 <$ty>::wrapping_add(self, rhs)
891 }
892
893 fn wrapping_sub(self, rhs: Self) -> Self {
894 <$ty>::wrapping_sub(self, rhs)
895 }
896
897 fn to_unsigned(self) -> Self::Unsigned {
898 self as $unsigned_ty
899 }
900
901 fn from_unsigned(unsigned: $unsigned_ty) -> Self {
902 unsigned as Self
903 }
904 }
905 )*
906 }
907}
908
909impl_int! {
910 u8: u8;
911 u16: u16;
912 u32: u32;
913 u64: u64;
914 u128: u128;
915 usize: usize;
916 i8: u8;
917 i16: u16;
918 i32: u32;
919 i64: u64;
920 i128: u128;
921 isize: usize;
922}
923
924#[cfg(test)]
925mod tests {
926 use super::*;
927
928 #[test]
929 fn test_byte_size() {
930 let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
931 // Should take one byte off the end
932 assert_eq!(u.arbitrary_byte_size().unwrap(), 6);
933 assert_eq!(u.len(), 9);
934 let mut v = vec![0; 260];
935 v.push(1);
936 v.push(4);
937 let mut u = Unstructured::new(&v);
938 // Should read two bytes off the end
939 assert_eq!(u.arbitrary_byte_size().unwrap(), 0x104);
940 assert_eq!(u.len(), 260);
941 }
942
943 #[test]
944 fn int_in_range_of_one() {
945 let mut u = Unstructured::new(&[1, 2, 3, 4, 5, 6, 7, 8, 9, 6]);
946 let x = u.int_in_range(0..=0).unwrap();
947 assert_eq!(x, 0);
948 let choice = *u.choose(&[42]).unwrap();
949 assert_eq!(choice, 42)
950 }
951
952 #[test]
953 fn int_in_range_uses_minimal_amount_of_bytes() {
954 let mut u = Unstructured::new(&[1, 2]);
955 assert_eq!(1, u.int_in_range::<u8>(0..=u8::MAX).unwrap());
956 assert_eq!(u.len(), 1);
957
958 let mut u = Unstructured::new(&[1, 2]);
959 assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32).unwrap());
960 assert_eq!(u.len(), 1);
961
962 let mut u = Unstructured::new(&[1]);
963 assert_eq!(1, u.int_in_range::<u32>(0..=u8::MAX as u32 + 1).unwrap());
964 assert!(u.is_empty());
965 }
966
967 #[test]
968 fn int_in_range_in_bounds() {
969 for input in u8::MIN..=u8::MAX {
970 let input = [input];
971
972 let mut u = Unstructured::new(&input);
973 let x = u.int_in_range(1..=u8::MAX).unwrap();
974 assert_ne!(x, 0);
975
976 let mut u = Unstructured::new(&input);
977 let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
978 assert_ne!(x, u8::MAX);
979 }
980 }
981
982 #[test]
983 fn int_in_range_covers_unsigned_range() {
984 // Test that we generate all values within the range given to
985 // `int_in_range`.
986
987 let mut full = [false; u8::MAX as usize + 1];
988 let mut no_zero = [false; u8::MAX as usize];
989 let mut no_max = [false; u8::MAX as usize];
990 let mut narrow = [false; 10];
991
992 for input in u8::MIN..=u8::MAX {
993 let input = [input];
994
995 let mut u = Unstructured::new(&input);
996 let x = u.int_in_range(0..=u8::MAX).unwrap();
997 full[x as usize] = true;
998
999 let mut u = Unstructured::new(&input);
1000 let x = u.int_in_range(1..=u8::MAX).unwrap();
1001 no_zero[x as usize - 1] = true;
1002
1003 let mut u = Unstructured::new(&input);
1004 let x = u.int_in_range(0..=u8::MAX - 1).unwrap();
1005 no_max[x as usize] = true;
1006
1007 let mut u = Unstructured::new(&input);
1008 let x = u.int_in_range(100..=109).unwrap();
1009 narrow[x as usize - 100] = true;
1010 }
1011
1012 for (i, covered) in full.iter().enumerate() {
1013 assert!(covered, "full[{}] should have been generated", i);
1014 }
1015 for (i, covered) in no_zero.iter().enumerate() {
1016 assert!(covered, "no_zero[{}] should have been generated", i);
1017 }
1018 for (i, covered) in no_max.iter().enumerate() {
1019 assert!(covered, "no_max[{}] should have been generated", i);
1020 }
1021 for (i, covered) in narrow.iter().enumerate() {
1022 assert!(covered, "narrow[{}] should have been generated", i);
1023 }
1024 }
1025
1026 #[test]
1027 fn int_in_range_covers_signed_range() {
1028 // Test that we generate all values within the range given to
1029 // `int_in_range`.
1030
1031 let mut full = [false; u8::MAX as usize + 1];
1032 let mut no_min = [false; u8::MAX as usize];
1033 let mut no_max = [false; u8::MAX as usize];
1034 let mut narrow = [false; 21];
1035
1036 let abs_i8_min: isize = 128;
1037
1038 for input in 0..=u8::MAX {
1039 let input = [input];
1040
1041 let mut u = Unstructured::new(&input);
1042 let x = u.int_in_range(i8::MIN..=i8::MAX).unwrap();
1043 full[(x as isize + abs_i8_min) as usize] = true;
1044
1045 let mut u = Unstructured::new(&input);
1046 let x = u.int_in_range(i8::MIN + 1..=i8::MAX).unwrap();
1047 no_min[(x as isize + abs_i8_min - 1) as usize] = true;
1048
1049 let mut u = Unstructured::new(&input);
1050 let x = u.int_in_range(i8::MIN..=i8::MAX - 1).unwrap();
1051 no_max[(x as isize + abs_i8_min) as usize] = true;
1052
1053 let mut u = Unstructured::new(&input);
1054 let x = u.int_in_range(-10..=10).unwrap();
1055 narrow[(x as isize + 10) as usize] = true;
1056 }
1057
1058 for (i, covered) in full.iter().enumerate() {
1059 assert!(covered, "full[{}] should have been generated", i);
1060 }
1061 for (i, covered) in no_min.iter().enumerate() {
1062 assert!(covered, "no_min[{}] should have been generated", i);
1063 }
1064 for (i, covered) in no_max.iter().enumerate() {
1065 assert!(covered, "no_max[{}] should have been generated", i);
1066 }
1067 for (i, covered) in narrow.iter().enumerate() {
1068 assert!(covered, "narrow[{}] should have been generated", i);
1069 }
1070 }
1071}