blake3/
io.rs

1//! Helper functions for efficient IO.
2
/// Feed everything `reader` yields into `hasher`, returning the number of bytes hashed.
///
/// Data is pulled through a 64 KiB stack buffer. The loop ends when `read` reports zero bytes.
/// A read that fails with `ErrorKind::Interrupted` is retried; any other error is returned to
/// the caller as-is.
#[cfg(feature = "std")]
pub(crate) fn copy_wide(
    mut reader: impl std::io::Read,
    hasher: &mut crate::Hasher,
) -> std::io::Result<u64> {
    use std::io::ErrorKind;

    let mut chunk = [0u8; 65536];
    let mut bytes_hashed: u64 = 0;
    loop {
        let filled = match reader.read(&mut chunk) {
            Ok(count) => count,
            // An interrupted read is not a failure, just try again.
            // see test_update_reader_interrupted
            Err(err) if err.kind() == ErrorKind::Interrupted => continue,
            Err(err) => return Err(err),
        };
        if filled == 0 {
            // A zero-length read means there is nothing more to hash.
            return Ok(bytes_hashed);
        }
        hasher.update(&chunk[..filled]);
        bytes_hashed += filled as u64;
    }
}
23
24// Mmap a file, if it looks like a good idea. Return None in cases where we know mmap will fail, or
25// if the file is short enough that mmapping isn't worth it. However, if we do try to mmap and it
26// fails, return the error.
27//
28// SAFETY: Mmaps are fundamentally unsafe, because you can call invariant-checking functions like
29// str::from_utf8 on them and then have them change out from under you. Letting a safe caller get
30// their hands on an mmap, or even a &[u8] that's backed by an mmap, is unsound. However, because
// this function is crate-private, we can guarantee that all that can ever happen in the event of
// a race condition is that we either hash nonsense bytes or crash with SIGBUS or similar, neither
// of which should risk memory corruption in a safe caller.
34//
35// PARANOIA: But a data race...is a data race...is a data race...right? Even if we know that no
36// platform in the "real world" is ever going to do anything other than compute the "wrong answer"
37// if we race on this mmap while we hash it, aren't we still supposed to feel bad about doing this?
38// Well, maybe. This is IO, and IO gets special carve-outs in the memory model. Consider a
39// memory-mapped register that returns random 32-bit words. (This is actually realistic if you have
40// a hardware RNG.) It's probably sound to construct a *const i32 pointing to that register and do
41// some raw pointer reads from it. Those reads should be volatile if you don't want the compiler to
42// coalesce them, but either way the compiler isn't allowed to just _go nuts_ and insert
43// should-never-happen branches to wipe your hard drive if two adjacent reads happen to give
44// different values. As far as I'm aware, there's no such thing as a read that's allowed if it's
45// volatile but prohibited if it's not (unlike atomics). As mentioned above, it's not ok to
46// construct a safe &i32 to the register if you're going to leak that reference to unknown callers.
47// But if you "know what you're doing," I don't think *const i32 and &i32 are fundamentally
48// different here. Feedback needed.
#[cfg(feature = "mmap")]
pub(crate) fn maybe_mmap_file(file: &std::fs::File) -> std::io::Result<Option<memmap2::Mmap>> {
    // Files shorter than this are not worth mapping.
    const MIN_MMAP_LEN: u64 = 16 * 1024;

    let meta = file.metadata()?;
    let len = meta.len();

    // Each guard below answers "no mapping" with Ok(None); none of them is an error.

    // Not a real file.
    if !meta.is_file() {
        return Ok(None);
    }

    // Too long to safely map.
    // https://github.com/danburkert/memmap-rs/issues/69
    if len > isize::max_value() as u64 {
        return Ok(None);
    }

    // Mapping an empty file currently fails.
    // https://github.com/danburkert/memmap-rs/issues/72
    // See test_mmap_virtual_file.
    if len == 0 {
        return Ok(None);
    }

    // Mapping small files is not worth it.
    if len < MIN_MMAP_LEN {
        return Ok(None);
    }

    // Pin the mapping length to the size observed above, so that a concurrent change to the
    // file can't invalidate the checks we just made. A failure to map is propagated as an error.
    // The soundness discussion for this unsafe call is in the comment preceding this function.
    let mapping = unsafe {
        let mut options = memmap2::MmapOptions::new();
        options.len(len as usize);
        options.map(file)?
    };
    Ok(Some(mapping))
}