// xxhash_rust/xxh3.rs

//!XXH3 implementation
//!
//!Provides `Hasher` only for the 64bit variant, as a 128bit `Hasher` would not be much different
//!due to the trait being limited to `u64` outputs.
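//!
//!The example below is an illustrative sketch; it assumes this module is reachable as
//!`xxhash_rust::xxh3` (i.e. the crate is built with the corresponding feature enabled):
//!
//!```rust
//!use xxhash_rust::xxh3::{xxh3_64, Xxh3};
//!
//!let data = b"hello world";
//!
//!//One-shot hashing
//!let oneshot = xxh3_64(data);
//!
//!//Streaming hashing over the same bytes yields the same result
//!let mut hasher = Xxh3::new();
//!hasher.update(&data[..5]);
//!hasher.update(&data[5..]);
//!assert_eq!(hasher.digest(), oneshot);
//!```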
5
6use core::{ptr, mem, slice, hash};
7
8use crate::xxh32_common as xxh32;
9use crate::xxh64_common as xxh64;
10use crate::xxh3_common::*;
11use crate::utils::{Buffer, get_unaligned_chunk, get_aligned_chunk_ref};
12
// Code is kept as close to the original C implementation as possible.
// It does make it look ugly, but it is fast and easy to update once xxhash gets a new version.
15
16#[cfg(all(any(target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128")), not(target_feature = "avx2")))]
17#[repr(align(16))]
18#[derive(Clone)]
19struct Acc([u64; ACC_NB]);
20#[cfg(target_feature = "avx2")]
21#[repr(align(32))]
22#[derive(Clone)]
23struct Acc([u64; ACC_NB]);
24#[cfg(not(any(target_feature = "avx2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"), target_feature = "sse2")))]
25#[repr(align(8))]
26#[derive(Clone)]
27struct Acc([u64; ACC_NB]);
28
29const INITIAL_ACC: Acc = Acc([
30    xxh32::PRIME_3 as u64, xxh64::PRIME_1, xxh64::PRIME_2, xxh64::PRIME_3,
31    xxh64::PRIME_4, xxh32::PRIME_2 as u64, xxh64::PRIME_5, xxh32::PRIME_1 as u64
32]);
33
34type LongHashFn = fn(&[u8], u64, &[u8]) -> u64;
35type LongHashFn128 = fn(&[u8], u64, &[u8]) -> u128;
36
37#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
38type StripeLanes = [[u8; mem::size_of::<core::arch::wasm32::v128>()]; STRIPE_LEN / mem::size_of::<core::arch::wasm32::v128>()];
39#[cfg(all(target_arch = "x86", target_feature = "avx2"))]
40type StripeLanes = [[u8; mem::size_of::<core::arch::x86::__m256i>()]; STRIPE_LEN / mem::size_of::<core::arch::x86::__m256i>()];
41#[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
42type StripeLanes = [[u8; mem::size_of::<core::arch::x86_64::__m256i>()]; STRIPE_LEN / mem::size_of::<core::arch::x86_64::__m256i>()];
43#[cfg(all(target_arch = "x86", target_feature = "sse2", not(target_feature = "avx2")))]
44type StripeLanes = [[u8; mem::size_of::<core::arch::x86::__m128i>()]; STRIPE_LEN / mem::size_of::<core::arch::x86::__m128i>()];
45#[cfg(all(target_arch = "x86_64", target_feature = "sse2", not(target_feature = "avx2")))]
46type StripeLanes = [[u8; mem::size_of::<core::arch::x86_64::__m128i>()]; STRIPE_LEN / mem::size_of::<core::arch::x86_64::__m128i>()];
47#[cfg(target_feature = "neon")]
48type StripeLanes = [[u8; mem::size_of::<core::arch::aarch64::uint8x16_t>()]; STRIPE_LEN / mem::size_of::<core::arch::aarch64::uint8x16_t>()];
49
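//Builds the 8-bit immediate for `_mm_shuffle_epi32`/`_mm256_shuffle_epi32` from four 2-bit lane
//selectors. For example `_mm_shuffle(1, 0, 3, 2)` yields 0b01_00_11_10 (0x4E), which swaps the
//two 64-bit halves of each 128-bit lane.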
50#[cfg(any(target_feature = "sse2", target_feature = "avx2"))]
51#[inline]
52const fn _mm_shuffle(z: u32, y: u32, x: u32, w: u32) -> i32 {
53    ((z << 6) | (y << 4) | (x << 2) | w) as i32
54}
55
56#[inline(always)]
57const fn mult32_to64(left: u32, right: u32) -> u64 {
58    (left as u64).wrapping_mul(right as u64)
59}
60
61//#[inline(always)]
62//fn _mm_prefetch(_ptr: *const i8, _offset: isize) {
63//    #[cfg(target_arch = "x86")]
64//    unsafe {
65//        core::arch::x86::_mm_prefetch(_ptr.offset(_offset), core::arch::x86::_MM_HINT_T0);
66//    }
67//
68//    #[cfg(target_arch = "x86_64")]
69//    unsafe {
70//        core::arch::x86_64::_mm_prefetch(_ptr.offset(_offset), core::arch::x86_64::_MM_HINT_T0);
71//    }
72//}
73
74macro_rules! to_u128 {
75    ($lo:expr, $hi:expr) => {
76        ($lo) as u128 | ((($hi) as u128) << 64)
77    };
78}
79
80macro_rules! slice_offset_ptr {
81    ($slice:expr, $offset:expr) => {{
82        let slice = $slice;
83        let offset = $offset;
84        debug_assert!(slice.len() >= offset);
85
86        #[allow(unused_unsafe)]
87        unsafe {
88            (slice.as_ptr() as *const u8).add(offset)
89        }
90    }}
91}
92
93#[inline(always)]
94fn read_32le_unaligned(data: &[u8], offset: usize) -> u32 {
95    u32::from_ne_bytes(*get_aligned_chunk_ref(data, offset)).to_le()
96}
97
98#[inline(always)]
99fn read_64le_unaligned(data: &[u8], offset: usize) -> u64 {
100    u64::from_ne_bytes(*get_aligned_chunk_ref(data, offset)).to_le()
101}
102
103#[inline(always)]
104fn mix_two_accs(acc: &mut Acc, offset: usize, secret: &[[u8; 8]; 2]) -> u64 {
105    mul128_fold64(acc.0[offset] ^ u64::from_ne_bytes(secret[0]).to_le(),
106                  acc.0[offset + 1] ^ u64::from_ne_bytes(secret[1]).to_le())
107}
108
109#[inline]
110fn merge_accs(acc: &mut Acc, secret: &[[[u8; 8]; 2]; 4], mut result: u64) -> u64 {
111    macro_rules! mix_two_accs {
112        ($idx:literal) => {
113            result = result.wrapping_add(mix_two_accs(acc, $idx * 2, &secret[$idx]))
114        }
115    }
116
117    mix_two_accs!(0);
118    mix_two_accs!(1);
119    mix_two_accs!(2);
120    mix_two_accs!(3);
121
122    avalanche(result)
123}
124
125#[inline(always)]
126fn mix16_b(input: &[[u8; 8]; 2], secret: &[[u8; 8]; 2], seed: u64) -> u64 {
127    let mut input_lo = u64::from_ne_bytes(input[0]).to_le();
128    let mut input_hi = u64::from_ne_bytes(input[1]).to_le();
129
130    input_lo ^= u64::from_ne_bytes(secret[0]).to_le().wrapping_add(seed);
131    input_hi ^= u64::from_ne_bytes(secret[1]).to_le().wrapping_sub(seed);
132
133    mul128_fold64(input_lo, input_hi)
134}
135
136#[inline(always)]
//Inputs are two chunks of unaligned u64
//Secret is two chunks of unaligned (u64, u64)
139fn mix32_b(lo: &mut u64, hi: &mut u64, input_1: &[[u8; 8]; 2], input_2: &[[u8; 8]; 2], secret: &[[[u8; 8]; 2]; 2], seed: u64) {
140    *lo = lo.wrapping_add(mix16_b(input_1, &secret[0], seed));
141    *lo ^= u64::from_ne_bytes(input_2[0]).to_le().wrapping_add(u64::from_ne_bytes(input_2[1]).to_le());
142
143    *hi = hi.wrapping_add(mix16_b(input_2, &secret[1], seed));
144    *hi ^= u64::from_ne_bytes(input_1[0]).to_le().wrapping_add(u64::from_ne_bytes(input_1[1]).to_le());
145}
146
147#[inline(always)]
148fn custom_default_secret(seed: u64) -> [u8; DEFAULT_SECRET_SIZE] {
149    let mut result = mem::MaybeUninit::<[u8; DEFAULT_SECRET_SIZE]>::uninit();
150
151    let nb_rounds = DEFAULT_SECRET_SIZE / 16;
152
153    for idx in 0..nb_rounds {
154        let low = get_unaligned_chunk::<u64>(&DEFAULT_SECRET, idx * 16).to_le().wrapping_add(seed);
155        let hi = get_unaligned_chunk::<u64>(&DEFAULT_SECRET, idx * 16 + 8).to_le().wrapping_sub(seed);
156
157        Buffer {
158            ptr: result.as_mut_ptr() as *mut u8,
159            len: DEFAULT_SECRET_SIZE,
160            offset: idx * 16,
161        }.copy_from_slice(&low.to_le_bytes());
162        Buffer {
163            ptr: result.as_mut_ptr() as *mut u8,
164            len: DEFAULT_SECRET_SIZE,
165            offset: idx * 16 + 8,
166        }.copy_from_slice(&hi.to_le_bytes());
167    }
168
169    unsafe {
170        result.assume_init()
171    }
172}
173
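//All accumulate_512 backends below perform the same per-stripe update over the 8 u64 accumulator
//lanes (the scalar version is the reference):
//    data_key    = data[i] ^ secret[i]
//    acc[i]     += (data_key & 0xFFFFFFFF) * (data_key >> 32)
//    acc[i ^ 1] += data[i]
//The SIMD backends compute exactly the same sums, several lanes per iteration.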
174#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
175fn accumulate_512_wasm(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
176    const LANES: usize = ACC_NB;
177
178    use core::arch::wasm32::*;
179
180    let mut idx = 0usize;
181    let xacc = acc.0.as_mut_ptr() as *mut v128;
182
183    unsafe {
184        while idx.wrapping_add(1) < LANES / 2 {
185            let data_vec_1 = v128_load(input[idx].as_ptr() as _);
186            let data_vec_2 = v128_load(input[idx.wrapping_add(1)].as_ptr() as _);
187
188            let key_vec_1 = v128_load(secret[idx].as_ptr() as _);
189            let key_vec_2 = v128_load(secret[idx.wrapping_add(1)].as_ptr() as _);
190
191            let data_key_1 = v128_xor(data_vec_1, key_vec_1);
192            let data_key_2 = v128_xor(data_vec_2, key_vec_2);
193
194            let data_swap_1 = i64x2_shuffle::<1, 0>(data_vec_1, data_vec_1);
195            let data_swap_2 = i64x2_shuffle::<1, 0>(data_vec_2, data_vec_2);
196
197            let mixed_lo = i32x4_shuffle::<0, 2, 4, 6>(data_key_1, data_key_2);
198            let mixed_hi = i32x4_shuffle::<1, 3, 5, 7>(data_key_1, data_key_2);
199
200            let prod_1 = u64x2_extmul_low_u32x4(mixed_lo, mixed_hi);
201            let prod_2 = u64x2_extmul_high_u32x4(mixed_lo, mixed_hi);
202
203            let sum_1 = i64x2_add(prod_1, data_swap_1);
204            let sum_2 = i64x2_add(prod_2, data_swap_2);
205
206            xacc.add(idx).write(i64x2_add(sum_1, *xacc.add(idx)));
207            xacc.add(idx.wrapping_add(1)).write(i64x2_add(sum_2, *xacc.add(idx.wrapping_add(1))));
208
209            idx = idx.wrapping_add(2);
210        }
211    }
212}
213
214#[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
215macro_rules! vld1q_u8 {
216    ($ptr:expr) => {
217        core::arch::aarch64::vld1q_u8($ptr)
218
219    }
220}
221
//For some reason vld1q_u8 is unstable on arm
223#[cfg(all(target_arch = "arm", target_feature = "neon"))]
224macro_rules! vld1q_u8 {
225    ($ptr:expr) => {
226        core::ptr::read_unaligned($ptr as *const core::arch::arm::uint8x16_t)
227    }
228}
229
230#[cfg(target_feature = "neon")]
231fn accumulate_512_neon(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
232    //Full Neon version from xxhash source
233    const NEON_LANES: usize = ACC_NB;
234
235    unsafe {
236        #[cfg(target_arch = "arm")]
237        use core::arch::arm::*;
238        #[cfg(target_arch = "aarch64")]
239        use core::arch::aarch64::*;
240
241        let mut idx = 0usize;
242        let xacc = acc.0.as_mut_ptr() as *mut uint64x2_t;
243
244        while idx.wrapping_add(1) < NEON_LANES / 2 {
245            /* data_vec = xinput[i]; */
246            let data_vec_1 = vreinterpretq_u64_u8(vld1q_u8!(input[idx].as_ptr()));
247            let data_vec_2 = vreinterpretq_u64_u8(vld1q_u8!(input[idx.wrapping_add(1)].as_ptr()));
248            /* key_vec  = xsecret[i];  */
249            let key_vec_1  = vreinterpretq_u64_u8(vld1q_u8!(secret[idx].as_ptr()));
250            let key_vec_2  = vreinterpretq_u64_u8(vld1q_u8!(secret[idx.wrapping_add(1)].as_ptr()));
251            /* data_swap = swap(data_vec) */
252            let data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
253            let data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
254            /* data_key = data_vec ^ key_vec; */
255            let data_key_1 = veorq_u64(data_vec_1, key_vec_1);
256            let data_key_2 = veorq_u64(data_vec_2, key_vec_2);
257
258            let unzipped = vuzpq_u32(
259                vreinterpretq_u32_u64(data_key_1),
260                vreinterpretq_u32_u64(data_key_2)
261            );
262            /* data_key_lo = data_key & 0xFFFFFFFF */
263            let data_key_lo = unzipped.0;
264            /* data_key_hi = data_key >> 32 */
265            let data_key_hi = unzipped.1;
266
            //xxhash does this with inline assembly, but it is not clear that embedding it here is worth it
268            let sum_1 = vmlal_u32(data_swap_1, vget_low_u32(data_key_lo), vget_low_u32(data_key_hi));
269            #[cfg(target_arch = "aarch64")]
270            let sum_2 = vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
271            #[cfg(target_arch = "arm")]
272            let sum_2 = vmlal_u32(data_swap_2, vget_high_u32(data_key_lo), vget_high_u32(data_key_hi));
273
274            xacc.add(idx).write(vaddq_u64(*xacc.add(idx), sum_1));
275            xacc.add(idx.wrapping_add(1)).write(vaddq_u64(*xacc.add(idx.wrapping_add(1)), sum_2));
276
277            idx = idx.wrapping_add(2);
278        }
279    }
280}
281
282#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
283fn accumulate_512_sse2(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
284    unsafe {
285        #[cfg(target_arch = "x86")]
286        use core::arch::x86::*;
287        #[cfg(target_arch = "x86_64")]
288        use core::arch::x86_64::*;
289
290        let xacc = acc.0.as_mut_ptr() as *mut __m128i;
291
292        for idx in 0..secret.len() {
293            let data_vec = _mm_loadu_si128(input[idx].as_ptr() as _);
294            let key_vec = _mm_loadu_si128(secret[idx].as_ptr() as _);
295            let data_key = _mm_xor_si128(data_vec, key_vec);
296
297            let data_key_lo = _mm_shuffle_epi32(data_key, _mm_shuffle(0, 3, 0, 1));
298            let product = _mm_mul_epu32(data_key, data_key_lo);
299
300            let data_swap = _mm_shuffle_epi32(data_vec, _mm_shuffle(1,0,3,2));
301            let sum = _mm_add_epi64(*xacc.add(idx), data_swap);
302            xacc.add(idx).write(_mm_add_epi64(product, sum));
303        }
304    }
305}
306
307#[cfg(target_feature = "avx2")]
308fn accumulate_512_avx2(acc: &mut Acc, input: &StripeLanes, secret: &StripeLanes) {
309    unsafe {
310        #[cfg(target_arch = "x86")]
311        use core::arch::x86::*;
312        #[cfg(target_arch = "x86_64")]
313        use core::arch::x86_64::*;
314
315        let xacc = acc.0.as_mut_ptr() as *mut __m256i;
316
317        for idx in 0..secret.len() {
318            let data_vec = _mm256_loadu_si256(input[idx].as_ptr() as _);
319            let key_vec = _mm256_loadu_si256(secret[idx].as_ptr() as _);
320            let data_key = _mm256_xor_si256(data_vec, key_vec);
321
322            let data_key_lo = _mm256_srli_epi64(data_key, 32);
323            let product = _mm256_mul_epu32(data_key, data_key_lo);
324
325            let data_swap = _mm256_shuffle_epi32(data_vec, _mm_shuffle(1,0,3,2));
326            let sum = _mm256_add_epi64(*xacc.add(idx), data_swap);
327            xacc.add(idx).write(_mm256_add_epi64(product, sum));
328        }
329    }
330}
331
332#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"))))]
333fn accumulate_512_scalar(acc: &mut Acc, input: &[[u8; 8]; ACC_NB], secret: &[[u8; 8]; ACC_NB]) {
334    for idx in 0..ACC_NB {
335        let data_val = u64::from_ne_bytes(input[idx]).to_le();
336        let data_key = data_val ^ u64::from_ne_bytes(secret[idx]).to_le();
337
338        acc.0[idx ^ 1] = acc.0[idx ^ 1].wrapping_add(data_val);
339        acc.0[idx] = acc.0[idx].wrapping_add(mult32_to64((data_key & 0xFFFFFFFF) as u32, (data_key >> 32) as u32));
340    }
341}
342
343#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
344use accumulate_512_wasm as accumulate_512;
345#[cfg(target_feature = "neon")]
346use accumulate_512_neon as accumulate_512;
347#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
348use accumulate_512_sse2 as accumulate_512;
349#[cfg(target_feature = "avx2")]
350use accumulate_512_avx2 as accumulate_512;
351#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"))))]
352use accumulate_512_scalar as accumulate_512;
353
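//All scramble_acc backends below perform, for every u64 accumulator lane:
//    acc[i] = (xorshift64(acc[i], 47) ^ secret[i]) * xxh32::PRIME_1
//It is applied after each full block of stripes to keep the accumulators well mixed.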
354#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
355fn scramble_acc_wasm(acc: &mut Acc, secret: &StripeLanes) {
356    use core::arch::wasm32::*;
357
358    let xacc = acc.0.as_mut_ptr() as *mut v128;
359    let prime = u64x2_splat(xxh32::PRIME_1 as _);
360
361    unsafe {
362        for idx in 0..secret.len() {
363            let acc_vec = v128_load(xacc.add(idx) as _);
364            let shifted = u64x2_shr(acc_vec, 47);
365            let data_vec = v128_xor(acc_vec, shifted);
366            let key_vec = v128_load(secret[idx].as_ptr() as _);
367            let mixed = v128_xor(data_vec, key_vec);
368            xacc.add(idx).write(i64x2_mul(mixed, prime));
369        }
370    }
371}
372
373#[cfg(target_feature = "neon")]
374fn scramble_acc_neon(acc: &mut Acc, secret: &StripeLanes) {
375    //Full Neon version from xxhash source
376    unsafe {
377        #[cfg(target_arch = "arm")]
378        use core::arch::arm::*;
379        #[cfg(target_arch = "aarch64")]
380        use core::arch::aarch64::*;
381
382        let xacc = acc.0.as_mut_ptr() as *mut uint64x2_t;
383
384        let prime_low = vdup_n_u32(xxh32::PRIME_1);
385        let prime_hi = vreinterpretq_u32_u64(vdupq_n_u64((xxh32::PRIME_1 as u64) << 32));
386
387        for idx in 0..secret.len() {
388           /* xacc[i] ^= (xacc[i] >> 47); */
389            let acc_vec  = *xacc.add(idx);
390            let shifted  = vshrq_n_u64(acc_vec, 47);
391            let data_vec = veorq_u64(acc_vec, shifted);
392
393            /* xacc[i] ^= xsecret[i]; */
            //According to the xxhash sources an unaligned read would be fine here,
            //but it is awkward to express cleanly in Rust, so it is avoided for now
396            let key_vec  = vreinterpretq_u64_u8(vld1q_u8!(secret[idx].as_ptr()));
397            let data_key = veorq_u64(data_vec, key_vec);
398
399            let prod_hi = vmulq_u32(vreinterpretq_u32_u64(data_key), prime_hi);
400            let data_key_lo = vmovn_u64(data_key);
401            xacc.add(idx).write(vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, prime_low));
402        }
403    }
404}
405
406#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
407fn scramble_acc_sse2(acc: &mut Acc, secret: &StripeLanes) {
408    unsafe {
409        #[cfg(target_arch = "x86")]
410        use core::arch::x86::*;
411        #[cfg(target_arch = "x86_64")]
412        use core::arch::x86_64::*;
413
414        let xacc = acc.0.as_mut_ptr() as *mut __m128i;
415        let prime32 = _mm_set1_epi32(xxh32::PRIME_1 as i32);
416
417        for idx in 0..secret.len() {
418            let acc_vec = *xacc.add(idx);
419            let shifted = _mm_srli_epi64(acc_vec, 47);
420            let data_vec = _mm_xor_si128(acc_vec, shifted);
421
422            let key_vec = _mm_loadu_si128(secret[idx].as_ptr() as _);
423            let data_key = _mm_xor_si128(data_vec, key_vec);
424
425            let data_key_hi = _mm_shuffle_epi32(data_key, _mm_shuffle(0, 3, 0, 1));
426            let prod_lo = _mm_mul_epu32(data_key, prime32);
427            let prod_hi = _mm_mul_epu32(data_key_hi, prime32);
428            xacc.add(idx).write(_mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32)));
429        }
430    }
431}
432
433#[cfg(target_feature = "avx2")]
434fn scramble_acc_avx2(acc: &mut Acc, secret: &StripeLanes) {
435    unsafe {
436        #[cfg(target_arch = "x86")]
437        use core::arch::x86::*;
438        #[cfg(target_arch = "x86_64")]
439        use core::arch::x86_64::*;
440
441        let xacc = acc.0.as_mut_ptr() as *mut __m256i;
442        let prime32 = _mm256_set1_epi32(xxh32::PRIME_1 as i32);
443
444        for idx in 0..secret.len() {
445            let acc_vec = *xacc.add(idx);
446            let shifted = _mm256_srli_epi64(acc_vec, 47);
447            let data_vec = _mm256_xor_si256(acc_vec, shifted);
448
449            let key_vec = _mm256_loadu_si256(secret[idx].as_ptr() as _);
450            let data_key = _mm256_xor_si256(data_vec, key_vec);
451
452            let data_key_hi = _mm256_srli_epi64(data_key, 32);
453            let prod_lo = _mm256_mul_epu32(data_key, prime32);
454            let prod_hi = _mm256_mul_epu32(data_key_hi, prime32);
455            xacc.add(idx).write(_mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32)));
456        }
457    }
458}
459
460#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"))))]
461fn scramble_acc_scalar(acc: &mut Acc, secret: &[[u8; 8]; ACC_NB]) {
462    for idx in 0..secret.len() {
463        let key = u64::from_ne_bytes(secret[idx]).to_le();
464        let mut acc_val = xorshift64(acc.0[idx], 47);
465        acc_val ^= key;
466        acc.0[idx] = acc_val.wrapping_mul(xxh32::PRIME_1 as u64);
467    }
468}
469
470#[cfg(all(target_family = "wasm", target_feature = "simd128"))]
471use scramble_acc_wasm as scramble_acc;
472
473#[cfg(target_feature = "neon")]
474use scramble_acc_neon as scramble_acc;
475
476#[cfg(all(target_feature = "sse2", not(target_feature = "avx2")))]
477use scramble_acc_sse2 as scramble_acc;
478
479#[cfg(target_feature = "avx2")]
480use scramble_acc_avx2 as scramble_acc;
481
482#[cfg(not(any(target_feature = "avx2", target_feature = "sse2", target_feature = "neon", all(target_family = "wasm", target_feature = "simd128"))))]
483use scramble_acc_scalar as scramble_acc;
484
485#[inline(always)]
486fn accumulate_loop(acc: &mut Acc, input: *const u8, secret: *const u8, nb_stripes: usize) {
487    for idx in 0..nb_stripes {
488        unsafe {
489            let input = input.add(idx * STRIPE_LEN);
            //Miri complains about this, so prefetch is turned off for now
491            //_mm_prefetch(input as _, 320);
492
493            accumulate_512(acc,
494                &*(input as *const _),
495                &*(secret.add(idx * SECRET_CONSUME_RATE) as *const _)
496            );
497        }
498    }
499}
500
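//Long input processing: the input is split into blocks of `nb_stripes` stripes of STRIPE_LEN bytes;
//each stripe advances the secret offset by SECRET_CONSUME_RATE and every completed block is followed
//by scramble_acc. The trailing partial block is accumulated the same way, and the very last STRIPE_LEN
//bytes of input are folded in against the secret slice ending SECRET_LASTACC_START bytes before its end.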
501#[inline]
502fn hash_long_internal_loop(acc: &mut Acc, input: &[u8], secret: &[u8]) {
503    let nb_stripes = (secret.len() - STRIPE_LEN) / SECRET_CONSUME_RATE;
504    let block_len = STRIPE_LEN * nb_stripes;
505    let nb_blocks = (input.len() - 1) / block_len;
506
507    for idx in 0..nb_blocks {
508        accumulate_loop(acc, slice_offset_ptr!(input, idx * block_len), secret.as_ptr(), nb_stripes);
509        scramble_acc(acc, get_aligned_chunk_ref(secret, secret.len() - STRIPE_LEN));
510    }
511
512    //last partial block
513    debug_assert!(input.len() > STRIPE_LEN);
514
515    let nb_stripes = ((input.len() - 1) - (block_len * nb_blocks)) / STRIPE_LEN;
516    debug_assert!(nb_stripes <= (secret.len() / SECRET_CONSUME_RATE));
517    accumulate_loop(acc, slice_offset_ptr!(input, nb_blocks * block_len), secret.as_ptr(), nb_stripes);
518
519    //last stripe
520    accumulate_512(acc, get_aligned_chunk_ref(input, input.len() - STRIPE_LEN), get_aligned_chunk_ref(secret, secret.len() - STRIPE_LEN - SECRET_LASTACC_START));
521}
522
523#[inline(always)]
524fn xxh3_64_1to3(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
525    let c1; let c2; let c3;
526    unsafe {
527        c1 = *input.get_unchecked(0);
528        c2 = *input.get_unchecked(input.len() >> 1);
529        c3 = *input.get_unchecked(input.len() - 1);
530    };
531
532    let combo = (c1 as u32) << 16 | (c2 as u32) << 24 | (c3 as u32) << 0 | (input.len() as u32) << 8;
533    let flip = ((read_32le_unaligned(secret, 0) ^ read_32le_unaligned(secret, 4)) as u64).wrapping_add(seed);
534    xxh64::avalanche((combo as u64) ^ flip)
535}
536
537#[inline(always)]
538fn xxh3_64_4to8(input: &[u8], mut seed: u64, secret: &[u8]) -> u64 {
539    debug_assert!(input.len() >= 4 && input.len() <= 8);
540
541    seed ^= ((seed as u32).swap_bytes() as u64) << 32;
542
543    let input1 = read_32le_unaligned(input, 0);
544    let input2 = read_32le_unaligned(input, input.len() - 4);
545
546    let flip = (read_64le_unaligned(secret, 8) ^ read_64le_unaligned(secret, 16)).wrapping_sub(seed);
547    let input64 = (input2 as u64).wrapping_add((input1 as u64) << 32);
548    let keyed = input64 ^ flip;
549
550    strong_avalanche(keyed, input.len() as u64)
551}
552
553#[inline(always)]
554fn xxh3_64_9to16(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
555    debug_assert!(input.len() >= 9 && input.len() <= 16);
556
557    let flip1 = (read_64le_unaligned(secret, 24) ^ read_64le_unaligned(secret, 32)).wrapping_add(seed);
558    let flip2 = (read_64le_unaligned(secret, 40) ^ read_64le_unaligned(secret, 48)).wrapping_sub(seed);
559
560    let input_lo = read_64le_unaligned(input, 0) ^ flip1;
561    let input_hi = read_64le_unaligned(input, input.len() - 8) ^ flip2;
562
563    let acc = (input.len() as u64).wrapping_add(input_lo.swap_bytes())
564                                  .wrapping_add(input_hi)
565                                  .wrapping_add(mul128_fold64(input_lo, input_hi));
566
567    avalanche(acc)
568}
569
570#[inline(always)]
571fn xxh3_64_0to16(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
572    if input.len() > 8 {
573        xxh3_64_9to16(input, seed, secret)
574    } else if input.len() >= 4 {
575        xxh3_64_4to8(input, seed, secret)
576    } else if input.len() > 0 {
577        xxh3_64_1to3(input, seed, secret)
578    } else {
579        xxh64::avalanche(seed ^ (read_64le_unaligned(secret, 56) ^ read_64le_unaligned(secret, 64)))
580    }
581}
582
583#[inline(always)]
584fn xxh3_64_7to128(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
585    let mut acc = (input.len() as u64).wrapping_mul(xxh64::PRIME_1);
586
587    if input.len() > 32 {
588        if input.len() > 64 {
589            if input.len() > 96 {
590                acc = acc.wrapping_add(mix16_b(
591                    get_aligned_chunk_ref(input, 48),
592                    get_aligned_chunk_ref(secret, 96),
593                    seed
594                ));
595                acc = acc.wrapping_add(mix16_b(
596                    get_aligned_chunk_ref(input, input.len() - 64),
597                    get_aligned_chunk_ref(secret, 112),
598                    seed
599                ));
600            }
601
602            acc = acc.wrapping_add(mix16_b(
603                get_aligned_chunk_ref(input, 32),
604                get_aligned_chunk_ref(secret, 64),
605                seed
606            ));
607            acc = acc.wrapping_add(mix16_b(
608                get_aligned_chunk_ref(input, input.len() - 48),
609                get_aligned_chunk_ref(secret, 80),
610                seed
611            ));
612        }
613
614        acc = acc.wrapping_add(mix16_b(
615            get_aligned_chunk_ref(input, 16),
616            get_aligned_chunk_ref(secret, 32),
617            seed
618        ));
619        acc = acc.wrapping_add(mix16_b(
620            get_aligned_chunk_ref(input, input.len() - 32),
621            get_aligned_chunk_ref(secret, 48),
622            seed
623        ));
624    }
625
626    acc = acc.wrapping_add(mix16_b(
627        get_aligned_chunk_ref(input, 0),
628        get_aligned_chunk_ref(secret, 0),
629        seed
630    ));
631    acc = acc.wrapping_add(mix16_b(
632        get_aligned_chunk_ref(input, input.len() - 16),
633        get_aligned_chunk_ref(secret, 16),
634        seed
635    ));
636
637    avalanche(acc)
638}
639
640#[inline(never)]
641fn xxh3_64_129to240(input: &[u8], seed: u64, secret: &[u8]) -> u64 {
642    const START_OFFSET: usize = 3;
643    const LAST_OFFSET: usize = 17;
644
645    let mut acc = (input.len() as u64).wrapping_mul(xxh64::PRIME_1);
646    let nb_rounds = input.len() / 16;
647    debug_assert!(nb_rounds >= 8);
648
649    let mut idx = 0;
650    while idx < 8 {
651        acc = acc.wrapping_add(
652            mix16_b(
653                get_aligned_chunk_ref(input, 16*idx),
654                get_aligned_chunk_ref(secret, 16*idx),
655                seed
656            )
657        );
658        idx = idx.wrapping_add(1);
659    }
660    acc = avalanche(acc);
661
662    while idx < nb_rounds {
663        acc = acc.wrapping_add(
664            mix16_b(
665                get_aligned_chunk_ref(input, 16*idx),
666                get_aligned_chunk_ref(secret, 16*(idx-8) + START_OFFSET),
667                seed
668            )
669        );
670        idx = idx.wrapping_add(1);
671    }
672
673    acc = acc.wrapping_add(
674        mix16_b(
675            get_aligned_chunk_ref(input, input.len()-16),
676            get_aligned_chunk_ref(secret, SECRET_SIZE_MIN-LAST_OFFSET),
677            seed
678        )
679    );
680
681    avalanche(acc)
682}
683
684#[inline(always)]
685fn xxh3_64_internal(input: &[u8], seed: u64, secret: &[u8], long_hash_fn: LongHashFn) -> u64 {
686    debug_assert!(secret.len() >= SECRET_SIZE_MIN);
687
688    if input.len() <= 16 {
689        xxh3_64_0to16(input, seed, secret)
690    } else if input.len() <= 128 {
691        xxh3_64_7to128(input, seed, secret)
692    } else if input.len() <= MID_SIZE_MAX {
693        xxh3_64_129to240(input, seed, secret)
694    } else {
695        long_hash_fn(input, seed, secret)
696    }
697}
698
699#[inline(always)]
700fn xxh3_64_long_impl(input: &[u8], secret: &[u8]) -> u64 {
701    let mut acc = INITIAL_ACC;
702
703    hash_long_internal_loop(&mut acc, input, secret);
704
705    merge_accs(&mut acc, get_aligned_chunk_ref(secret, SECRET_MERGEACCS_START), (input.len() as u64).wrapping_mul(xxh64::PRIME_1))
706}
707
708#[inline(never)]
709fn xxh3_64_long_with_seed(input: &[u8], seed: u64, _secret: &[u8]) -> u64 {
710    match seed {
711        0 => xxh3_64_long_impl(input, &DEFAULT_SECRET),
712        seed => xxh3_64_long_impl(input, &custom_default_secret(seed)),
713    }
714}
715
716#[inline(never)]
717fn xxh3_64_long_default(input: &[u8], _seed: u64, _secret: &[u8]) -> u64 {
718    xxh3_64_long_impl(input, &DEFAULT_SECRET)
719}
720
721#[inline(never)]
722fn xxh3_64_long_with_secret(input: &[u8], _seed: u64, secret: &[u8]) -> u64 {
723    xxh3_64_long_impl(input, secret)
724}
725
726#[inline]
727///Returns 64bit hash for provided input.
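///
///Example (illustrative; one-shot and streaming over the same bytes agree):
///
///```rust
///use xxhash_rust::xxh3::{xxh3_64, Xxh3Default};
///
///let mut hasher = Xxh3Default::new();
///hasher.update(b"xxh3");
///assert_eq!(hasher.digest(), xxh3_64(b"xxh3"));
///```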
728pub fn xxh3_64(input: &[u8]) -> u64 {
729    xxh3_64_internal(input, 0, &DEFAULT_SECRET, xxh3_64_long_default)
730}
731
732#[inline]
733///Returns 64bit hash for provided input using seed.
734///
///Note: While the overhead of deriving a new secret from the provided seed is low,
///it would be more efficient to generate the secret at compile time using the special function
///`const_custom_default_secret` from `const_xxh3`.
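///
///Example (illustrative; the seed value is arbitrary):
///
///```rust
///use xxhash_rust::xxh3::{xxh3_64_with_seed, Xxh3};
///
///let seed = 42;
///let mut hasher = Xxh3::with_seed(seed);
///hasher.update(b"xxh3");
///assert_eq!(hasher.digest(), xxh3_64_with_seed(b"xxh3", seed));
///```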
738pub fn xxh3_64_with_seed(input: &[u8], seed: u64) -> u64 {
739    xxh3_64_internal(input, seed, &DEFAULT_SECRET, xxh3_64_long_with_seed)
740}
741
742#[inline]
743///Returns 64bit hash for provided input using custom secret.
744pub fn xxh3_64_with_secret(input: &[u8], secret: &[u8]) -> u64 {
745    xxh3_64_internal(input, 0, secret, xxh3_64_long_with_secret)
746}
747
748const INTERNAL_BUFFER_SIZE: usize = 256;
749const STRIPES_PER_BLOCK: usize = (DEFAULT_SECRET_SIZE - STRIPE_LEN) / SECRET_CONSUME_RATE;
750
751#[derive(Clone)]
752#[repr(align(64))]
753struct Aligned64<T>(T);
754
755#[inline]
756//Internal function shared between Xxh3 and Xxh3Default
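//Consumes `nb_stripes` stripes starting at `input`, reading the secret at the offset implied by
//`nb_stripes_acc` (counted in stripes). If the block boundary (STRIPES_PER_BLOCK) is reached, the
//accumulator is scrambled and consumption wraps around to the start of the secret.
//Returns the updated number of stripes consumed within the current block.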
757fn xxh3_stateful_consume_stripes(acc: &mut Acc, nb_stripes: usize, nb_stripes_acc: usize, input: *const u8, secret: &[u8; DEFAULT_SECRET_SIZE]) -> usize {
758    if (STRIPES_PER_BLOCK - nb_stripes_acc) <= nb_stripes {
759        let stripes_to_end = STRIPES_PER_BLOCK - nb_stripes_acc;
760        let stripes_after_end = nb_stripes - stripes_to_end;
761
762        accumulate_loop(acc, input, slice_offset_ptr!(secret, nb_stripes_acc * SECRET_CONSUME_RATE), stripes_to_end);
763        scramble_acc(acc, get_aligned_chunk_ref(secret, DEFAULT_SECRET_SIZE - STRIPE_LEN));
764        accumulate_loop(acc, unsafe { input.add(stripes_to_end * STRIPE_LEN) }, secret.as_ptr(), stripes_after_end);
765        stripes_after_end
766    } else {
767        accumulate_loop(acc, input, slice_offset_ptr!(secret, nb_stripes_acc * SECRET_CONSUME_RATE), nb_stripes);
768        nb_stripes_acc.wrapping_add(nb_stripes)
769    }
770}
771
772//Internal function shared between Xxh3 and Xxh3Default
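//Appends input to the 256 byte internal buffer; whenever the buffer fills up it is consumed
//INTERNAL_BUFFER_STRIPES stripes at a time, and any remaining full buffers worth of input are
//consumed directly from the input slice. The unconsumed tail (plus the last STRIPE_LEN bytes of
//already consumed data) stays in the buffer so that digest() always has a complete last stripe.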
773fn xxh3_stateful_update(
774    input: &[u8],
775    total_len: &mut u64,
776    acc: &mut Acc,
777    buffer: &mut Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>, buffered_size: &mut u16,
778    nb_stripes_acc: &mut usize,
779    secret: &Aligned64<[u8; DEFAULT_SECRET_SIZE]>
780) {
781    const INTERNAL_BUFFER_STRIPES: usize = INTERNAL_BUFFER_SIZE / STRIPE_LEN;
782
783    let mut input_ptr = input.as_ptr();
784    let mut input_len = input.len();
785    *total_len = total_len.wrapping_add(input_len as u64);
786
787    if (input_len + *buffered_size as usize) <= INTERNAL_BUFFER_SIZE {
788        unsafe {
789            ptr::copy_nonoverlapping(input_ptr, (buffer.0.as_mut_ptr() as *mut u8).offset(*buffered_size as isize), input_len)
790        }
791        *buffered_size += input_len as u16;
792        return;
793    }
794
795    if *buffered_size > 0 {
796        let fill_len = INTERNAL_BUFFER_SIZE - *buffered_size as usize;
797
798        unsafe {
799            ptr::copy_nonoverlapping(input_ptr, (buffer.0.as_mut_ptr() as *mut u8).offset(*buffered_size as isize), fill_len);
800            input_ptr = input_ptr.add(fill_len);
801            input_len -= fill_len;
802        }
803
804        *nb_stripes_acc = xxh3_stateful_consume_stripes(acc, INTERNAL_BUFFER_STRIPES, *nb_stripes_acc, buffer.0.as_ptr() as *const u8, &secret.0);
805
806        *buffered_size = 0;
807    }
808
809    debug_assert_ne!(input_len, 0);
810    if input_len > INTERNAL_BUFFER_SIZE {
811        loop {
812            *nb_stripes_acc = xxh3_stateful_consume_stripes(acc, INTERNAL_BUFFER_STRIPES, *nb_stripes_acc, input_ptr, &secret.0);
813            input_ptr = unsafe {
814                input_ptr.add(INTERNAL_BUFFER_SIZE)
815            };
816            input_len = input_len - INTERNAL_BUFFER_SIZE;
817
818            if input_len <= INTERNAL_BUFFER_SIZE {
819                break;
820            }
821        }
822
823        unsafe {
824            ptr::copy_nonoverlapping(input_ptr.offset(-(STRIPE_LEN as isize)), (buffer.0.as_mut_ptr() as *mut u8).add(buffer.0.len() - STRIPE_LEN), STRIPE_LEN)
825        }
826    }
827
828    debug_assert_ne!(input_len, 0);
829    debug_assert_eq!(*buffered_size, 0);
830    unsafe {
831        ptr::copy_nonoverlapping(input_ptr, buffer.0.as_mut_ptr() as *mut u8, input_len)
832    }
833    *buffered_size = input_len as u16;
834}
835
836#[inline(always)]
837//Internal function shared between Xxh3 and Xxh3Default
838fn xxh3_stateful_digest_internal(acc: &mut Acc, nb_stripes_acc: usize, buffer: &[u8], old_buffer: &[mem::MaybeUninit<u8>], secret: &Aligned64<[u8; DEFAULT_SECRET_SIZE]>) {
839    if buffer.len() >= STRIPE_LEN {
840        let nb_stripes = (buffer.len() - 1) / STRIPE_LEN;
841        xxh3_stateful_consume_stripes(acc, nb_stripes, nb_stripes_acc, buffer.as_ptr(), &secret.0);
842
843        accumulate_512(acc,
844            get_aligned_chunk_ref(buffer, buffer.len() - STRIPE_LEN),
845            get_aligned_chunk_ref(&secret.0, DEFAULT_SECRET_SIZE - STRIPE_LEN - SECRET_LASTACC_START)
846        );
847    } else {
848        let mut last_stripe = mem::MaybeUninit::<[u8; STRIPE_LEN]>::uninit();
849        let catchup_size = STRIPE_LEN - buffer.len();
850        debug_assert!(buffer.len() > 0);
851
852        let last_stripe = unsafe {
853            ptr::copy_nonoverlapping((old_buffer.as_ptr() as *const u8).add(INTERNAL_BUFFER_SIZE - buffer.len() - catchup_size), last_stripe.as_mut_ptr() as _, catchup_size);
854            ptr::copy_nonoverlapping(buffer.as_ptr(), (last_stripe.as_mut_ptr() as *mut u8).add(catchup_size), buffer.len());
855            slice::from_raw_parts(last_stripe.as_ptr() as *const u8, buffer.len() + catchup_size)
856        };
857
858        accumulate_512(acc, get_aligned_chunk_ref(&last_stripe, 0), get_aligned_chunk_ref(&secret.0, DEFAULT_SECRET_SIZE - STRIPE_LEN - SECRET_LASTACC_START));
859    }
860}
861
862#[derive(Clone)]
863///Default XXH3 Streaming algorithm
864///
///This is an optimized version of the `Xxh3` struct that uses the default seed/secret.
866///
867///Optimal for use in hash maps
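///
///Example (illustrative) of plugging it into `std` collections via [Xxh3DefaultBuilder](struct.Xxh3DefaultBuilder.html):
///
///```rust
///use std::collections::HashMap;
///use xxhash_rust::xxh3::Xxh3DefaultBuilder;
///
///let mut map: HashMap<String, u32, Xxh3DefaultBuilder> = HashMap::with_hasher(Xxh3DefaultBuilder::new());
///map.insert("key".to_owned(), 1);
///assert_eq!(map.get("key"), Some(&1));
///```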
868pub struct Xxh3Default {
869    acc: Acc,
870    buffer: Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>,
871    buffered_size: u16,
872    nb_stripes_acc: usize,
873    total_len: u64,
874}
875
876impl Xxh3Default {
877    const DEFAULT_SECRET: Aligned64<[u8; DEFAULT_SECRET_SIZE]> = Aligned64(DEFAULT_SECRET);
878
879    #[inline(always)]
880    ///Creates new hasher with default settings
881    pub const fn new() -> Self {
882        Self {
883            acc: INITIAL_ACC,
884            buffer: Aligned64([mem::MaybeUninit::uninit(); INTERNAL_BUFFER_SIZE]),
885            buffered_size: 0,
886            nb_stripes_acc: 0,
887            total_len: 0,
888        }
889    }
890
891    #[inline(always)]
892    ///Resets state
893    pub fn reset(&mut self) {
894        self.acc = INITIAL_ACC;
895        self.total_len = 0;
896        self.buffered_size = 0;
897        self.nb_stripes_acc = 0;
898    }
899
900    #[inline(always)]
901    fn buffered_input(&self) -> &[u8] {
902        let ptr = self.buffer.0.as_ptr();
903        unsafe {
904            slice::from_raw_parts(ptr as *const u8, self.buffered_size as usize)
905        }
906    }
907
908    #[inline(always)]
909    fn processed_buffer(&self) -> &[mem::MaybeUninit<u8>] {
910        let ptr = self.buffer.0.as_ptr();
911        unsafe {
912            slice::from_raw_parts(ptr.add(self.buffered_size as usize), self.buffer.0.len() - self.buffered_size as usize)
913        }
914    }
915
916    #[inline(always)]
917    ///Hashes provided chunk
918    pub fn update(&mut self, input: &[u8]) {
919        xxh3_stateful_update(input, &mut self.total_len, &mut self.acc, &mut self.buffer, &mut self.buffered_size, &mut self.nb_stripes_acc, &Self::DEFAULT_SECRET);
920    }
921
922    #[inline(never)]
923    fn digest_mid_sized(&self) -> u64 {
924        let mut acc = self.acc.clone();
925        xxh3_stateful_digest_internal(&mut acc, self.nb_stripes_acc, self.buffered_input(), self.processed_buffer(), &Self::DEFAULT_SECRET);
926
927        merge_accs(&mut acc, get_aligned_chunk_ref(&Self::DEFAULT_SECRET.0, SECRET_MERGEACCS_START),
928                    self.total_len.wrapping_mul(xxh64::PRIME_1))
929    }
930
931    #[inline(never)]
932    fn digest_mid_sized_128(&self) -> u128 {
933        let mut acc = self.acc.clone();
934        xxh3_stateful_digest_internal(&mut acc, self.nb_stripes_acc, self.buffered_input(), self.processed_buffer(), &Self::DEFAULT_SECRET);
935
936        let low = merge_accs(&mut acc, get_aligned_chunk_ref(&Self::DEFAULT_SECRET.0, SECRET_MERGEACCS_START),
937                                self.total_len.wrapping_mul(xxh64::PRIME_1));
938        let high = merge_accs(&mut acc, get_aligned_chunk_ref(&Self::DEFAULT_SECRET.0,
939                                  DEFAULT_SECRET_SIZE - mem::size_of_val(&self.acc) - SECRET_MERGEACCS_START),
940                              !self.total_len.wrapping_mul(xxh64::PRIME_2));
941        ((high as u128) << 64) | (low as u128)
942    }
943
944    ///Computes hash.
945    pub fn digest(&self) -> u64 {
        //Keeping the mid-sized digest path in a separate function allows this one to be inlined, which benefits
        //code generation when hashing fixed size types and/or if the seed is known.
948        if self.total_len > MID_SIZE_MAX as u64 {
949            self.digest_mid_sized()
950        } else {
951            xxh3_64_internal(self.buffered_input(), 0, &Self::DEFAULT_SECRET.0, xxh3_64_long_default)
952        }
953    }
954
955    ///Computes hash as 128bit integer.
956    pub fn digest128(&self) -> u128 {
        //Keeping the mid-sized digest path in a separate function allows this one to be inlined, which benefits
        //code generation when hashing fixed size types and/or if the seed is known.
959        if self.total_len > MID_SIZE_MAX as u64 {
960            self.digest_mid_sized_128()
961        } else {
962            xxh3_128_internal(self.buffered_input(), 0, &Self::DEFAULT_SECRET.0, xxh3_128_long_default)
963        }
964    }
965}
966
967impl Default for Xxh3Default {
968    #[inline(always)]
969    fn default() -> Self {
970        Self::new()
971    }
972}
973
974
975impl hash::Hasher for Xxh3Default {
976    #[inline(always)]
977    fn finish(&self) -> u64 {
978        self.digest()
979    }
980
981    #[inline(always)]
982    fn write(&mut self, input: &[u8]) {
983        self.update(input)
984    }
985}
986
987#[cfg(feature = "std")]
988impl std::io::Write for Xxh3Default {
989    #[inline]
990    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
991        self.update(buf);
992        Ok(buf.len())
993    }
994
995    #[inline]
996    fn flush(&mut self) -> std::io::Result<()> {
997        Ok(())
998    }
999}
1000
1001#[derive(Clone)]
1002///XXH3 Streaming algorithm
1003///
///The internal state uses rather large buffers, therefore it might be beneficial
///to store the hasher on the heap rather than on the stack.
///The implementation makes no attempt at that, leaving the choice entirely to the user.
///
///Note that it is better to use [Xxh3Default](struct.Xxh3Default.html) in hash maps
///due to the Rust hash interface, which requires creating a new instance of the hasher every time.
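///
///Streaming example (illustrative; input and seed values are arbitrary):
///
///```rust
///use xxhash_rust::xxh3::{xxh3_64_with_seed, xxh3_128_with_seed, Xxh3};
///
///let mut hasher = Xxh3::with_seed(42);
///hasher.update(b"split ");
///hasher.update(b"input");
///assert_eq!(hasher.digest(), xxh3_64_with_seed(b"split input", 42));
///
///hasher.reset();
///hasher.update(b"split input");
///assert_eq!(hasher.digest128(), xxh3_128_with_seed(b"split input", 42));
///```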
1010pub struct Xxh3 {
1011    acc: Acc,
1012    custom_secret: Aligned64<[u8; DEFAULT_SECRET_SIZE]>,
1013    buffer: Aligned64<[mem::MaybeUninit<u8>; INTERNAL_BUFFER_SIZE]>,
1014    buffered_size: u16,
1015    nb_stripes_acc: usize,
1016    total_len: u64,
1017    seed: u64,
1018}
1019
1020impl Xxh3 {
1021    #[inline(always)]
1022    ///Creates new hasher with default settings
1023    pub const fn new() -> Self {
1024        Self::with_custom_ops(0, DEFAULT_SECRET)
1025    }
1026
1027    #[inline]
1028    ///Creates new hasher with all options.
1029    const fn with_custom_ops(seed: u64, secret: [u8; DEFAULT_SECRET_SIZE]) -> Self {
1030        Self {
1031            acc: INITIAL_ACC,
1032            custom_secret: Aligned64(secret),
1033            buffer: Aligned64([mem::MaybeUninit::uninit(); INTERNAL_BUFFER_SIZE]),
1034            buffered_size: 0,
1035            nb_stripes_acc: 0,
1036            total_len: 0,
1037            seed,
1038        }
1039    }
1040
1041    #[inline(always)]
    ///Creates new hasher with custom secret.
1043    pub const fn with_secret(secret: [u8; DEFAULT_SECRET_SIZE]) -> Self {
1044        Self::with_custom_ops(0, secret)
1045    }
1046
1047    #[inline(always)]
1048    ///Creates new hasher with custom seed.
1049    pub fn with_seed(seed: u64) -> Self {
1050        Self::with_custom_ops(seed, custom_default_secret(seed))
1051    }
1052
1053    #[inline(always)]
1054    ///Resets state
1055    pub fn reset(&mut self) {
1056        self.acc = INITIAL_ACC;
1057        self.total_len = 0;
1058        self.buffered_size = 0;
1059        self.nb_stripes_acc = 0;
1060    }
1061
1062    #[inline(always)]
1063    fn buffered_input(&self) -> &[u8] {
1064        let ptr = self.buffer.0.as_ptr();
1065        unsafe {
1066            slice::from_raw_parts(ptr as *const u8, self.buffered_size as usize)
1067        }
1068    }
1069
1070    #[inline(always)]
1071    fn processed_buffer(&self) -> &[mem::MaybeUninit<u8>] {
1072        let ptr = self.buffer.0.as_ptr();
1073        unsafe {
1074            slice::from_raw_parts(ptr.add(self.buffered_size as usize), self.buffer.0.len() - self.buffered_size as usize)
1075        }
1076    }
1077
1078    #[inline]
1079    ///Hashes provided chunk
1080    pub fn update(&mut self, input: &[u8]) {
1081        xxh3_stateful_update(input, &mut self.total_len, &mut self.acc, &mut self.buffer, &mut self.buffered_size, &mut self.nb_stripes_acc, &self.custom_secret);
1082    }
1083
1084    #[inline(never)]
1085    fn digest_mid_sized(&self) -> u64 {
1086        let mut acc = self.acc.clone();
1087        xxh3_stateful_digest_internal(&mut acc, self.nb_stripes_acc, self.buffered_input(), self.processed_buffer(), &self.custom_secret);
1088
1089        merge_accs(&mut acc, get_aligned_chunk_ref(&self.custom_secret.0, SECRET_MERGEACCS_START),
1090                    self.total_len.wrapping_mul(xxh64::PRIME_1))
1091    }
1092
1093    #[inline(never)]
1094    fn digest_mid_sized_128(&self) -> u128 {
1095        let mut acc = self.acc.clone();
1096        xxh3_stateful_digest_internal(&mut acc, self.nb_stripes_acc, self.buffered_input(), self.processed_buffer(), &self.custom_secret);
1097
1098        let low = merge_accs(&mut acc, get_aligned_chunk_ref(&self.custom_secret.0, SECRET_MERGEACCS_START), self.total_len.wrapping_mul(xxh64::PRIME_1));
1099        let high = merge_accs(&mut acc, get_aligned_chunk_ref(&self.custom_secret.0, self.custom_secret.0.len() - mem::size_of_val(&self.acc) - SECRET_MERGEACCS_START), !self.total_len.wrapping_mul(xxh64::PRIME_2));
1100        ((high as u128) << 64) | (low as u128)
1101    }
1102
1103    ///Computes hash.
1104    pub fn digest(&self) -> u64 {
        //Keeping the mid-sized digest path in a separate function allows this one to be inlined, which benefits
        //code generation when hashing fixed size types and/or if the seed is known.
1107        if self.total_len > MID_SIZE_MAX as u64 {
1108            self.digest_mid_sized()
1109        } else if self.seed > 0 {
            //Technically this should not be needed,
            //but the original xxh3 implementation uses the default secret for inputs of size less than or equal to MID_SIZE_MAX
1112            xxh3_64_internal(self.buffered_input(), self.seed, &DEFAULT_SECRET, xxh3_64_long_with_seed)
1113        } else {
1114            xxh3_64_internal(self.buffered_input(), self.seed, &self.custom_secret.0, xxh3_64_long_with_secret)
1115        }
1116    }
1117
1118    ///Computes hash as 128bit integer.
1119    pub fn digest128(&self) -> u128 {
        //Keeping the mid-sized digest path in a separate function allows this one to be inlined, which benefits
        //code generation when hashing fixed size types and/or if the seed is known.
1122        if self.total_len > MID_SIZE_MAX as u64 {
1123            self.digest_mid_sized_128()
1124        } else if self.seed > 0 {
            //Technically this should not be needed,
            //but the original xxh3 implementation uses the default secret for inputs of size less than or equal to MID_SIZE_MAX
1127            xxh3_128_internal(self.buffered_input(), self.seed, &DEFAULT_SECRET, xxh3_128_long_with_seed)
1128        } else {
1129            xxh3_128_internal(self.buffered_input(), self.seed, &self.custom_secret.0, xxh3_128_long_with_secret)
1130        }
1131    }
1132}
1133
1134impl Default for Xxh3 {
1135    #[inline(always)]
1136    fn default() -> Self {
1137        Self::new()
1138    }
1139}
1140
1141impl core::hash::Hasher for Xxh3 {
1142    #[inline(always)]
1143    fn finish(&self) -> u64 {
1144        self.digest()
1145    }
1146
1147    #[inline(always)]
1148    fn write(&mut self, input: &[u8]) {
1149        self.update(input)
1150    }
1151}
1152
1153#[cfg(feature = "std")]
1154impl std::io::Write for Xxh3 {
1155    #[inline]
1156    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
1157        self.update(buf);
1158        Ok(buf.len())
1159    }
1160
1161    #[inline]
1162    fn flush(&mut self) -> std::io::Result<()> {
1163        Ok(())
1164    }
1165}
1166
1167#[derive(Clone, Copy)]
1168///Hash builder for `Xxh3`
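///
///Example (illustrative) of using it as a `BuildHasher` for a `HashMap`:
///
///```rust
///use std::collections::HashMap;
///use xxhash_rust::xxh3::Xxh3Builder;
///
///let build = Xxh3Builder::new().with_seed(1);
///let mut map: HashMap<&str, u32, Xxh3Builder> = HashMap::with_hasher(build);
///map.insert("key", 1);
///assert_eq!(map.get("key"), Some(&1));
///```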
1169pub struct Xxh3Builder {
1170    seed: u64,
1171    secret: [u8; DEFAULT_SECRET_SIZE],
1172}
1173
1174impl Xxh3Builder {
1175    #[inline(always)]
1176    ///Creates new instance with default params.
1177    pub const fn new() -> Self {
1178        Self {
1179            seed: 0,
1180            secret: DEFAULT_SECRET,
1181        }
1182    }
1183
1184    #[inline(always)]
1185    ///Sets `seed` for `xxh3` algorithm
1186    pub const fn with_seed(mut self, seed: u64) -> Self {
1187        self.seed = seed;
1188        self
1189    }
1190
1191    #[inline(always)]
1192    ///Sets custom `secret` for `xxh3` algorithm
1193    pub const fn with_secret(mut self, secret: [u8; DEFAULT_SECRET_SIZE]) -> Self {
1194        self.secret = secret;
1195        self
1196    }
1197
1198    #[inline(always)]
1199    ///Creates `Xxh3` instance
1200    pub const fn build(self) -> Xxh3 {
1201        Xxh3::with_custom_ops(self.seed, self.secret)
1202    }
1203}
1204
1205impl core::hash::BuildHasher for Xxh3Builder {
1206    type Hasher = Xxh3;
1207
1208    #[inline(always)]
1209    fn build_hasher(&self) -> Self::Hasher {
1210        self.build()
1211    }
1212}
1213
1214impl Default for Xxh3Builder {
1215    #[inline(always)]
1216    fn default() -> Self {
1217        Self::new()
1218    }
1219}
1220
1221#[derive(Clone, Copy)]
1222///Hash builder for `Xxh3Default`
1223pub struct Xxh3DefaultBuilder;
1224
1225impl Xxh3DefaultBuilder {
1226    #[inline(always)]
1227    ///Creates new instance with default params.
1228    pub const fn new() -> Self {
1229        Self
1230    }
1231
1232    #[inline(always)]
    ///Creates `Xxh3Default` instance
1234    pub const fn build(self) -> Xxh3Default {
1235        Xxh3Default::new()
1236    }
1237}
1238
1239impl core::hash::BuildHasher for Xxh3DefaultBuilder {
1240    type Hasher = Xxh3Default;
1241
1242    #[inline(always)]
1243    fn build_hasher(&self) -> Self::Hasher {
1244        self.build()
1245    }
1246}
1247
1248impl Default for Xxh3DefaultBuilder {
1249    #[inline(always)]
1250    fn default() -> Self {
1251        Self::new()
1252    }
1253}
1254
1255//
1256//128bit
1257//
1258
1259#[inline]
1260fn xxh3_128_long_impl(input: &[u8], secret: &[u8]) -> u128 {
1261    let mut acc = INITIAL_ACC;
1262
1263    hash_long_internal_loop(&mut acc, input, secret);
1264
1265    debug_assert!(secret.len() >= mem::size_of::<Acc>() + SECRET_MERGEACCS_START);
1266    let lo = merge_accs(&mut acc, get_aligned_chunk_ref(secret, SECRET_MERGEACCS_START), (input.len() as u64).wrapping_mul(xxh64::PRIME_1));
1267    let hi = merge_accs(&mut acc,
1268                        get_aligned_chunk_ref(secret, secret.len() - mem::size_of::<Acc>() - SECRET_MERGEACCS_START),
1269                        !(input.len() as u64).wrapping_mul(xxh64::PRIME_2));
1270
1271    lo as u128 | (hi as u128) << 64
1272}
1273
1274#[inline(always)]
1275fn xxh3_128_9to16(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
1276    let flip_lo = (read_64le_unaligned(secret, 32) ^ read_64le_unaligned(secret, 40)).wrapping_sub(seed);
1277    let flip_hi = (read_64le_unaligned(secret, 48) ^ read_64le_unaligned(secret, 56)).wrapping_add(seed);
1278    let input_lo = read_64le_unaligned(input, 0);
1279    let mut input_hi = read_64le_unaligned(input, input.len() - 8);
1280
1281    let (mut mul_low, mut mul_high) = mul64_to128(input_lo ^ input_hi ^ flip_lo, xxh64::PRIME_1);
1282
1283    mul_low = mul_low.wrapping_add((input.len() as u64 - 1) << 54);
1284    input_hi ^= flip_hi;
1285    mul_high = mul_high.wrapping_add(
1286        input_hi.wrapping_add(mult32_to64(input_hi as u32, xxh32::PRIME_2 - 1))
1287    );
1288
1289    mul_low ^= mul_high.swap_bytes();
1290
1291    let (result_low, mut result_hi) = mul64_to128(mul_low, xxh64::PRIME_2);
1292    result_hi = result_hi.wrapping_add(
1293        mul_high.wrapping_mul(xxh64::PRIME_2)
1294    );
1295
1296    to_u128!(avalanche(result_low), avalanche(result_hi))
1297}
1298
1299#[inline(always)]
1300fn xxh3_128_4to8(input: &[u8], mut seed: u64, secret: &[u8]) -> u128 {
1301    seed ^= ((seed as u32).swap_bytes() as u64) << 32;
1302
1303    let lo = read_32le_unaligned(input, 0);
1304    let hi = read_32le_unaligned(input, input.len() - 4);
1305    let input_64 = (lo as u64).wrapping_add((hi as u64) << 32);
1306
1307    let flip = (read_64le_unaligned(secret, 16) ^ read_64le_unaligned(secret, 24)).wrapping_add(seed);
1308    let keyed = input_64 ^ flip;
1309
1310    let (mut lo, mut hi) = mul64_to128(keyed, xxh64::PRIME_1.wrapping_add((input.len() as u64) << 2));
1311
1312    hi = hi.wrapping_add(lo << 1);
1313    lo ^= hi >> 3;
1314
1315    lo = xorshift64(lo, 35).wrapping_mul(0x9FB21C651E98DF25);
1316    lo = xorshift64(lo, 28);
1317    hi = avalanche(hi);
1318
1319    lo as u128 | (hi as u128) << 64
1320}
1321
1322#[inline(always)]
1323fn xxh3_128_1to3(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
1324    let c1; let c2; let c3;
1325    unsafe {
1326        c1 = *input.get_unchecked(0);
1327        c2 = *input.get_unchecked(input.len() >> 1);
1328        c3 = *input.get_unchecked(input.len() - 1);
1329    };
1330    let input_lo = (c1 as u32) << 16 | (c2 as u32) << 24 | (c3 as u32) << 0 | (input.len() as u32) << 8;
1331    let input_hi = input_lo.swap_bytes().rotate_left(13);
1332
1333    let flip_lo = (read_32le_unaligned(secret, 0) as u64 ^ read_32le_unaligned(secret, 4) as u64).wrapping_add(seed);
1334    let flip_hi = (read_32le_unaligned(secret, 8) as u64 ^ read_32le_unaligned(secret, 12) as u64).wrapping_sub(seed);
1335    let keyed_lo = input_lo as u64 ^ flip_lo;
1336    let keyed_hi = input_hi as u64 ^ flip_hi;
1337
1338    xxh64::avalanche(keyed_lo) as u128 | (xxh64::avalanche(keyed_hi) as u128) << 64
1339}
1340
1341#[inline(always)]
1342fn xxh3_128_0to16(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
1343    if input.len() > 8 {
1344        xxh3_128_9to16(input, seed, secret)
1345    } else if input.len() >= 4 {
1346        xxh3_128_4to8(input, seed, secret)
1347    } else if input.len() > 0 {
1348        xxh3_128_1to3(input, seed, secret)
1349    } else {
1350        let flip_lo = read_64le_unaligned(secret, 64) ^ read_64le_unaligned(secret, 72);
1351        let flip_hi = read_64le_unaligned(secret, 80) ^ read_64le_unaligned(secret, 88);
1352        xxh64::avalanche(seed ^ flip_lo) as u128 | (xxh64::avalanche(seed ^ flip_hi) as u128) << 64
1353    }
1354}
1355
1356#[inline(always)]
1357fn xxh3_128_7to128(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
1358    let mut lo = (input.len() as u64).wrapping_mul(xxh64::PRIME_1);
1359    let mut hi = 0;
1360
1361    if input.len() > 32 {
1362        if input.len() > 64 {
1363            if input.len() > 96 {
1364
1365                mix32_b(&mut lo, &mut hi,
1366                    get_aligned_chunk_ref(input, 48),
1367                    get_aligned_chunk_ref(input, input.len() - 64),
1368                    get_aligned_chunk_ref(secret, 96),
1369                    seed
1370                );
1371            }
1372
1373            mix32_b(&mut lo, &mut hi,
1374                get_aligned_chunk_ref(input, 32),
1375                get_aligned_chunk_ref(input, input.len() - 48),
1376                get_aligned_chunk_ref(secret, 64),
1377                seed
1378            );
1379        }
1380
1381        mix32_b(&mut lo, &mut hi,
1382            get_aligned_chunk_ref(input, 16),
1383            get_aligned_chunk_ref(input, input.len() - 32),
1384            get_aligned_chunk_ref(secret, 32),
1385            seed
1386        );
1387    }
1388
1389    mix32_b(&mut lo, &mut hi,
1390        get_aligned_chunk_ref(input, 0),
1391        get_aligned_chunk_ref(input, input.len() - 16),
1392        get_aligned_chunk_ref(secret, 0),
1393        seed
1394    );
1395
1396    to_u128!(
1397        avalanche(
1398            lo.wrapping_add(hi)
1399        ),
1400        0u64.wrapping_sub(
1401            avalanche(
1402                lo.wrapping_mul(xxh64::PRIME_1)
1403                  .wrapping_add(hi.wrapping_mul(xxh64::PRIME_4))
1404                  .wrapping_add((input.len() as u64).wrapping_sub(seed).wrapping_mul(xxh64::PRIME_2))
1405            )
1406        )
1407    )
1408}
1409
1410#[inline(never)]
1411fn xxh3_128_129to240(input: &[u8], seed: u64, secret: &[u8]) -> u128 {
1412    const START_OFFSET: usize = 3;
1413    const LAST_OFFSET: usize = 17;
1414    let nb_rounds = input.len() / 32;
1415    debug_assert!(nb_rounds >= 4);
1416
1417    let mut lo = (input.len() as u64).wrapping_mul(xxh64::PRIME_1);
1418    let mut hi = 0;
1419
1420    let mut idx = 0;
1421    while idx < 4 {
1422        let offset_idx = 32 * idx;
1423        mix32_b(&mut lo, &mut hi,
1424            get_aligned_chunk_ref(input, offset_idx),
1425            get_aligned_chunk_ref(input, offset_idx + 16),
1426            get_aligned_chunk_ref(secret, offset_idx),
1427            seed
1428        );
1429        idx = idx.wrapping_add(1);
1430    }
1431
1432    lo = avalanche(lo);
1433    hi = avalanche(hi);
1434
1435    while idx < nb_rounds {
1436        mix32_b(&mut lo, &mut hi,
1437            get_aligned_chunk_ref(input, 32 * idx),
1438            get_aligned_chunk_ref(input, (32 * idx) + 16),
1439            get_aligned_chunk_ref(secret, START_OFFSET.wrapping_add(32 * (idx - 4))),
1440            seed
1441        );
1442        idx = idx.wrapping_add(1);
1443    }
1444
1445    mix32_b(&mut lo, &mut hi,
1446        get_aligned_chunk_ref(input, input.len() - 16),
1447        get_aligned_chunk_ref(input, input.len() - 32),
1448        get_aligned_chunk_ref(secret, SECRET_SIZE_MIN - LAST_OFFSET - 16),
1449        0u64.wrapping_sub(seed)
1450    );
1451
1452    to_u128!(
1453        avalanche(
1454            lo.wrapping_add(hi)
1455        ),
1456        0u64.wrapping_sub(
1457            avalanche(
1458                lo.wrapping_mul(xxh64::PRIME_1)
1459                  .wrapping_add(hi.wrapping_mul(xxh64::PRIME_4))
1460                  .wrapping_add((input.len() as u64).wrapping_sub(seed).wrapping_mul(xxh64::PRIME_2))
1461            )
1462        )
1463    )
1464}
1465
1466#[inline(always)]
1467fn xxh3_128_internal(input: &[u8], seed: u64, secret: &[u8], long_hash_fn: LongHashFn128) -> u128 {
1468    debug_assert!(secret.len() >= SECRET_SIZE_MIN);
1469
1470    if input.len() <= 16 {
1471        xxh3_128_0to16(input, seed, secret)
1472    } else if input.len() <= 128 {
1473        xxh3_128_7to128(input, seed, secret)
1474    } else if input.len() <= MID_SIZE_MAX {
1475        xxh3_128_129to240(input, seed, secret)
1476    } else {
1477        long_hash_fn(input, seed, secret)
1478    }
1479}
1480
1481#[inline(never)]
1482fn xxh3_128_long_default(input: &[u8], _seed: u64, _secret: &[u8]) -> u128 {
1483    xxh3_128_long_impl(input, &DEFAULT_SECRET)
1484}
1485
1486#[inline(never)]
1487fn xxh3_128_long_with_seed(input: &[u8], seed: u64, _secret: &[u8]) -> u128 {
1488    match seed {
1489        0 => xxh3_128_long_impl(input, &DEFAULT_SECRET),
1490        seed => xxh3_128_long_impl(input, &custom_default_secret(seed)),
1491    }
1492}
1493
1494#[inline(never)]
1495fn xxh3_128_long_with_secret(input: &[u8], _seed: u64, secret: &[u8]) -> u128 {
1496    xxh3_128_long_impl(input, secret)
1497}
1498
1499#[inline]
1500///Returns 128bit hash for provided input.
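///
///Example (illustrative; one-shot and streaming over the same bytes agree):
///
///```rust
///use xxhash_rust::xxh3::{xxh3_128, Xxh3Default};
///
///let mut hasher = Xxh3Default::new();
///hasher.update(b"xxh3");
///assert_eq!(hasher.digest128(), xxh3_128(b"xxh3"));
///```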
1501pub fn xxh3_128(input: &[u8]) -> u128 {
1502    xxh3_128_internal(input, 0, &DEFAULT_SECRET, xxh3_128_long_default)
1503}
1504
1505#[inline]
///Returns 128bit hash for provided input using seed.
///
///Note: While the overhead of deriving a new secret from the provided seed is low,
///it would be more efficient to generate the secret at compile time using the special function
///`const_custom_default_secret` from `const_xxh3`.
1511pub fn xxh3_128_with_seed(input: &[u8], seed: u64) -> u128 {
1512    xxh3_128_internal(input, seed, &DEFAULT_SECRET, xxh3_128_long_with_seed)
1513}
1514
1515#[inline]
///Returns 128bit hash for provided input using custom secret.
1517pub fn xxh3_128_with_secret(input: &[u8], secret: &[u8]) -> u128 {
1518    xxh3_128_internal(input, 0, secret, xxh3_128_long_with_secret)
1519}