redb/tree_store/page_store/xxh3.rs

// Copyright (c) 2022 Christopher Berner
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Copied from xxh3 crate, commit hash e91f09d1e930e179c11d5cfda6d14284bfb006f8
24
use std::mem::size_of;

const STRIPE_LENGTH: usize = 64;
const SECRET_CONSUME_RATE: usize = 8;

const MIN_SECRET_SIZE: usize = 136;
const DEFAULT_SECRET: [u8; 192] = [
    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
];

const PRIME32: [u64; 3] = [0x9E3779B1, 0x85EBCA77, 0xC2B2AE3D];
const PRIME64: [u64; 5] = [
    0x9E3779B185EBCA87,
    0xC2B2AE3D27D4EB4F,
    0x165667B19E3779F9,
    0x85EBCA77C2B2AE63,
    0x27D4EB2F165667C5,
];

const INIT_ACCUMULATORS: [u64; 8] = [
    PRIME32[2], PRIME64[0], PRIME64[1], PRIME64[2], PRIME64[3], PRIME32[1], PRIME64[4], PRIME32[0],
];

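// Public entry points. Inputs of at most 240 bytes go through the scalar
// short-input paths below with the unmodified DEFAULT_SECRET; longer inputs
// derive a seeded secret and run the striped accumulator loop, using AVX2 on
// x86/x86_64 when it is detected at runtime, NEON on aarch64, and the portable
// generic implementation otherwise.
//
// Example usage: `hash64_with_seed(b"redb", 0)` hashes a short key with seed 0.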
pub fn hash64_with_seed(data: &[u8], seed: u64) -> u64 {
    if data.len() <= 240 {
        hash64_0to240(data, &DEFAULT_SECRET, seed)
    } else {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") {
                unsafe {
                    return hash64_large_avx2(data, seed);
                }
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            unsafe { hash64_large_neon(data, seed) }
        }
        #[cfg(not(target_arch = "aarch64"))]
        hash64_large_generic(
            data,
            seed,
            gen_secret_generic,
            scramble_accumulators_generic,
            accumulate_stripe_generic,
        )
    }
}

pub fn hash128_with_seed(data: &[u8], seed: u64) -> u128 {
    if data.len() <= 240 {
        hash128_0to240(data, &DEFAULT_SECRET, seed)
    } else {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if is_x86_feature_detected!("avx2") {
            unsafe {
                return hash128_large_avx2(data, seed);
            }
        }
        #[cfg(target_arch = "aarch64")]
        unsafe {
            hash128_large_neon(data, seed)
        }
        #[cfg(not(target_arch = "aarch64"))]
        hash128_large_generic(
            data,
            seed,
            gen_secret_generic,
            scramble_accumulators_generic,
            accumulate_stripe_generic,
        )
    }
}

fn get_u32(data: &[u8], i: usize) -> u32 {
    u32::from_le_bytes(
        data[i * size_of::<u32>()..(i + 1) * size_of::<u32>()]
            .try_into()
            .unwrap(),
    )
}

fn get_u64(data: &[u8], i: usize) -> u64 {
    u64::from_le_bytes(
        data[i * size_of::<u64>()..(i + 1) * size_of::<u64>()]
            .try_into()
            .unwrap(),
    )
}

fn xxh64_avalanche(mut x: u64) -> u64 {
    x ^= x >> 33;
    x = x.wrapping_mul(PRIME64[1]);
    x ^= x >> 29;
    x = x.wrapping_mul(PRIME64[2]);
    x ^= x >> 32;
    x
}

fn xxh3_avalanche(mut x: u64) -> u64 {
    x = xorshift(x, 37);
    x = x.wrapping_mul(0x165667919E3779F9);
    x = xorshift(x, 32);
    x
}

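// Folds the eight 64-bit accumulators into one 64-bit result: each pair of
// lanes is XORed with secret material, combined with a 128-bit multiply-fold
// (mul128_and_xor), summed into `init`, and finally avalanched.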
#[inline(always)]
fn merge_accumulators(
    accumulators: [u64; INIT_ACCUMULATORS.len()],
    secret: &[u8],
    init: u64,
) -> u64 {
    let mut result = init;
    for i in 0..4 {
        let a1 = accumulators[2 * i];
        let a2 = accumulators[2 * i + 1];
        let s1 = get_u64(&secret[16 * i..], 0);
        let s2 = get_u64(&secret[16 * i..], 1);
        result = result.wrapping_add(mul128_and_xor(a1 ^ s1, a2 ^ s2));
    }
    xxh3_avalanche(result)
}

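// Accumulator scrambling, applied once per full block of input: every lane is
// xorshifted by 47, XORed with the tail of the secret, and multiplied by
// PRIME32[0]. The AVX2, NEON and generic functions below implement the same
// transform.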
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn scramble_accumulators_avx2(
    accumulators: &mut [u64; INIT_ACCUMULATORS.len()],
    secret: &[u8],
) {
    unsafe {
        #[cfg(target_arch = "x86")]
        use std::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use std::arch::x86_64::*;

        #[allow(clippy::cast_possible_truncation)]
        let simd_prime = _mm256_set1_epi32(PRIME32[0] as i32);
        let secret_ptr = secret.as_ptr();
        let accumulators_ptr = accumulators.as_mut_ptr();

        for i in 0..(STRIPE_LENGTH / 32) {
            let a = _mm256_loadu_si256((accumulators_ptr as *const __m256i).add(i));
            let shifted = _mm256_srli_epi64::<47>(a);
            let b = _mm256_xor_si256(a, shifted);

            let s = _mm256_loadu_si256((secret_ptr as *const __m256i).add(i));
            let c = _mm256_xor_si256(b, s);
            let c_high = _mm256_shuffle_epi32::<49>(c);

            let low = _mm256_mul_epu32(c, simd_prime);
            let high = _mm256_mul_epu32(c_high, simd_prime);
            let high = _mm256_slli_epi64::<32>(high);
            let result = _mm256_add_epi64(low, high);
            _mm256_storeu_si256((accumulators_ptr as *mut __m256i).add(i), result);
        }
    }
}

#[cfg(target_arch = "aarch64")]
unsafe fn scramble_accumulators_neon(
    accumulators: &mut [u64; INIT_ACCUMULATORS.len()],
    secret: &[u8],
) {
    #[cfg(target_arch = "aarch64")]
    use std::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use std::arch::arm::*;

    unsafe {
        let prime = vdup_n_u32(PRIME32[0].try_into().unwrap());

        let accum_ptr = accumulators.as_mut_ptr();
        let secret_ptr = secret.as_ptr();
        assert!(secret.len() >= STRIPE_LENGTH);
        for i in 0..(STRIPE_LENGTH / 16) {
            // xorshift
            let accum = vld1q_u64(accum_ptr.add(i * 2));
            let shifted = vshrq_n_u64(accum, 47);
            let accum = veorq_u64(accum, shifted);

            // xor with secret
            let s = vld1q_u8(secret_ptr.add(i * 16));
            let accum = veorq_u64(accum, vreinterpretq_u64_u8(s));

            // mul with prime. Sadly there's no vmulq_u64
            let accum_low = vmovn_u64(accum);
            let accum_high = vshrn_n_u64(accum, 32);
            let prod_high = vshlq_n_u64(vmull_u32(accum_high, prime), 32);
            let accum = vmlal_u32(prod_high, accum_low, prime);
            vst1q_u64(accum_ptr.add(i * 2), accum);
        }
    }
}

#[cfg(not(target_arch = "aarch64"))]
fn scramble_accumulators_generic(accumulators: &mut [u64; INIT_ACCUMULATORS.len()], secret: &[u8]) {
    for (i, x) in accumulators.iter_mut().enumerate() {
        let s = get_u64(secret, i);
        *x = xorshift(*x, 47);
        *x ^= s;
        *x = x.wrapping_mul(PRIME32[0]);
    }
}

fn xorshift(x: u64, shift: u64) -> u64 {
    x ^ (x >> shift)
}

fn rrmxmx(mut x: u64, y: u64) -> u64 {
    x ^= x.rotate_left(49) ^ x.rotate_left(24);
    x = x.wrapping_mul(0x9FB21C651E98DF25);
    x ^= (x >> 35).wrapping_add(y);
    x = x.wrapping_mul(0x9FB21C651E98DF25);
    xorshift(x, 28)
}

fn mul128_and_xor(x: u64, y: u64) -> u64 {
    let z = u128::from(x) * u128::from(y);
    #[allow(clippy::cast_possible_truncation)]
    (z as u64 ^ (z >> 64) as u64)
}

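// 16- and 32-byte mixing steps shared by the mid-size (17..=240 byte) paths:
// input words are XORed with seed-adjusted secret words and folded with a
// 128-bit multiply.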
fn mix16(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let x1 = get_u64(data, 0);
    let x2 = get_u64(data, 1);
    let s1 = get_u64(secret, 0).wrapping_add(seed);
    let s2 = get_u64(secret, 1).wrapping_sub(seed);

    mul128_and_xor(x1 ^ s1, x2 ^ s2)
}

fn mix32(state: (u64, u64), data1: &[u8], data2: &[u8], secret: &[u8], seed: u64) -> (u64, u64) {
    let (mut r_low, mut r_high) = state;

    r_low = r_low.wrapping_add(mix16(data1, secret, seed));
    r_low ^= get_u64(data2, 0).wrapping_add(get_u64(data2, 1));
    r_high = r_high.wrapping_add(mix16(data2, &secret[16..], seed));
    r_high ^= get_u64(data1, 0).wrapping_add(get_u64(data1, 1));

    (r_low, r_high)
}

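// Secret generation for the long-input paths: the seed is added to each even
// 64-bit word of DEFAULT_SECRET and subtracted from each odd one (the AVX2
// variant below vectorizes the same computation).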
fn gen_secret_generic(seed: u64) -> [u8; DEFAULT_SECRET.len()] {
    let mut secret = [0u8; DEFAULT_SECRET.len()];
    let iterations = DEFAULT_SECRET.len() / 16;
    for i in 0..iterations {
        let x = get_u64(&DEFAULT_SECRET, 2 * i).wrapping_add(seed);
        secret[16 * i..16 * i + 8].copy_from_slice(&x.to_le_bytes());
        let x = get_u64(&DEFAULT_SECRET, 2 * i + 1).wrapping_sub(seed);
        secret[16 * i + 8..16 * (i + 1)].copy_from_slice(&x.to_le_bytes());
    }
    secret
}

#[allow(clippy::cast_possible_truncation)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn gen_secret_avx2(seed: u64) -> [u8; DEFAULT_SECRET.len()] {
    unsafe {
        #[cfg(target_arch = "x86")]
        use std::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use std::arch::x86_64::*;

        #[allow(clippy::cast_possible_wrap)]
        let xxh_i64 = 0u64.wrapping_sub(seed) as i64;
        #[allow(clippy::cast_possible_wrap)]
        let seed = seed as i64;

        let simd_seed = _mm256_set_epi64x(xxh_i64, seed, xxh_i64, seed);

        let mut output = [0u8; DEFAULT_SECRET.len()];
        let output_ptr = output.as_mut_ptr();
        let secret_ptr = DEFAULT_SECRET.as_ptr();
        for i in 0..6 {
            let s = _mm256_loadu_si256((secret_ptr as *const __m256i).add(i));
            let x = _mm256_add_epi64(s, simd_seed);
            _mm256_storeu_si256((output_ptr as *mut __m256i).add(i), x);
        }

        output
    }
}

#[cfg(target_arch = "aarch64")]
unsafe fn accumulate_stripe_neon(accumulators: &mut [u64; 8], data: &[u8], secret: &[u8]) {
    #[cfg(target_arch = "aarch64")]
    use std::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use std::arch::arm::*;

    unsafe {
        let accum_ptr = accumulators.as_mut_ptr();
        let data_ptr = data.as_ptr();
        let secret_ptr = secret.as_ptr();
        assert!(data.len() >= STRIPE_LENGTH);
        assert!(secret.len() >= STRIPE_LENGTH);
        for i in 0..(STRIPE_LENGTH / 16) {
            let x = vld1q_u8(data_ptr.add(i * 16));
            let s = vld1q_u8(secret_ptr.add(i * 16));
            let x64 = vreinterpretq_u64_u8(x);
            let y = vextq_u64(x64, x64, 1);

            let result = vld1q_u64(accum_ptr.add(i * 2));
            let result = vaddq_u64(result, y);

            let z = vreinterpretq_u64_u8(veorq_u8(x, s));
            let z_low = vmovn_u64(z);
            let z_high = vshrn_n_u64(z, 32);

            let result = vmlal_u32(result, z_low, z_high);
            vst1q_u64(accum_ptr.add(i * 2), result);
        }
    }
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn accumulate_stripe_avx2(accumulators: &mut [u64; 8], data: &[u8], secret: &[u8]) {
    unsafe {
        #[cfg(target_arch = "x86")]
        use std::arch::x86::*;
        #[cfg(target_arch = "x86_64")]
        use std::arch::x86_64::*;

        let data_ptr = data.as_ptr();
        let secret_ptr = secret.as_ptr();
        let accumulator_ptr = accumulators.as_mut_ptr();

        assert!(data.len() >= STRIPE_LENGTH);
        assert!(secret.len() >= STRIPE_LENGTH);
        for i in 0..(STRIPE_LENGTH / 32) {
            let x = _mm256_loadu_si256((data_ptr as *const __m256i).add(i));
            let s = _mm256_loadu_si256((secret_ptr as *const __m256i).add(i));

            let z = _mm256_xor_si256(x, s);
            let z_low = _mm256_shuffle_epi32::<49>(z);

            let product = _mm256_mul_epu32(z, z_low);
            let shuffled = _mm256_shuffle_epi32::<78>(x);

            let result = _mm256_loadu_si256((accumulator_ptr as *const __m256i).add(i));
            let result = _mm256_add_epi64(result, shuffled);
            let result = _mm256_add_epi64(result, product);
            _mm256_storeu_si256((accumulator_ptr as *mut __m256i).add(i), result);
        }
    }
}

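// Portable per-stripe accumulation: each 8-byte data word is XORed with the
// corresponding secret word, the 32-bit halves of that value are multiplied
// into the current lane, and the raw data word is added to the neighbouring
// lane (the `i ^ 1` swap), mirroring the SIMD variants above.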
#[cfg(not(target_arch = "aarch64"))]
fn accumulate_stripe_generic(accumulators: &mut [u64; 8], data: &[u8], secret: &[u8]) {
    for i in 0..accumulators.len() {
        let x = get_u64(&data[i * 8..], 0);
        let y = x ^ get_u64(&secret[i * 8..], 0);
        accumulators[i ^ 1] = accumulators[i ^ 1].wrapping_add(x);
        let z = (y & 0xFFFF_FFFF) * (y >> 32);
        accumulators[i] = accumulators[i].wrapping_add(z)
    }
}

#[inline(always)]
fn accumulate_block(
    accumulators: &mut [u64; 8],
    data: &[u8],
    secret: &[u8],
    stripes: usize,
    accum_stripe: unsafe fn(&mut [u64; 8], &[u8], &[u8]),
) {
    for i in 0..stripes {
        unsafe {
            accum_stripe(
                accumulators,
                &data[i * STRIPE_LENGTH..],
                &secret[i * SECRET_CONSUME_RATE..],
            );
        }
    }
}

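// Core of the long-input hash: data is consumed in blocks of
// `stripes_per_block` 64-byte stripes, each stripe advancing 8 bytes into the
// secret. The accumulators are scrambled after every full block, then the
// trailing partial block and the final 64 bytes of input are folded in.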
#[inline(always)]
fn hash_large_helper(
    data: &[u8],
    secret: &[u8],
    scramble: unsafe fn(&mut [u64; 8], &[u8]),
    accum_stripe: unsafe fn(&mut [u64; 8], &[u8], &[u8]),
) -> [u64; INIT_ACCUMULATORS.len()] {
    let mut accumulators = INIT_ACCUMULATORS;

    let stripes_per_block = (secret.len() - STRIPE_LENGTH) / SECRET_CONSUME_RATE;
    let block_len = STRIPE_LENGTH * stripes_per_block;
    let blocks = (data.len() - 1) / block_len;

    // accumulate all the blocks
    for i in 0..blocks {
        accumulate_block(
            &mut accumulators,
            &data[i * block_len..],
            secret,
            stripes_per_block,
            accum_stripe,
        );
        unsafe { scramble(&mut accumulators, &secret[secret.len() - STRIPE_LENGTH..]) };
    }

    // trailing partial block
    let stripes = ((data.len() - 1) - block_len * blocks) / STRIPE_LENGTH;
    accumulate_block(
        &mut accumulators,
        &data[blocks * block_len..],
        secret,
        stripes,
        accum_stripe,
    );

    // trailing stripe
    unsafe {
        accum_stripe(
            &mut accumulators,
            &data[data.len() - STRIPE_LENGTH..],
            &secret[secret.len() - STRIPE_LENGTH - 7..],
        );
    }

    accumulators
}

fn hash64_0(secret: &[u8], seed: u64) -> u64 {
    let mut result = seed;
    result ^= get_u64(secret, 7);
    result ^= get_u64(secret, 8);
    xxh64_avalanche(result)
}

fn hash64_1to3(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let x1 = data[0] as u32;
    let x2 = data[data.len() >> 1] as u32;
    let x3 = (*data.last().unwrap()) as u32;
    #[allow(clippy::cast_possible_truncation)]
    let x4 = data.len() as u32;

    let combined = ((x1 << 16) | (x2 << 24) | x3 | (x4 << 8)) as u64;
    let mut result = (get_u32(secret, 0) ^ get_u32(secret, 1)) as u64;
    result = result.wrapping_add(seed);
    result ^= combined;
    xxh64_avalanche(result)
}

fn hash64_4to8(data: &[u8], secret: &[u8], mut seed: u64) -> u64 {
    #[allow(clippy::cast_possible_truncation)]
    let truncate_seed = seed as u32;
    seed ^= u64::from(truncate_seed.swap_bytes()) << 32;
    let x1 = get_u32(data, 0) as u64;
    let x2 = get_u32(&data[data.len() - 4..], 0) as u64;
    let x = x2 | (x1 << 32);
    let s = (get_u64(secret, 1) ^ get_u64(secret, 2)).wrapping_sub(seed);
    rrmxmx(x ^ s, data.len() as u64)
}

fn hash64_9to16(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let s1 = (get_u64(secret, 3) ^ get_u64(secret, 4)).wrapping_add(seed);
    let s2 = (get_u64(secret, 5) ^ get_u64(secret, 6)).wrapping_sub(seed);
    let x1 = get_u64(data, 0) ^ s1;
    let x2 = get_u64(&data[data.len() - 8..], 0) ^ s2;
    let mut result = data.len() as u64;
    result = result.wrapping_add(x1.swap_bytes());
    result = result.wrapping_add(x2);
    result = result.wrapping_add(mul128_and_xor(x1, x2));
    xxh3_avalanche(result)
}

fn hash64_0to16(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    if data.is_empty() {
        hash64_0(secret, seed)
    } else if data.len() < 4 {
        hash64_1to3(data, secret, seed)
    } else if data.len() <= 8 {
        hash64_4to8(data, secret, seed)
    } else {
        hash64_9to16(data, secret, seed)
    }
}

fn hash64_17to128(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let mut result = PRIME64[0].wrapping_mul(data.len() as u64);
    let iterations = (data.len() - 1) / 32;
    for i in (0..=iterations).rev() {
        result = result.wrapping_add(mix16(&data[16 * i..], &secret[32 * i..], seed));
        result = result.wrapping_add(mix16(
            &data[data.len() - 16 * (i + 1)..],
            &secret[32 * i + 16..],
            seed,
        ));
    }
    xxh3_avalanche(result)
}

fn hash64_129to240(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let mut result = PRIME64[0].wrapping_mul(data.len() as u64);
    for i in 0..8 {
        result = result.wrapping_add(mix16(&data[16 * i..], &secret[16 * i..], seed));
    }
    result = xxh3_avalanche(result);
    let iterations = data.len() / 16;
    for i in 8..iterations {
        result = result.wrapping_add(mix16(&data[16 * i..], &secret[16 * (i - 8) + 3..], seed));
    }
    result = result.wrapping_add(mix16(
        &data[data.len() - 16..],
        &secret[MIN_SECRET_SIZE - 17..],
        seed,
    ));

    xxh3_avalanche(result)
}

fn hash64_0to240(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    if data.len() <= 16 {
        hash64_0to16(data, secret, seed)
    } else if data.len() <= 128 {
        hash64_17to128(data, secret, seed)
    } else {
        hash64_129to240(data, secret, seed)
    }
}

#[cfg(target_arch = "aarch64")]
unsafe fn hash64_large_neon(data: &[u8], seed: u64) -> u64 {
    hash64_large_generic(
        data,
        seed,
        gen_secret_generic,
        scramble_accumulators_neon,
        accumulate_stripe_neon,
    )
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn hash64_large_avx2(data: &[u8], seed: u64) -> u64 {
    hash64_large_generic(
        data,
        seed,
        gen_secret_avx2,
        scramble_accumulators_avx2,
        accumulate_stripe_avx2,
    )
}

#[inline(always)]
fn hash64_large_generic(
    data: &[u8],
    seed: u64,
    generate: unsafe fn(u64) -> [u8; DEFAULT_SECRET.len()],
    scramble: unsafe fn(&mut [u64; 8], &[u8]),
    accum_stripe: unsafe fn(&mut [u64; 8], &[u8], &[u8]),
) -> u64 {
    let secret = unsafe { generate(seed) };
    let accumulators = hash_large_helper(data, &secret, scramble, accum_stripe);

    merge_accumulators(
        accumulators,
        &secret[11..],
        PRIME64[0].wrapping_mul(data.len() as u64),
    )
}

fn hash128_0(secret: &[u8], seed: u64) -> u128 {
    let high = (hash64_0(&secret[3 * 8..], seed) as u128) << 64;
    let low = hash64_0(&secret[8..], seed) as u128;
    high | low
}

fn hash128_1to3(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    let x1 = data[0] as u32;
    let x2 = data[data.len() >> 1] as u32;
    let x3 = (*data.last().unwrap()) as u32;
    #[allow(clippy::cast_possible_truncation)]
    let x4 = data.len() as u32;

    let combined_low = (x1 << 16) | (x2 << 24) | x3 | (x4 << 8);
    let combined_high: u64 = combined_low.swap_bytes().rotate_left(13).into();
    let s_low = ((get_u32(secret, 0) ^ get_u32(secret, 1)) as u64).wrapping_add(seed);
    let s_high = ((get_u32(secret, 2) ^ get_u32(secret, 3)) as u64).wrapping_sub(seed);
    let high = (xxh64_avalanche(combined_high ^ s_high) as u128) << 64;
    let low = xxh64_avalanche(combined_low as u64 ^ s_low) as u128;
    high | low
}

fn hash128_4to8(data: &[u8], secret: &[u8], mut seed: u64) -> u128 {
    #[allow(clippy::cast_possible_truncation)]
    let truncate_seed = seed as u32;
    seed ^= u64::from(truncate_seed.swap_bytes()) << 32;
    let x_low = get_u32(data, 0) as u64;
    let x_high = u32::from_le_bytes(data[data.len() - 4..].try_into().unwrap()) as u64;
    let x = x_low | (x_high << 32);
    let s = (get_u64(secret, 2) ^ get_u64(secret, 3)).wrapping_add(seed);

    let mut y = (x ^ s) as u128;
    y = y.wrapping_mul(PRIME64[0].wrapping_add((data.len() << 2) as u64) as u128);

    #[allow(clippy::cast_possible_truncation)]
    let mut r_low = y as u64;
    let mut r_high: u64 = (y >> 64).try_into().unwrap();
    r_high = r_high.wrapping_add(r_low << 1);
    r_low ^= r_high >> 3;
    r_low = xorshift(r_low, 35);
    r_low = r_low.wrapping_mul(0x9FB21C651E98DF25);
    r_low = xorshift(r_low, 28);
    r_high = xxh3_avalanche(r_high);

    ((r_high as u128) << 64) | r_low as u128
}

fn hash128_9to16(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    let s_low = (get_u64(secret, 4) ^ get_u64(secret, 5)).wrapping_sub(seed);
    let s_high = (get_u64(secret, 6) ^ get_u64(secret, 7)).wrapping_add(seed);
    let x_low = get_u64(data, 0);
    let x_high = u64::from_le_bytes(data[data.len() - 8..].try_into().unwrap());
    let mixed = x_low ^ x_high ^ s_low;
    let x_high = x_high ^ s_high;

    let result = (mixed as u128).wrapping_mul(PRIME64[0] as u128);
    #[allow(clippy::cast_possible_truncation)]
    let mut r_low = result as u64;
    let mut r_high = (result >> 64) as u64;
    r_low = r_low.wrapping_add((data.len() as u64 - 1) << 54);
    r_high = r_high.wrapping_add(x_high);
    r_high = r_high.wrapping_add((x_high & 0xFFFF_FFFF).wrapping_mul(PRIME32[1] - 1));
    r_low ^= r_high.swap_bytes();

    let result2 = (r_low as u128).wrapping_mul(PRIME64[1] as u128);
    #[allow(clippy::cast_possible_truncation)]
    let mut r2_low = result2 as u64;
    let mut r2_high = (result2 >> 64) as u64;
    r2_high = r2_high.wrapping_add(r_high.wrapping_mul(PRIME64[1]));
    r2_low = xxh3_avalanche(r2_low);
    r2_high = xxh3_avalanche(r2_high);

    ((r2_high as u128) << 64) | r2_low as u128
}

fn hash128_0to16(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    if data.is_empty() {
        hash128_0(secret, seed)
    } else if data.len() < 4 {
        hash128_1to3(data, secret, seed)
    } else if data.len() <= 8 {
        hash128_4to8(data, secret, seed)
    } else {
        hash128_9to16(data, secret, seed)
    }
}

fn hash128_17to128(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    let len = data.len();
    let mut state = (PRIME64[0].wrapping_mul(len as u64), 0);
    if len > 32 {
        if len > 64 {
            if len > 96 {
                state = mix32(state, &data[48..], &data[len - 64..], &secret[96..], seed);
            }
            state = mix32(state, &data[32..], &data[len - 48..], &secret[64..], seed);
        }
        state = mix32(state, &data[16..], &data[len - 32..], &secret[32..], seed);
    }
    state = mix32(state, data, &data[len - 16..], secret, seed);

    let mut r_low = state.0.wrapping_add(state.1);
    let mut r_high = state.0.wrapping_mul(PRIME64[0]);
    r_high = r_high.wrapping_add(state.1.wrapping_mul(PRIME64[3]));
    r_high = r_high.wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64[1]));
    r_low = xxh3_avalanche(r_low);
    r_high = 0u64.wrapping_sub(xxh3_avalanche(r_high));

    ((r_high as u128) << 64) | r_low as u128
}

fn hash128_129to240(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    let len = data.len();
    let iterations = len / 32;
    let mut state = (PRIME64[0].wrapping_mul(len as u64), 0);

    for i in 0..4 {
        state = mix32(
            state,
            &data[32 * i..],
            &data[32 * i + 16..],
            &secret[32 * i..],
            seed,
        );
    }
    state.0 = xxh3_avalanche(state.0);
    state.1 = xxh3_avalanche(state.1);

    for i in 4..iterations {
        state = mix32(
            state,
            &data[32 * i..],
            &data[32 * i + 16..],
            &secret[3 + 32 * (i - 4)..],
            seed,
        );
    }
    state = mix32(
        state,
        &data[len - 16..],
        &data[len - 32..],
        &secret[MIN_SECRET_SIZE - 33..],
        0u64.wrapping_sub(seed),
    );

    let mut r_low = state.0.wrapping_add(state.1);
    let mut r_high = state.0.wrapping_mul(PRIME64[0]);
    r_high = r_high.wrapping_add(state.1.wrapping_mul(PRIME64[3]));
    r_high = r_high.wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64[1]));
    r_low = xxh3_avalanche(r_low);
    r_high = 0u64.wrapping_sub(xxh3_avalanche(r_high));

    ((r_high as u128) << 64) | r_low as u128
}

fn hash128_0to240(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    if data.len() <= 16 {
        hash128_0to16(data, secret, seed)
    } else if data.len() <= 128 {
        hash128_17to128(data, secret, seed)
    } else {
        hash128_129to240(data, secret, seed)
    }
}

#[cfg(target_arch = "aarch64")]
unsafe fn hash128_large_neon(data: &[u8], seed: u64) -> u128 {
    hash128_large_generic(
        data,
        seed,
        gen_secret_generic,
        scramble_accumulators_neon,
        accumulate_stripe_neon,
    )
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn hash128_large_avx2(data: &[u8], seed: u64) -> u128 {
    hash128_large_generic(
        data,
        seed,
        gen_secret_avx2,
        scramble_accumulators_avx2,
        accumulate_stripe_avx2,
    )
}

#[inline(always)]
fn hash128_large_generic(
    data: &[u8],
    seed: u64,
    generate: unsafe fn(u64) -> [u8; DEFAULT_SECRET.len()],
    scramble: unsafe fn(&mut [u64; 8], &[u8]),
    accum_stripe: unsafe fn(&mut [u64; 8], &[u8], &[u8]),
) -> u128 {
    let secret = unsafe { generate(seed) };
    let accumulators = hash_large_helper(data, &secret, scramble, accum_stripe);

    let low = merge_accumulators(
        accumulators,
        &secret[11..],
        PRIME64[0].wrapping_mul(data.len() as u64),
    );
    let high = merge_accumulators(
        accumulators,
        &secret[secret.len() - 64 - 11..],
        !(PRIME64[1].wrapping_mul(data.len() as u64)),
    );

    ((high as u128) << 64) | low as u128
}

#[cfg(test)]
mod test {
    use crate::tree_store::page_store::xxh3::hash64_with_seed;

    #[test]
    fn test_empty() {
        let actual = hash64_with_seed(&[], 0);
        assert_eq!(actual, 3244421341483603138);
    }
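
    // Additional smoke test (added sketch, not a reference-value test): it only
    // checks that repeated calls agree for a spread of input lengths, including
    // the >240-byte path that dispatches to the SIMD implementations.
    #[test]
    fn test_determinism_across_length_buckets() {
        let data: Vec<u8> = (0u32..1024).map(|i| (i % 251) as u8).collect();
        for &len in &[0usize, 1, 3, 4, 8, 9, 16, 17, 128, 129, 240, 241, 1024] {
            let slice = &data[..len];
            assert_eq!(hash64_with_seed(slice, 7), hash64_with_seed(slice, 7));
            assert_eq!(
                super::hash128_with_seed(slice, 7),
                super::hash128_with_seed(slice, 7)
            );
        }
    }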
825}