redb/tree_store/page_store/xxh3.rs

// Copyright (c) 2022 Christopher Berner
//
// MIT License
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

// Copied from xxh3 crate, commit hash a2bfd3a

use std::mem::size_of;
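
// XXH3 processes input in 64-byte stripes, advancing the secret window by
// SECRET_CONSUME_RATE (8) bytes for each successive stripe within a block.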
const STRIPE_LENGTH: usize = 64;
const SECRET_CONSUME_RATE: usize = 8;
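
// DEFAULT_SECRET is the fixed pseudorandom secret from the reference
// implementation; MIN_SECRET_SIZE is the smallest secret length XXH3 defines,
// used below as an anchor for the 129-to-240-byte paths.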
const MIN_SECRET_SIZE: usize = 136;
const DEFAULT_SECRET: [u8; 192] = [
    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
];
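
// Prime multipliers inherited from XXH32 and XXH64, widened to u64 so they
// can be mixed directly into 64-bit arithmetic.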
const PRIME32: [u64; 3] = [0x9E3779B1, 0x85EBCA77, 0xC2B2AE3D];
const PRIME64: [u64; 5] = [
    0x9E3779B185EBCA87,
    0xC2B2AE3D27D4EB4F,
    0x165667B19E3779F9,
    0x85EBCA77C2B2AE63,
    0x27D4EB2F165667C5,
];
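
// Initial values of the eight accumulator lanes, a fixed arrangement of the
// prime constants matching the reference implementation.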
const INIT_ACCUMULATORS: [u64; 8] = [
    PRIME32[2], PRIME64[0], PRIME64[1], PRIME64[2], PRIME64[3], PRIME32[1], PRIME64[4], PRIME32[0],
];
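
/// Computes the 64-bit XXH3 hash of `data` with `seed`. Inputs of at most
/// 240 bytes take the scalar short-input paths with the default secret;
/// longer inputs dispatch to AVX2 (detected at runtime on x86/x86_64),
/// NEON on aarch64, or the generic implementation otherwise.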
#[allow(clippy::needless_return)]
pub fn hash64_with_seed(data: &[u8], seed: u64) -> u64 {
    if data.len() <= 240 {
        hash64_0to240(data, &DEFAULT_SECRET, seed)
    } else {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        {
            if is_x86_feature_detected!("avx2") {
                unsafe {
                    return hash64_large_avx2(data, seed);
                }
            }
        }
        #[cfg(target_arch = "aarch64")]
        {
            unsafe {
                return hash64_large_neon(data, seed);
            }
        }
        #[cfg(not(target_arch = "aarch64"))]
        hash64_large_generic(
            data,
            seed,
            gen_secret_generic,
            scramble_accumulators_generic,
            accumulate_stripe_generic,
        )
    }
}
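
/// Computes the 128-bit XXH3 hash of `data` with `seed`, using the same
/// dispatch strategy as `hash64_with_seed`.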
#[allow(clippy::needless_return)]
pub fn hash128_with_seed(data: &[u8], seed: u64) -> u128 {
    if data.len() <= 240 {
        hash128_0to240(data, &DEFAULT_SECRET, seed)
    } else {
        #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
        if is_x86_feature_detected!("avx2") {
            unsafe {
                return hash128_large_avx2(data, seed);
            }
        }
        #[cfg(target_arch = "aarch64")]
        unsafe {
            return hash128_large_neon(data, seed);
        }
        #[cfg(not(target_arch = "aarch64"))]
        hash128_large_generic(
            data,
            seed,
            gen_secret_generic,
            scramble_accumulators_generic,
            accumulate_stripe_generic,
        )
    }
}
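
// Little-endian word accessors: read the i-th u32 / u64 of `data`.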
fn get_u32(data: &[u8], i: usize) -> u32 {
    u32::from_le_bytes(
        data[i * size_of::<u32>()..(i + 1) * size_of::<u32>()]
            .try_into()
            .unwrap(),
    )
}

fn get_u64(data: &[u8], i: usize) -> u64 {
    u64::from_le_bytes(
        data[i * size_of::<u64>()..(i + 1) * size_of::<u64>()]
            .try_into()
            .unwrap(),
    )
}
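
// Finalization mixers: xxh64_avalanche is XXH64's finalizer, while
// xxh3_avalanche is the cheaper avalanche XXH3 applies to its results.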
fn xxh64_avalanche(mut x: u64) -> u64 {
    x ^= x >> 33;
    x = x.wrapping_mul(PRIME64[1]);
    x ^= x >> 29;
    x = x.wrapping_mul(PRIME64[2]);
    x ^= x >> 32;
    x
}

fn xxh3_avalanche(mut x: u64) -> u64 {
    x = xorshift(x, 37);
    x = x.wrapping_mul(0x165667919E3779F9);
    x = xorshift(x, 32);
    x
}
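
// Folds the eight accumulator lanes down to one u64: each pair of lanes is
// XORed with secret material and combined via a 128-bit multiply-fold.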
#[inline(always)]
fn merge_accumulators(
    accumulators: [u64; INIT_ACCUMULATORS.len()],
    secret: &[u8],
    init: u64,
) -> u64 {
    let mut result = init;
    for i in 0..4 {
        let a1 = accumulators[2 * i];
        let a2 = accumulators[2 * i + 1];
        let s1 = get_u64(&secret[16 * i..], 0);
        let s2 = get_u64(&secret[16 * i..], 1);
        result = result.wrapping_add(mul128_and_xor(a1 ^ s1, a2 ^ s2));
    }
    xxh3_avalanche(result)
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn scramble_accumulators_avx2(
    accumulators: &mut [u64; INIT_ACCUMULATORS.len()],
    secret: &[u8],
) {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::*;

    #[allow(clippy::cast_possible_truncation)]
    let simd_prime = _mm256_set1_epi32(PRIME32[0] as i32);
    let secret_ptr = secret.as_ptr();
    let accumulators_ptr = accumulators.as_mut_ptr();

    for i in 0..(STRIPE_LENGTH / 32) {
        let a = _mm256_loadu_si256((accumulators_ptr as *const __m256i).add(i));
        let shifted = _mm256_srli_epi64::<47>(a);
        let b = _mm256_xor_si256(a, shifted);

        let s = _mm256_loadu_si256((secret_ptr as *const __m256i).add(i));
        let c = _mm256_xor_si256(b, s);
        let c_high = _mm256_shuffle_epi32::<49>(c);

        let low = _mm256_mul_epu32(c, simd_prime);
        let high = _mm256_mul_epu32(c_high, simd_prime);
        let high = _mm256_slli_epi64::<32>(high);
        let result = _mm256_add_epi64(low, high);
        _mm256_storeu_si256((accumulators_ptr as *mut __m256i).add(i), result);
    }
}

#[cfg(target_arch = "aarch64")]
unsafe fn scramble_accumulators_neon(
    accumulators: &mut [u64; INIT_ACCUMULATORS.len()],
    secret: &[u8],
) {
    #[cfg(target_arch = "aarch64")]
    use std::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use std::arch::arm::*;

    let prime = vdup_n_u32(PRIME32[0].try_into().unwrap());

    let accum_ptr = accumulators.as_mut_ptr();
    let secret_ptr = secret.as_ptr();
    assert!(secret.len() >= STRIPE_LENGTH);
    for i in 0..(STRIPE_LENGTH / 16) {
        // xorshift
        let accum = vld1q_u64(accum_ptr.add(i * 2));
        let shifted = vshrq_n_u64(accum, 47);
        let accum = veorq_u64(accum, shifted);

        // xor with secret
        let s = vld1q_u8(secret_ptr.add(i * 16));
        let accum = veorq_u64(accum, vreinterpretq_u64_u8(s));

        // mul with prime. Sadly there's no vmulq_u64
        let accum_low = vmovn_u64(accum);
        let accum_high = vshrn_n_u64(accum, 32);
        let prod_high = vshlq_n_u64(vmull_u32(accum_high, prime), 32);
        let accum = vmlal_u32(prod_high, accum_low, prime);
        vst1q_u64(accum_ptr.add(i * 2), accum);
    }
}

#[cfg(not(target_arch = "aarch64"))]
fn scramble_accumulators_generic(accumulators: &mut [u64; INIT_ACCUMULATORS.len()], secret: &[u8]) {
    for (i, x) in accumulators.iter_mut().enumerate() {
        let s = get_u64(secret, i);
        *x = xorshift(*x, 47);
        *x ^= s;
        *x = x.wrapping_mul(PRIME32[0]);
    }
}

fn xorshift(x: u64, shift: u64) -> u64 {
    x ^ (x >> shift)
}

fn rrmxmx(mut x: u64, y: u64) -> u64 {
    x ^= x.rotate_left(49) ^ x.rotate_left(24);
    x = x.wrapping_mul(0x9FB21C651E98DF25);
    x ^= (x >> 35).wrapping_add(y);
    x = x.wrapping_mul(0x9FB21C651E98DF25);
    xorshift(x, 28)
}

fn mul128_and_xor(x: u64, y: u64) -> u64 {
    let z = u128::from(x) * u128::from(y);
    #[allow(clippy::cast_possible_truncation)]
    (z as u64 ^ (z >> 64) as u64)
}
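
// mix16 combines 16 bytes of input with 16 bytes of seeded secret through a
// 128-bit multiply-fold; mix32 applies it to two 16-byte chunks, cross-mixing
// each chunk into the other half of the state.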
fn mix16(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let x1 = get_u64(data, 0);
    let x2 = get_u64(data, 1);
    let s1 = get_u64(secret, 0).wrapping_add(seed);
    let s2 = get_u64(secret, 1).wrapping_sub(seed);

    mul128_and_xor(x1 ^ s1, x2 ^ s2)
}

fn mix32(state: (u64, u64), data1: &[u8], data2: &[u8], secret: &[u8], seed: u64) -> (u64, u64) {
    let (mut r_low, mut r_high) = state;

    r_low = r_low.wrapping_add(mix16(data1, secret, seed));
    r_low ^= get_u64(data2, 0).wrapping_add(get_u64(data2, 1));
    r_high = r_high.wrapping_add(mix16(data2, &secret[16..], seed));
    r_high ^= get_u64(data1, 0).wrapping_add(get_u64(data1, 1));

    (r_low, r_high)
}
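
// Derives a per-seed secret from DEFAULT_SECRET: the seed is added to the
// even 8-byte words and subtracted from the odd ones.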
fn gen_secret_generic(seed: u64) -> [u8; DEFAULT_SECRET.len()] {
    let mut secret = [0u8; DEFAULT_SECRET.len()];
    let iterations = DEFAULT_SECRET.len() / 16;
    for i in 0..iterations {
        let x = get_u64(&DEFAULT_SECRET, 2 * i).wrapping_add(seed);
        secret[16 * i..16 * i + 8].copy_from_slice(&x.to_le_bytes());
        let x = get_u64(&DEFAULT_SECRET, 2 * i + 1).wrapping_sub(seed);
        secret[16 * i + 8..16 * (i + 1)].copy_from_slice(&x.to_le_bytes());
    }
    secret
}

#[allow(clippy::cast_possible_truncation)]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn gen_secret_avx2(seed: u64) -> [u8; DEFAULT_SECRET.len()] {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::*;

    #[allow(clippy::cast_possible_wrap)]
    let xxh_i64 = 0u64.wrapping_sub(seed) as i64;
    #[allow(clippy::cast_possible_wrap)]
    let seed = seed as i64;

    let simd_seed = _mm256_set_epi64x(xxh_i64, seed, xxh_i64, seed);

    let mut output = [0u8; DEFAULT_SECRET.len()];
    let output_ptr = output.as_mut_ptr();
    let secret_ptr = DEFAULT_SECRET.as_ptr();
    for i in 0..6 {
        let s = _mm256_loadu_si256((secret_ptr as *const __m256i).add(i));
        let x = _mm256_add_epi64(s, simd_seed);
        _mm256_storeu_si256((output_ptr as *mut __m256i).add(i), x);
    }

    output
}
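
// SIMD stripe-accumulation kernels (NEON and AVX2); the scalar equivalent
// follows below.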
#[cfg(target_arch = "aarch64")]
unsafe fn accumulate_stripe_neon(accumulators: &mut [u64; 8], data: &[u8], secret: &[u8]) {
    #[cfg(target_arch = "aarch64")]
    use std::arch::aarch64::*;
    #[cfg(target_arch = "arm")]
    use std::arch::arm::*;

    let accum_ptr = accumulators.as_mut_ptr();
    let data_ptr = data.as_ptr();
    let secret_ptr = secret.as_ptr();
    assert!(data.len() >= STRIPE_LENGTH);
    assert!(secret.len() >= STRIPE_LENGTH);
    for i in 0..(STRIPE_LENGTH / 16) {
        let x = vld1q_u8(data_ptr.add(i * 16));
        let s = vld1q_u8(secret_ptr.add(i * 16));
        let x64 = vreinterpretq_u64_u8(x);
        let y = vextq_u64(x64, x64, 1);

        let result = vld1q_u64(accum_ptr.add(i * 2));
        let result = vaddq_u64(result, y);

        let z = vreinterpretq_u64_u8(veorq_u8(x, s));
        let z_low = vmovn_u64(z);
        let z_high = vshrn_n_u64(z, 32);

        let result = vmlal_u32(result, z_low, z_high);
        vst1q_u64(accum_ptr.add(i * 2), result);
    }
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn accumulate_stripe_avx2(accumulators: &mut [u64; 8], data: &[u8], secret: &[u8]) {
    #[cfg(target_arch = "x86")]
    use std::arch::x86::*;
    #[cfg(target_arch = "x86_64")]
    use std::arch::x86_64::*;

    let data_ptr = data.as_ptr();
    let secret_ptr = secret.as_ptr();
    let accumulator_ptr = accumulators.as_mut_ptr();

    assert!(data.len() >= STRIPE_LENGTH);
    assert!(secret.len() >= STRIPE_LENGTH);
    for i in 0..(STRIPE_LENGTH / 32) {
        let x = _mm256_loadu_si256((data_ptr as *const __m256i).add(i));
        let s = _mm256_loadu_si256((secret_ptr as *const __m256i).add(i));

        let z = _mm256_xor_si256(x, s);
        let z_low = _mm256_shuffle_epi32::<49>(z);

        let product = _mm256_mul_epu32(z, z_low);
        let shuffled = _mm256_shuffle_epi32::<78>(x);

        let result = _mm256_loadu_si256((accumulator_ptr as *const __m256i).add(i));
        let result = _mm256_add_epi64(result, shuffled);
        let result = _mm256_add_epi64(result, product);
        _mm256_storeu_si256((accumulator_ptr as *mut __m256i).add(i), result);
    }
}
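
// Scalar accumulation of one 64-byte stripe: each 8-byte lane is XORed with
// secret, the 32-bit halves of that value are multiplied together into the
// lane's accumulator, and the raw input word is added to the neighboring
// lane (i ^ 1). The 32x32->64 multiply cannot overflow, so plain `*` is safe.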
#[cfg(not(target_arch = "aarch64"))]
fn accumulate_stripe_generic(accumulators: &mut [u64; 8], data: &[u8], secret: &[u8]) {
    for i in 0..accumulators.len() {
        let x = get_u64(&data[i * 8..], 0);
        let y = x ^ get_u64(&secret[i * 8..], 0);
        accumulators[i ^ 1] = accumulators[i ^ 1].wrapping_add(x);
        let z = (y & 0xFFFF_FFFF) * (y >> 32);
        accumulators[i] = accumulators[i].wrapping_add(z)
    }
}

#[inline(always)]
fn accumulate_block(
    accumulators: &mut [u64; 8],
    data: &[u8],
    secret: &[u8],
    stripes: usize,
    accum_stripe: unsafe fn(&mut [u64; 8], &[u8], &[u8]),
) {
    for i in 0..stripes {
        unsafe {
            accum_stripe(
                accumulators,
                &data[i * STRIPE_LENGTH..],
                &secret[i * SECRET_CONSUME_RATE..],
            );
        }
    }
}
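
// Core driver for inputs longer than 240 bytes. With the 192-byte secret,
// a block is (192 - 64) / 8 = 16 stripes (1024 bytes). Accumulators are
// scrambled with the last 64 bytes of the secret after every full block,
// and the final stripe (which may overlap the previous one) reads the
// secret window ending 7 bytes before the end of the secret.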
#[inline(always)]
fn hash_large_helper(
    data: &[u8],
    secret: &[u8],
    scramble: unsafe fn(&mut [u64; 8], &[u8]),
    accum_stripe: unsafe fn(&mut [u64; 8], &[u8], &[u8]),
) -> [u64; INIT_ACCUMULATORS.len()] {
    let mut accumulators = INIT_ACCUMULATORS;

    let stripes_per_block = (secret.len() - STRIPE_LENGTH) / SECRET_CONSUME_RATE;
    let block_len = STRIPE_LENGTH * stripes_per_block;
    let blocks = (data.len() - 1) / block_len;

    // accumulate all the blocks
    for i in 0..blocks {
        accumulate_block(
            &mut accumulators,
            &data[i * block_len..],
            secret,
            stripes_per_block,
            accum_stripe,
        );
        unsafe { scramble(&mut accumulators, &secret[secret.len() - STRIPE_LENGTH..]) };
    }

    // trailing partial block
    let stripes = ((data.len() - 1) - block_len * blocks) / STRIPE_LENGTH;
    accumulate_block(
        &mut accumulators,
        &data[blocks * block_len..],
        secret,
        stripes,
        accum_stripe,
    );

    // trailing stripe
    unsafe {
        accum_stripe(
            &mut accumulators,
            &data[data.len() - STRIPE_LENGTH..],
            &secret[secret.len() - STRIPE_LENGTH - 7..],
        );
    }

    accumulators
}
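
// Short-input (<= 240 bytes) 64-bit paths, split by exact length ranges as
// in the reference implementation.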
fn hash64_0(secret: &[u8], seed: u64) -> u64 {
    let mut result = seed;
    result ^= get_u64(secret, 7);
    result ^= get_u64(secret, 8);
    xxh64_avalanche(result)
}

fn hash64_1to3(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let x1 = data[0] as u32;
    let x2 = data[data.len() >> 1] as u32;
    let x3 = (*data.last().unwrap()) as u32;
    #[allow(clippy::cast_possible_truncation)]
    let x4 = data.len() as u32;

    let combined = ((x1 << 16) | (x2 << 24) | x3 | (x4 << 8)) as u64;
    let mut result = (get_u32(secret, 0) ^ get_u32(secret, 1)) as u64;
    result = result.wrapping_add(seed);
    result ^= combined;
    xxh64_avalanche(result)
}

fn hash64_4to8(data: &[u8], secret: &[u8], mut seed: u64) -> u64 {
    #[allow(clippy::cast_possible_truncation)]
    let truncate_seed = seed as u32;
    seed ^= u64::from(truncate_seed.swap_bytes()) << 32;
    let x1 = get_u32(data, 0) as u64;
    let x2 = get_u32(&data[data.len() - 4..], 0) as u64;
    let x = x2 | (x1 << 32);
    let s = (get_u64(secret, 1) ^ get_u64(secret, 2)).wrapping_sub(seed);
    rrmxmx(x ^ s, data.len() as u64)
}

fn hash64_9to16(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let s1 = (get_u64(secret, 3) ^ get_u64(secret, 4)).wrapping_add(seed);
    let s2 = (get_u64(secret, 5) ^ get_u64(secret, 6)).wrapping_sub(seed);
    let x1 = get_u64(data, 0) ^ s1;
    let x2 = get_u64(&data[data.len() - 8..], 0) ^ s2;
    let mut result = data.len() as u64;
    result = result.wrapping_add(x1.swap_bytes());
    result = result.wrapping_add(x2);
    result = result.wrapping_add(mul128_and_xor(x1, x2));
    xxh3_avalanche(result)
}

fn hash64_0to16(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    if data.is_empty() {
        hash64_0(secret, seed)
    } else if data.len() < 4 {
        hash64_1to3(data, secret, seed)
    } else if data.len() <= 8 {
        hash64_4to8(data, secret, seed)
    } else {
        hash64_9to16(data, secret, seed)
    }
}

fn hash64_17to128(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let mut result = PRIME64[0].wrapping_mul(data.len() as u64);
    let iterations = (data.len() - 1) / 32;
    for i in (0..=iterations).rev() {
        result = result.wrapping_add(mix16(&data[16 * i..], &secret[32 * i..], seed));
        result = result.wrapping_add(mix16(
            &data[data.len() - 16 * (i + 1)..],
            &secret[32 * i + 16..],
            seed,
        ));
    }
    xxh3_avalanche(result)
}

fn hash64_129to240(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    let mut result = PRIME64[0].wrapping_mul(data.len() as u64);
    for i in 0..8 {
        result = result.wrapping_add(mix16(&data[16 * i..], &secret[16 * i..], seed));
    }
    result = xxh3_avalanche(result);
    let iterations = data.len() / 16;
    for i in 8..iterations {
        result = result.wrapping_add(mix16(&data[16 * i..], &secret[16 * (i - 8) + 3..], seed));
    }
    result = result.wrapping_add(mix16(
        &data[data.len() - 16..],
        &secret[MIN_SECRET_SIZE - 17..],
        seed,
    ));

    xxh3_avalanche(result)
}

fn hash64_0to240(data: &[u8], secret: &[u8], seed: u64) -> u64 {
    if data.len() <= 16 {
        hash64_0to16(data, secret, seed)
    } else if data.len() <= 128 {
        hash64_17to128(data, secret, seed)
    } else {
        hash64_129to240(data, secret, seed)
    }
}
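
// Large-input (> 240 bytes) 64-bit entry points: each wrapper plugs the
// matching secret-generation, scramble, and accumulate kernels into the
// shared generic driver.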
#[cfg(target_arch = "aarch64")]
unsafe fn hash64_large_neon(data: &[u8], seed: u64) -> u64 {
    hash64_large_generic(
        data,
        seed,
        gen_secret_generic,
        scramble_accumulators_neon,
        accumulate_stripe_neon,
    )
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn hash64_large_avx2(data: &[u8], seed: u64) -> u64 {
    hash64_large_generic(
        data,
        seed,
        gen_secret_avx2,
        scramble_accumulators_avx2,
        accumulate_stripe_avx2,
    )
}

#[inline(always)]
fn hash64_large_generic(
    data: &[u8],
    seed: u64,
    gen: unsafe fn(u64) -> [u8; DEFAULT_SECRET.len()],
    scramble: unsafe fn(&mut [u64; 8], &[u8]),
    accum_stripe: unsafe fn(&mut [u64; 8], &[u8], &[u8]),
) -> u64 {
    let secret = unsafe { gen(seed) };
    let accumulators = hash_large_helper(data, &secret, scramble, accum_stripe);

    merge_accumulators(
        accumulators,
        &secret[11..],
        PRIME64[0].wrapping_mul(data.len() as u64),
    )
}
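
// Short-input (<= 240 bytes) 128-bit paths, mirroring the 64-bit family.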
fn hash128_0(secret: &[u8], seed: u64) -> u128 {
    let high = (hash64_0(&secret[3 * 8..], seed) as u128) << 64;
    let low = hash64_0(&secret[8..], seed) as u128;
    high | low
}

fn hash128_1to3(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    let x1 = data[0] as u32;
    let x2 = data[data.len() >> 1] as u32;
    let x3 = (*data.last().unwrap()) as u32;
    #[allow(clippy::cast_possible_truncation)]
    let x4 = data.len() as u32;

    let combined_low = (x1 << 16) | (x2 << 24) | x3 | (x4 << 8);
    let combined_high: u64 = combined_low.swap_bytes().rotate_left(13).into();
    let s_low = ((get_u32(secret, 0) ^ get_u32(secret, 1)) as u64).wrapping_add(seed);
    let s_high = ((get_u32(secret, 2) ^ get_u32(secret, 3)) as u64).wrapping_sub(seed);
    let high = (xxh64_avalanche(combined_high ^ s_high) as u128) << 64;
    let low = xxh64_avalanche(combined_low as u64 ^ s_low) as u128;
    high | low
}

fn hash128_4to8(data: &[u8], secret: &[u8], mut seed: u64) -> u128 {
    #[allow(clippy::cast_possible_truncation)]
    let truncate_seed = seed as u32;
    seed ^= u64::from(truncate_seed.swap_bytes()) << 32;
    let x_low = get_u32(data, 0) as u64;
    let x_high = u32::from_le_bytes(data[data.len() - 4..].try_into().unwrap()) as u64;
    let x = x_low | (x_high << 32);
    let s = (get_u64(secret, 2) ^ get_u64(secret, 3)).wrapping_add(seed);

    let mut y = (x ^ s) as u128;
    y = y.wrapping_mul(PRIME64[0].wrapping_add((data.len() << 2) as u64) as u128);

    #[allow(clippy::cast_possible_truncation)]
    let mut r_low = y as u64;
    let mut r_high: u64 = (y >> 64).try_into().unwrap();
    r_high = r_high.wrapping_add(r_low << 1);
    r_low ^= r_high >> 3;
    r_low = xorshift(r_low, 35);
    r_low = r_low.wrapping_mul(0x9FB21C651E98DF25);
    r_low = xorshift(r_low, 28);
    r_high = xxh3_avalanche(r_high);

    (r_high as u128) << 64 | r_low as u128
}

fn hash128_9to16(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    let s_low = (get_u64(secret, 4) ^ get_u64(secret, 5)).wrapping_sub(seed);
    let s_high = (get_u64(secret, 6) ^ get_u64(secret, 7)).wrapping_add(seed);
    let x_low = get_u64(data, 0);
    let x_high = u64::from_le_bytes(data[data.len() - 8..].try_into().unwrap());
    let mixed = x_low ^ x_high ^ s_low;
    let x_high = x_high ^ s_high;

    let result = (mixed as u128).wrapping_mul(PRIME64[0] as u128);
    #[allow(clippy::cast_possible_truncation)]
    let mut r_low = result as u64;
    let mut r_high = (result >> 64) as u64;
    r_low = r_low.wrapping_add((data.len() as u64 - 1) << 54);
    r_high = r_high.wrapping_add(x_high);
    r_high = r_high.wrapping_add((x_high & 0xFFFF_FFFF).wrapping_mul(PRIME32[1] - 1));
    r_low ^= r_high.swap_bytes();

    let result2 = (r_low as u128).wrapping_mul(PRIME64[1] as u128);
    #[allow(clippy::cast_possible_truncation)]
    let mut r2_low = result2 as u64;
    let mut r2_high = (result2 >> 64) as u64;
    r2_high = r2_high.wrapping_add(r_high.wrapping_mul(PRIME64[1]));
    r2_low = xxh3_avalanche(r2_low);
    r2_high = xxh3_avalanche(r2_high);

    (r2_high as u128) << 64 | r2_low as u128
}

fn hash128_0to16(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    if data.is_empty() {
        hash128_0(secret, seed)
    } else if data.len() < 4 {
        hash128_1to3(data, secret, seed)
    } else if data.len() <= 8 {
        hash128_4to8(data, secret, seed)
    } else {
        hash128_9to16(data, secret, seed)
    }
}

fn hash128_17to128(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    let len = data.len();
    let mut state = (PRIME64[0].wrapping_mul(len as u64), 0);
    if len > 32 {
        if len > 64 {
            if len > 96 {
                state = mix32(state, &data[48..], &data[len - 64..], &secret[96..], seed);
            }
            state = mix32(state, &data[32..], &data[len - 48..], &secret[64..], seed);
        }
        state = mix32(state, &data[16..], &data[len - 32..], &secret[32..], seed);
    }
    state = mix32(state, data, &data[len - 16..], secret, seed);

    let mut r_low = state.0.wrapping_add(state.1);
    let mut r_high = state.0.wrapping_mul(PRIME64[0]);
    r_high = r_high.wrapping_add(state.1.wrapping_mul(PRIME64[3]));
    r_high = r_high.wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64[1]));
    r_low = xxh3_avalanche(r_low);
    r_high = 0u64.wrapping_sub(xxh3_avalanche(r_high));

    (r_high as u128) << 64 | r_low as u128
}

fn hash128_129to240(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    let len = data.len();
    let iterations = len / 32;
    let mut state = (PRIME64[0].wrapping_mul(len as u64), 0);

    for i in 0..4 {
        state = mix32(
            state,
            &data[32 * i..],
            &data[32 * i + 16..],
            &secret[32 * i..],
            seed,
        );
    }
    state.0 = xxh3_avalanche(state.0);
    state.1 = xxh3_avalanche(state.1);

    for i in 4..iterations {
        state = mix32(
            state,
            &data[32 * i..],
            &data[32 * i + 16..],
            &secret[3 + 32 * (i - 4)..],
            seed,
        );
    }
    state = mix32(
        state,
        &data[len - 16..],
        &data[len - 32..],
        &secret[MIN_SECRET_SIZE - 33..],
        0u64.wrapping_sub(seed),
    );

    let mut r_low = state.0.wrapping_add(state.1);
    let mut r_high = state.0.wrapping_mul(PRIME64[0]);
    r_high = r_high.wrapping_add(state.1.wrapping_mul(PRIME64[3]));
    r_high = r_high.wrapping_add((len as u64).wrapping_sub(seed).wrapping_mul(PRIME64[1]));
    r_low = xxh3_avalanche(r_low);
    r_high = 0u64.wrapping_sub(xxh3_avalanche(r_high));

    (r_high as u128) << 64 | r_low as u128
}

fn hash128_0to240(data: &[u8], secret: &[u8], seed: u64) -> u128 {
    if data.len() <= 16 {
        hash128_0to16(data, secret, seed)
    } else if data.len() <= 128 {
        hash128_17to128(data, secret, seed)
    } else {
        hash128_129to240(data, secret, seed)
    }
}
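
// Large-input (> 240 bytes) 128-bit entry points.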
#[cfg(target_arch = "aarch64")]
unsafe fn hash128_large_neon(data: &[u8], seed: u64) -> u128 {
    hash128_large_generic(
        data,
        seed,
        gen_secret_generic,
        scramble_accumulators_neon,
        accumulate_stripe_neon,
    )
}

#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[target_feature(enable = "avx2")]
unsafe fn hash128_large_avx2(data: &[u8], seed: u64) -> u128 {
    hash128_large_generic(
        data,
        seed,
        gen_secret_avx2,
        scramble_accumulators_avx2,
        accumulate_stripe_avx2,
    )
}

#[inline(always)]
fn hash128_large_generic(
    data: &[u8],
    seed: u64,
    gen: unsafe fn(u64) -> [u8; DEFAULT_SECRET.len()],
    scramble: unsafe fn(&mut [u64; 8], &[u8]),
    accum_stripe: unsafe fn(&mut [u64; 8], &[u8], &[u8]),
) -> u128 {
    let secret = unsafe { gen(seed) };
    let accumulators = hash_large_helper(data, &secret, scramble, accum_stripe);

    let low = merge_accumulators(
        accumulators,
        &secret[11..],
        PRIME64[0].wrapping_mul(data.len() as u64),
    );
    let high = merge_accumulators(
        accumulators,
        &secret[secret.len() - 64 - 11..],
        !(PRIME64[1].wrapping_mul(data.len() as u64)),
    );

    (high as u128) << 64 | low as u128
}

#[cfg(test)]
mod test {
    use crate::tree_store::page_store::xxh3::hash64_with_seed;

    #[test]
    fn test_empty() {
        let actual = hash64_with_seed(&[], 0);
        assert_eq!(actual, 3244421341483603138);
    }
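
    // A consistency sketch rather than an upstream test vector: for inputs
    // longer than 240 bytes, hash64_large_generic computes exactly the low
    // 64 bits of hash128_large_generic (same secret, accumulation, and merge),
    // so the two public entry points must agree on those bits.
    #[test]
    fn test_large_hash64_is_low_bits_of_hash128() {
        let data = vec![0xABu8; 1024];
        let h64 = hash64_with_seed(&data, 7);
        let h128 = super::hash128_with_seed(&data, 7);
        #[allow(clippy::cast_possible_truncation)]
        let low = h128 as u64;
        assert_eq!(h64, low);
    }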
819}