1use crate::{portable, CVWords, IncrementCounter, BLOCK_LEN};
2use arrayref::{array_mut_ref, array_ref};
3
4cfg_if::cfg_if! {
5 if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
6 cfg_if::cfg_if! {
7 if #[cfg(blake3_avx512_ffi)] {
8 pub const MAX_SIMD_DEGREE: usize = 16;
9 } else {
10 pub const MAX_SIMD_DEGREE: usize = 8;
11 }
12 }
13 } else if #[cfg(blake3_neon)] {
14 pub const MAX_SIMD_DEGREE: usize = 4;
15 } else {
16 pub const MAX_SIMD_DEGREE: usize = 1;
17 }
18}
19
20cfg_if::cfg_if! {
25 if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] {
26 cfg_if::cfg_if! {
27 if #[cfg(blake3_avx512_ffi)] {
28 pub const MAX_SIMD_DEGREE_OR_2: usize = 16;
29 } else {
30 pub const MAX_SIMD_DEGREE_OR_2: usize = 8;
31 }
32 }
33 } else if #[cfg(blake3_neon)] {
34 pub const MAX_SIMD_DEGREE_OR_2: usize = 4;
35 } else {
36 pub const MAX_SIMD_DEGREE_OR_2: usize = 2;
37 }
38}
39
/// Identifies which implementation the dispatch methods on this type forward to.
///
/// Which variants exist depends on the compilation target and on the
/// `blake3_avx512_ffi` / `blake3_neon` cfg flags.
#[derive(Clone, Copy, Debug)]
pub enum Platform {
    /// Dispatches to the `portable` module; present in every build.
    Portable,
    /// Dispatches to the `sse2` module (x86 / x86_64 only).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSE2,
    /// Dispatches to the `sse41` module (x86 / x86_64 only).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    SSE41,
    /// Uses the `avx2` module for `hash_many` and the `sse41` module for
    /// single-block compression (x86 / x86_64 only).
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    AVX2,
    /// Dispatches to the `avx512` module (x86 / x86_64 with
    /// `blake3_avx512_ffi` only).
    #[cfg(all(
        blake3_avx512_ffi,
        any(target_arch = "x86", target_arch = "x86_64")
    ))]
    AVX512,
    /// Uses the `neon` module for `hash_many` and the `portable` module for
    /// single-block compression (`blake3_neon` builds only).
    #[cfg(blake3_neon)]
    NEON,
}
55
56impl Platform {
57 #[allow(unreachable_code)]
58 pub fn detect() -> Self {
59 #[cfg(miri)]
60 {
61 return Platform::Portable;
62 }
63
64 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
65 {
66 #[cfg(blake3_avx512_ffi)]
67 {
68 if avx512_detected() {
69 return Platform::AVX512;
70 }
71 }
72 if avx2_detected() {
73 return Platform::AVX2;
74 }
75 if sse41_detected() {
76 return Platform::SSE41;
77 }
78 if sse2_detected() {
79 return Platform::SSE2;
80 }
81 }
82 #[cfg(blake3_neon)]
85 {
86 return Platform::NEON;
87 }
88 Platform::Portable
89 }
90
91 pub fn simd_degree(&self) -> usize {
92 let degree = match self {
93 Platform::Portable => 1,
94 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
95 Platform::SSE2 => 4,
96 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
97 Platform::SSE41 => 4,
98 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
99 Platform::AVX2 => 8,
100 #[cfg(blake3_avx512_ffi)]
101 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
102 Platform::AVX512 => 16,
103 #[cfg(blake3_neon)]
104 Platform::NEON => 4,
105 };
106 debug_assert!(degree <= MAX_SIMD_DEGREE);
107 degree
108 }
109
110 pub fn compress_in_place(
111 &self,
112 cv: &mut CVWords,
113 block: &[u8; BLOCK_LEN],
114 block_len: u8,
115 counter: u64,
116 flags: u8,
117 ) {
118 match self {
119 Platform::Portable => portable::compress_in_place(cv, block, block_len, counter, flags),
120 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
122 Platform::SSE2 => unsafe {
123 crate::sse2::compress_in_place(cv, block, block_len, counter, flags)
124 },
125 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
127 Platform::SSE41 | Platform::AVX2 => unsafe {
128 crate::sse41::compress_in_place(cv, block, block_len, counter, flags)
129 },
130 #[cfg(blake3_avx512_ffi)]
132 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
133 Platform::AVX512 => unsafe {
134 crate::avx512::compress_in_place(cv, block, block_len, counter, flags)
135 },
136 #[cfg(blake3_neon)]
138 Platform::NEON => portable::compress_in_place(cv, block, block_len, counter, flags),
139 }
140 }
141
142 pub fn compress_xof(
143 &self,
144 cv: &CVWords,
145 block: &[u8; BLOCK_LEN],
146 block_len: u8,
147 counter: u64,
148 flags: u8,
149 ) -> [u8; 64] {
150 match self {
151 Platform::Portable => portable::compress_xof(cv, block, block_len, counter, flags),
152 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
154 Platform::SSE2 => unsafe {
155 crate::sse2::compress_xof(cv, block, block_len, counter, flags)
156 },
157 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
159 Platform::SSE41 | Platform::AVX2 => unsafe {
160 crate::sse41::compress_xof(cv, block, block_len, counter, flags)
161 },
162 #[cfg(blake3_avx512_ffi)]
164 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
165 Platform::AVX512 => unsafe {
166 crate::avx512::compress_xof(cv, block, block_len, counter, flags)
167 },
168 #[cfg(blake3_neon)]
170 Platform::NEON => portable::compress_xof(cv, block, block_len, counter, flags),
171 }
172 }
173
174 pub fn hash_many<const N: usize>(
185 &self,
186 inputs: &[&[u8; N]],
187 key: &CVWords,
188 counter: u64,
189 increment_counter: IncrementCounter,
190 flags: u8,
191 flags_start: u8,
192 flags_end: u8,
193 out: &mut [u8],
194 ) {
195 match self {
196 Platform::Portable => portable::hash_many(
197 inputs,
198 key,
199 counter,
200 increment_counter,
201 flags,
202 flags_start,
203 flags_end,
204 out,
205 ),
206 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
208 Platform::SSE2 => unsafe {
209 crate::sse2::hash_many(
210 inputs,
211 key,
212 counter,
213 increment_counter,
214 flags,
215 flags_start,
216 flags_end,
217 out,
218 )
219 },
220 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
222 Platform::SSE41 => unsafe {
223 crate::sse41::hash_many(
224 inputs,
225 key,
226 counter,
227 increment_counter,
228 flags,
229 flags_start,
230 flags_end,
231 out,
232 )
233 },
234 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
236 Platform::AVX2 => unsafe {
237 crate::avx2::hash_many(
238 inputs,
239 key,
240 counter,
241 increment_counter,
242 flags,
243 flags_start,
244 flags_end,
245 out,
246 )
247 },
248 #[cfg(blake3_avx512_ffi)]
250 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
251 Platform::AVX512 => unsafe {
252 crate::avx512::hash_many(
253 inputs,
254 key,
255 counter,
256 increment_counter,
257 flags,
258 flags_start,
259 flags_end,
260 out,
261 )
262 },
263 #[cfg(blake3_neon)]
265 Platform::NEON => unsafe {
266 crate::neon::hash_many(
267 inputs,
268 key,
269 counter,
270 increment_counter,
271 flags,
272 flags_start,
273 flags_end,
274 out,
275 )
276 },
277 }
278 }
279
280 pub fn xof_many(
281 &self,
282 cv: &CVWords,
283 block: &[u8; BLOCK_LEN],
284 block_len: u8,
285 mut counter: u64,
286 flags: u8,
287 out: &mut [u8],
288 ) {
289 debug_assert_eq!(0, out.len() % BLOCK_LEN, "whole blocks only");
290 if out.is_empty() {
291 return;
293 }
294 match self {
295 #[cfg(blake3_avx512_ffi)]
297 #[cfg(unix)]
298 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
299 Platform::AVX512 => unsafe {
300 crate::avx512::xof_many(cv, block, block_len, counter, flags, out)
301 },
302 _ => {
303 for out_block in out.chunks_exact_mut(BLOCK_LEN) {
306 let out_array: &mut [u8; BLOCK_LEN] = out_block.try_into().unwrap();
308 *out_array = self.compress_xof(cv, block, block_len, counter, flags);
309 counter += 1;
310 }
311 }
312 }
313 }
314
315 pub fn portable() -> Self {
318 Self::Portable
319 }
320
321 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
322 pub fn sse2() -> Option<Self> {
323 if sse2_detected() {
324 Some(Self::SSE2)
325 } else {
326 None
327 }
328 }
329
330 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
331 pub fn sse41() -> Option<Self> {
332 if sse41_detected() {
333 Some(Self::SSE41)
334 } else {
335 None
336 }
337 }
338
339 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
340 pub fn avx2() -> Option<Self> {
341 if avx2_detected() {
342 Some(Self::AVX2)
343 } else {
344 None
345 }
346 }
347
348 #[cfg(blake3_avx512_ffi)]
349 #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
350 pub fn avx512() -> Option<Self> {
351 if avx512_detected() {
352 Some(Self::AVX512)
353 } else {
354 None
355 }
356 }
357
358 #[cfg(blake3_neon)]
359 pub fn neon() -> Option<Self> {
360 Some(Self::NEON)
362 }
363}
364
/// Reports whether the AVX-512 implementation may be used.
///
/// Decision order:
/// 1. `false` under Miri or when the `no_avx512` feature is enabled
///    (presumably a testing override - confirm).
/// 2. `true` when both `avx512f` and `avx512vl` were enabled at compile time.
/// 3. With the `std` feature, `true` when runtime detection finds both.
/// 4. `false` otherwise.
#[cfg(all(
    blake3_avx512_ffi,
    any(target_arch = "x86", target_arch = "x86_64")
))]
#[inline(always)]
pub fn avx512_detected() -> bool {
    if cfg!(any(miri, feature = "no_avx512")) {
        return false;
    }
    // Static check: the features were switched on for the whole compilation.
    if cfg!(all(target_feature = "avx512f", target_feature = "avx512vl")) {
        return true;
    }
    // Dynamic check: `is_x86_feature_detected!` is only available with std.
    #[cfg(feature = "std")]
    {
        if is_x86_feature_detected!("avx512f") && is_x86_feature_detected!("avx512vl") {
            return true;
        }
    }
    false
}
394
/// Reports whether the AVX2 implementation may be used.
///
/// Decision order:
/// 1. `false` under Miri or when the `no_avx2` feature is enabled
///    (presumably a testing override - confirm).
/// 2. `true` when `avx2` was enabled at compile time.
/// 3. With the `std` feature, `true` when runtime detection finds it.
/// 4. `false` otherwise.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
pub fn avx2_detected() -> bool {
    if cfg!(any(miri, feature = "no_avx2")) {
        return false;
    }
    // Static check: the feature was switched on for the whole compilation.
    if cfg!(target_feature = "avx2") {
        return true;
    }
    // Dynamic check: `is_x86_feature_detected!` is only available with std.
    #[cfg(feature = "std")]
    {
        if is_x86_feature_detected!("avx2") {
            return true;
        }
    }
    false
}
421
/// Reports whether the SSE4.1 implementation may be used.
///
/// Decision order:
/// 1. `false` under Miri or when the `no_sse41` feature is enabled
///    (presumably a testing override - confirm).
/// 2. `true` when `sse4.1` was enabled at compile time.
/// 3. With the `std` feature, `true` when runtime detection finds it.
/// 4. `false` otherwise.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
pub fn sse41_detected() -> bool {
    if cfg!(any(miri, feature = "no_sse41")) {
        return false;
    }
    // Static check: the feature was switched on for the whole compilation.
    if cfg!(target_feature = "sse4.1") {
        return true;
    }
    // Dynamic check: `is_x86_feature_detected!` is only available with std.
    #[cfg(feature = "std")]
    {
        if is_x86_feature_detected!("sse4.1") {
            return true;
        }
    }
    false
}
448
/// Reports whether the SSE2 implementation may be used.
///
/// Decision order:
/// 1. `false` under Miri or when the `no_sse2` feature is enabled
///    (presumably a testing override - confirm).
/// 2. `true` when `sse2` was enabled at compile time.
/// 3. With the `std` feature, `true` when runtime detection finds it.
/// 4. `false` otherwise.
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
#[inline(always)]
pub fn sse2_detected() -> bool {
    if cfg!(any(miri, feature = "no_sse2")) {
        return false;
    }
    // Static check: the feature was switched on for the whole compilation.
    if cfg!(target_feature = "sse2") {
        return true;
    }
    // Dynamic check: `is_x86_feature_detected!` is only available with std.
    #[cfg(feature = "std")]
    {
        if is_x86_feature_detected!("sse2") {
            return true;
        }
    }
    false
}
475
/// Decodes 32 bytes into eight `u32` words, reading each consecutive 4-byte
/// group as a little-endian integer.
///
/// `bytes[4 * i..4 * i + 4]` becomes word `i` of the result.
#[inline(always)]
pub fn words_from_le_bytes_32(bytes: &[u8; 32]) -> [u32; 8] {
    let mut out = [0u32; 8];
    // 32 bytes split into exactly eight 4-byte chunks, one per output word.
    for (word, chunk) in out.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
    }
    out
}
489
/// Decodes 64 bytes into sixteen `u32` words, reading each consecutive 4-byte
/// group as a little-endian integer.
///
/// `bytes[4 * i..4 * i + 4]` becomes word `i` of the result.
#[inline(always)]
pub fn words_from_le_bytes_64(bytes: &[u8; 64]) -> [u32; 16] {
    let mut out = [0u32; 16];
    // 64 bytes split into exactly sixteen 4-byte chunks, one per output word.
    for (word, chunk) in out.iter_mut().zip(bytes.chunks_exact(4)) {
        *word = u32::from_le_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]);
    }
    out
}
511
/// Encodes eight `u32` words as 32 bytes, writing each word in little-endian
/// byte order.
///
/// Word `i` occupies `out[4 * i..4 * i + 4]` of the result.
#[inline(always)]
pub fn le_bytes_from_words_32(words: &[u32; 8]) -> [u8; 32] {
    let mut out = [0u8; 32];
    // Eight 4-byte output chunks, one per input word.
    for (chunk, word) in out.chunks_exact_mut(4).zip(words.iter()) {
        chunk.copy_from_slice(&word.to_le_bytes());
    }
    out
}
525
/// Encodes sixteen `u32` words as 64 bytes, writing each word in little-endian
/// byte order.
///
/// Word `i` occupies `out[4 * i..4 * i + 4]` of the result.
#[inline(always)]
pub fn le_bytes_from_words_64(words: &[u32; 16]) -> [u8; 64] {
    let mut out = [0u8; 64];
    // Sixteen 4-byte output chunks, one per input word.
    for (chunk, word) in out.chunks_exact_mut(4).zip(words.iter()) {
        chunk.copy_from_slice(&word.to_le_bytes());
    }
    out
}