konst_macro_rules/
string.rs

1use core::fmt;
2
// copied from core
// https://github.com/rust-lang/rust/blob/673d0db5e393e9c64897005b470bfeb6d5aec61b/library/core/src/str/validations.rs#L232
/// Lookup table indexed by the first byte of a UTF-8 sequence.
///
/// Each entry is the total length in bytes (1 to 4) of a sequence that starts
/// with that byte, or `0` when the byte cannot start a sequence
/// (continuation bytes `0x80..=0xBF`, plus `0xC0`, `0xC1` and `0xF5..=0xFF`).
///
/// One row per high nibble, sixteen entries per row.
const UTF8_CHAR_WIDTH: &[u8; 256] = &[
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00..=0x0F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10..=0x1F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20..=0x2F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30..=0x3F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40..=0x4F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50..=0x5F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60..=0x6F
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70..=0x7F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80..=0x8F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90..=0x9F
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0..=0xAF
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0..=0xBF
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0..=0xCF
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0..=0xDF
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0..=0xEF
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0..=0xFF
];
23
24///////////////////////////////////////////////////////////////////////////////
25
/// Error describing where a byte slice stops being valid UTF-8.
///
/// Produced by `check_utf8` when validation fails.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Utf8Error {
    /// Number of leading input bytes that are valid UTF-8, i.e. the offset
    /// at which the invalid sequence begins.
    valid_up_to: usize,
}
30
31impl Utf8Error {
32    /// The index up to which a `&str` can be validly constructed from the input `&[u8]`.
33    ///
34    /// `&input[..error.valid_up_to()]` is valid utf8.
35    pub const fn valid_up_to(&self) -> usize {
36        self.valid_up_to
37    }
38
39    /// For erroring with an error message.
40    pub const fn panic(&self) -> ! {
41        let offset = self.valid_up_to();
42        [/*Could not interpret bytes from offset as a str*/][offset]
43    }
44}
45
46impl fmt::Display for Utf8Error {
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48        write!(
49            f,
50            "invalid utf-8 sequence starting from index {}",
51            self.valid_up_to
52        )
53    }
54}
55
56///////////////////////////////////////////////////////////////////////////////
57
58pub const fn check_utf8(mut bytes: &[u8]) -> Result<(), Utf8Error> {
59    let in_len = bytes.len();
60
61    macro_rules! try_nexts {
62        ($rema:ident, [$second:ident $(,$nexts:ident)*], $extra_checks:expr ) => ({
63            if let [$second, $($nexts,)* ref rem @ ..] = *$rema  {
64                if $( is_continuation_byte($nexts) && )* $extra_checks {
65                    bytes = rem;
66                } else {
67                    return Err(Utf8Error{valid_up_to: in_len - bytes.len()});
68                }
69
70            } else {
71                return Err(Utf8Error{valid_up_to: in_len - bytes.len()});
72            }
73        })
74    }
75
76    while let [first, ref rema @ ..] = *bytes {
77        let utf8len = UTF8_CHAR_WIDTH[first as usize];
78        if bytes.len() < utf8len as usize {
79            return Err(Utf8Error {
80                valid_up_to: in_len - bytes.len(),
81            });
82        }
83
84        match utf8len {
85            1 => {
86                bytes = rema;
87                continue;
88            }
89            2 => try_nexts!(rema, [second], is_continuation_byte(second)),
90            3 => try_nexts!(
91                rema,
92                [second, third],
93                matches! {
94                    (first, second),
95                    (0xE0, 0xA0..=0xBF)
96                    | (0xE1..=0xEC, 0x80..=0xBF)
97                    | (0xED, 0x80..=0x9F)
98                    | (0xEE..=0xEF, 0x80..=0xBF)
99                }
100            ),
101            4 => try_nexts!(
102                rema,
103                [second, third, fourth],
104                matches!(
105                    (first, second),
106                    (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F)
107                )
108            ),
109            _ => {
110                return Err(Utf8Error {
111                    valid_up_to: in_len - bytes.len(),
112                })
113            }
114        }
115    }
116    Ok(())
117}
118
/// Returns `true` when `b` is a UTF-8 continuation byte, i.e. its top two
/// bits are `10` (`0x80..=0xBF`).
const fn is_continuation_byte(b: u8) -> bool {
    matches!(b, 0b1000_0000..=0b1011_1111)
}
122
/// Converts a `&[u8]` expression into a `Result<&str, Utf8Error>`, for
/// toolchains where the `rust_1_55` feature is disabled.
///
/// The bytes are validated with `check_utf8`; on success the same memory is
/// reinterpreted as a `&str`, on failure the validation error is returned
/// unchanged.
#[cfg(not(feature = "rust_1_55"))]
#[macro_export]
macro_rules! from_utf8_macro {
    ($slice:expr) => {
        // `match` binds the argument exactly once, and keeps the caller's
        // expression outside of any `unsafe` block.
        match $slice {
            x => match $crate::string::check_utf8(x) {
                $crate::__::Ok(()) => {
                    let ptr = x as *const [$crate::__::u8] as *const $crate::__::str;
                    // SAFETY: `check_utf8` just returned `Ok` for `x`, so the bytes
                    // behind `ptr` are valid UTF-8, and `ptr` was derived from the
                    // live reference `x`.
                    // NOTE(review): `Dereference` is presumably a union that
                    // reinterprets the raw pointer as a reference; confirm in `utils`.
                    unsafe { $crate::__::Ok($crate::utils::Dereference { ptr }.reff) }
                }
                $crate::__::Err(e) => $crate::__::Err(e),
            },
        }
    };
}
140
/// Converts a `&[u8]` expression into a `Result<&str, Utf8Error>`.
///
/// With the `rust_1_55` feature enabled this simply forwards to the
/// `from_utf8_fn` const function.
#[cfg(feature = "rust_1_55")]
#[macro_export]
macro_rules! from_utf8_macro {
    ($slice:expr) => (
        $crate::string::from_utf8_fn($slice)
    );
}
148
/// Const equivalent of `core::str::from_utf8`: validates `slice` and, if it
/// is well-formed UTF-8, returns it as a `&str`.
///
/// # Errors
///
/// Returns the [`Utf8Error`] produced by `check_utf8` when `slice` is not
/// valid UTF-8.
#[cfg(feature = "rust_1_55")]
#[inline]
pub const fn from_utf8_fn(slice: &[u8]) -> Result<&str, Utf8Error> {
    match check_utf8(slice) {
        Err(e) => Err(e),
        Ok(()) => {
            // SAFETY: `check_utf8` returned `Ok`, so `slice` is valid UTF-8.
            let text = unsafe { core::str::from_utf8_unchecked(slice) };
            Ok(text)
        }
    }
}