1use core::fmt;
2
/// Length in bytes of the UTF-8 sequence introduced by each possible first byte.
///
/// The table is indexed by the first byte of a sequence. An entry of `0` marks a
/// byte that can never start a sequence: continuation bytes (`0x80..=0xBF`),
/// the overlong two-byte leads (`0xC0`, `0xC1`), and `0xF5..=0xFF`.
const UTF8_CHAR_WIDTH: &[u8; 256] = &[
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x10
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x20
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x30
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x50
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x70
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0
    0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xC0
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xD0
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xE0
    4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0
];
23
/// Error produced by [`check_utf8`] when the input is not well-formed UTF-8.
#[derive(Debug, PartialEq, Eq, Clone)]
pub struct Utf8Error {
    /// Number of bytes, counted from the start of the input, that were
    /// validated before the first malformed or truncated sequence.
    valid_up_to: usize,
}
30
31impl Utf8Error {
32 pub const fn valid_up_to(&self) -> usize {
36 self.valid_up_to
37 }
38
39 pub const fn panic(&self) -> ! {
41 let offset = self.valid_up_to();
42 [][offset]
43 }
44}
45
46impl fmt::Display for Utf8Error {
47 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48 write!(
49 f,
50 "invalid utf-8 sequence starting from index {}",
51 self.valid_up_to
52 )
53 }
54}
55
56pub const fn check_utf8(mut bytes: &[u8]) -> Result<(), Utf8Error> {
59 let in_len = bytes.len();
60
61 macro_rules! try_nexts {
62 ($rema:ident, [$second:ident $(,$nexts:ident)*], $extra_checks:expr ) => ({
63 if let [$second, $($nexts,)* ref rem @ ..] = *$rema {
64 if $( is_continuation_byte($nexts) && )* $extra_checks {
65 bytes = rem;
66 } else {
67 return Err(Utf8Error{valid_up_to: in_len - bytes.len()});
68 }
69
70 } else {
71 return Err(Utf8Error{valid_up_to: in_len - bytes.len()});
72 }
73 })
74 }
75
76 while let [first, ref rema @ ..] = *bytes {
77 let utf8len = UTF8_CHAR_WIDTH[first as usize];
78 if bytes.len() < utf8len as usize {
79 return Err(Utf8Error {
80 valid_up_to: in_len - bytes.len(),
81 });
82 }
83
84 match utf8len {
85 1 => {
86 bytes = rema;
87 continue;
88 }
89 2 => try_nexts!(rema, [second], is_continuation_byte(second)),
90 3 => try_nexts!(
91 rema,
92 [second, third],
93 matches! {
94 (first, second),
95 (0xE0, 0xA0..=0xBF)
96 | (0xE1..=0xEC, 0x80..=0xBF)
97 | (0xED, 0x80..=0x9F)
98 | (0xEE..=0xEF, 0x80..=0xBF)
99 }
100 ),
101 4 => try_nexts!(
102 rema,
103 [second, third, fourth],
104 matches!(
105 (first, second),
106 (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F)
107 )
108 ),
109 _ => {
110 return Err(Utf8Error {
111 valid_up_to: in_len - bytes.len(),
112 })
113 }
114 }
115 }
116 Ok(())
117}
118
/// Returns `true` when `b` has the bit pattern `10xx_xxxx`, i.e. lies in
/// `0x80..=0xBF`, the form of a UTF-8 continuation byte.
const fn is_continuation_byte(b: u8) -> bool {
    matches!(b, 0x80..=0xBF)
}
122
/// Converts a byte slice into a `&str` after validating it with
/// `$crate::string::check_utf8`.
///
/// Evaluates to `Ok(&str)` when the bytes are valid UTF-8 and to the
/// `Err(Utf8Error)` from `check_utf8` otherwise. This variant is compiled
/// when the `rust_1_55` feature is disabled.
#[cfg(not(feature = "rust_1_55"))]
#[macro_export]
macro_rules! from_utf8_macro {
    ($slice:expr) => {
        // Bind the argument through `match` so the caller's expression is
        // evaluated exactly once and never inside an `unsafe` block.
        match $slice {
            x => match $crate::string::check_utf8(x) {
                $crate::__::Ok(()) => {
                    let ptr = x as *const [$crate::__::u8] as *const $crate::__::str;
                    // SAFETY: `check_utf8` just confirmed the bytes are valid
                    // UTF-8, and `ptr` was derived from the live reference `x`.
                    // NOTE(review): assumes `Dereference` reinterprets `ptr` as a
                    // reference of the same lifetime — confirm in `utils`.
                    $crate::__::Ok(unsafe { $crate::utils::Dereference { ptr }.reff })
                }
                $crate::__::Err(e) => $crate::__::Err(e),
            },
        }
    };
}
140
/// Converts a byte slice into a `&str` by forwarding to
/// `$crate::string::from_utf8_fn`.
///
/// This variant is compiled when the `rust_1_55` feature is enabled.
#[cfg(feature = "rust_1_55")]
#[macro_export]
macro_rules! from_utf8_macro {
    ($bytes:expr) => {
        $crate::string::from_utf8_fn($bytes)
    };
}
148
/// Converts `slice` into a `&str`, usable in `const` contexts.
///
/// # Errors
///
/// Returns the [`Utf8Error`] reported by [`check_utf8`] when `slice` is not
/// well-formed UTF-8.
#[cfg(feature = "rust_1_55")]
#[inline]
pub const fn from_utf8_fn(slice: &[u8]) -> Result<&str, Utf8Error> {
    if let Err(invalid) = check_utf8(slice) {
        return Err(invalid);
    }

    // SAFETY: `check_utf8` returned `Ok`, so `slice` is valid UTF-8.
    Ok(unsafe { core::str::from_utf8_unchecked(slice) })
}