clap_lex/ext.rs
1use std::ffi::OsStr;
2
3/// String-like methods for [`OsStr`]
4pub trait OsStrExt: private::Sealed {
5 /// Converts to a string slice.
6 ///
7 /// The `Utf8Error` is guaranteed to have a valid UTF8 boundary
8 /// in its `valid_up_to()`
9 fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
10 /// Returns `true` if the given pattern matches a sub-slice of
11 /// this string slice.
12 ///
13 /// Returns `false` if it does not.
14 ///
15 /// # Examples
16 ///
17 /// ```rust
18 /// use clap_lex::OsStrExt as _;
19 /// let bananas = std::ffi::OsStr::new("bananas");
20 ///
21 /// assert!(bananas.contains("nana"));
22 /// assert!(!bananas.contains("apples"));
23 /// ```
24 fn contains(&self, needle: &str) -> bool;
25 /// Returns the byte index of the first character of this string slice that
26 /// matches the pattern.
27 ///
28 /// Returns [`None`] if the pattern doesn't match.
29 ///
30 /// # Examples
31 ///
32 /// ```rust
33 /// use clap_lex::OsStrExt as _;
34 /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi");
35 ///
36 /// assert_eq!(s.find("L"), Some(0));
37 /// assert_eq!(s.find("é"), Some(14));
38 /// assert_eq!(s.find("par"), Some(17));
39 /// ```
40 ///
41 /// Not finding the pattern:
42 ///
43 /// ```rust
44 /// use clap_lex::OsStrExt as _;
45 /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard");
46 ///
47 /// assert_eq!(s.find("1"), None);
48 /// ```
49 fn find(&self, needle: &str) -> Option<usize>;
50 /// Returns a string slice with the prefix removed.
51 ///
52 /// If the string starts with the pattern `prefix`, returns substring after the prefix, wrapped
53 /// in `Some`.
54 ///
55 /// If the string does not start with `prefix`, returns `None`.
56 ///
57 /// # Examples
58 ///
59 /// ```
60 /// use std::ffi::OsStr;
61 /// use clap_lex::OsStrExt as _;
62 /// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar")));
63 /// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None);
64 /// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo")));
65 /// ```
66 fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>;
67 /// Returns `true` if the given pattern matches a prefix of this
68 /// string slice.
69 ///
70 /// Returns `false` if it does not.
71 ///
72 /// # Examples
73 ///
74 /// ```
75 /// use clap_lex::OsStrExt as _;
76 /// let bananas = std::ffi::OsStr::new("bananas");
77 ///
78 /// assert!(bananas.starts_with("bana"));
79 /// assert!(!bananas.starts_with("nana"));
80 /// ```
81 fn starts_with(&self, prefix: &str) -> bool;
82 /// An iterator over substrings of this string slice, separated by
83 /// characters matched by a pattern.
84 ///
85 /// # Examples
86 ///
87 /// Simple patterns:
88 ///
89 /// ```
90 /// use std::ffi::OsStr;
91 /// use clap_lex::OsStrExt as _;
92 /// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect();
93 /// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]);
94 ///
95 /// let v: Vec<_> = OsStr::new("").split("X").collect();
96 /// assert_eq!(v, [OsStr::new("")]);
97 ///
98 /// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect();
99 /// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]);
100 ///
101 /// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect();
102 /// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]);
103 /// ```
104 ///
105 /// If a string contains multiple contiguous separators, you will end up
106 /// with empty strings in the output:
107 ///
108 /// ```
109 /// use std::ffi::OsStr;
110 /// use clap_lex::OsStrExt as _;
111 /// let x = OsStr::new("||||a||b|c");
112 /// let d: Vec<_> = x.split("|").collect();
113 ///
114 /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
115 /// ```
116 ///
117 /// Contiguous separators are separated by the empty string.
118 ///
119 /// ```
120 /// use std::ffi::OsStr;
121 /// use clap_lex::OsStrExt as _;
122 /// let x = OsStr::new("(///)");
123 /// let d: Vec<_> = x.split("/").collect();
124 ///
125 /// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]);
126 /// ```
127 ///
128 /// Separators at the start or end of a string are neighbored
129 /// by empty strings.
130 ///
131 /// ```
132 /// use std::ffi::OsStr;
133 /// use clap_lex::OsStrExt as _;
134 /// let d: Vec<_> = OsStr::new("010").split("0").collect();
135 /// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]);
136 /// ```
137 ///
138 /// When the empty string is used as a separator, it panics
139 ///
140 /// ```should_panic
141 /// use std::ffi::OsStr;
142 /// use clap_lex::OsStrExt as _;
143 /// let f: Vec<_> = OsStr::new("rust").split("").collect();
144 /// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]);
145 /// ```
146 ///
147 /// Contiguous separators can lead to possibly surprising behavior
148 /// when whitespace is used as the separator. This code is correct:
149 ///
150 /// ```
151 /// use std::ffi::OsStr;
152 /// use clap_lex::OsStrExt as _;
153 /// let x = OsStr::new(" a b c");
154 /// let d: Vec<_> = x.split(" ").collect();
155 ///
156 /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
157 /// ```
158 ///
159 /// It does _not_ give you:
160 ///
161 /// ```,ignore
162 /// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]);
163 /// ```
164 ///
165 /// Use [`split_whitespace`] for this behavior.
166 ///
167 /// [`split_whitespace`]: str::split_whitespace
168 fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>;
169 /// Splits the string on the first occurrence of the specified delimiter and
170 /// returns prefix before delimiter and suffix after delimiter.
171 ///
172 /// # Examples
173 ///
174 /// ```
175 /// use std::ffi::OsStr;
176 /// use clap_lex::OsStrExt as _;
177 /// assert_eq!(OsStr::new("cfg").split_once("="), None);
178 /// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new(""))));
179 /// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo"))));
180 /// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar"))));
181 /// ```
182 fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>;
183}
184
185impl OsStrExt for OsStr {
186 fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
187 let bytes = self.as_encoded_bytes();
188 std::str::from_utf8(bytes)
189 }
190
191 fn contains(&self, needle: &str) -> bool {
192 self.find(needle).is_some()
193 }
194
195 fn find(&self, needle: &str) -> Option<usize> {
196 let bytes = self.as_encoded_bytes();
197 (0..=self.len().checked_sub(needle.len())?)
198 .find(|&x| bytes[x..].starts_with(needle.as_bytes()))
199 }
200
201 fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
202 let bytes = self.as_encoded_bytes();
203 bytes.strip_prefix(prefix.as_bytes()).map(|s| {
204 // SAFETY:
205 // - This came from `as_encoded_bytes`
206 // - Since `prefix` is `&str`, any split will be along UTF-8 boundary
207 unsafe { OsStr::from_encoded_bytes_unchecked(s) }
208 })
209 }
210 fn starts_with(&self, prefix: &str) -> bool {
211 let bytes = self.as_encoded_bytes();
212 bytes.starts_with(prefix.as_bytes())
213 }
214
215 fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
216 assert_ne!(needle, "");
217 Split {
218 haystack: Some(self),
219 needle,
220 }
221 }
222
223 fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
224 let start = self.find(needle)?;
225 let end = start + needle.len();
226 let haystack = self.as_encoded_bytes();
227 let first = &haystack[0..start];
228 let second = &haystack[end..];
229 // SAFETY:
230 // - This came from `as_encoded_bytes`
231 // - Since `needle` is `&str`, any split will be along UTF-8 boundary
232 unsafe {
233 Some((
234 OsStr::from_encoded_bytes_unchecked(first),
235 OsStr::from_encoded_bytes_unchecked(second),
236 ))
237 }
238 }
239}
240
/// Holds the supertrait that seals `OsStrExt`.
///
/// The module is private, so `Sealed` cannot be named, and therefore cannot be implemented,
/// from outside this file's crate.
mod private {
    use std::ffi::OsStr;

    /// Marker supertrait; implemented only for [`OsStr`].
    pub trait Sealed {}

    impl Sealed for OsStr {}
}
246
/// Iterator over the pieces of an [`OsStr`] that are separated by a `&str` needle.
///
/// Returned by [`OsStrExt::split`].
#[derive(Clone, Debug)]
pub struct Split<'s, 'n> {
    /// Input that has not been yielded yet; `None` once the final piece has been returned.
    haystack: Option<&'s OsStr>,
    /// Separator the input is split on.
    needle: &'n str,
}
251
252impl<'s> Iterator for Split<'s, '_> {
253 type Item = &'s OsStr;
254
255 fn next(&mut self) -> Option<Self::Item> {
256 let haystack = self.haystack?;
257 if let Some((first, second)) = haystack.split_once(self.needle) {
258 if !haystack.is_empty() {
259 debug_assert_ne!(haystack, second);
260 }
261 self.haystack = Some(second);
262 Some(first)
263 } else {
264 self.haystack = None;
265 Some(haystack)
266 }
267 }
268}
269
/// Split an `OsStr` into the parts before and after byte offset `index`
///
/// # Safety
///
/// `index` must be at a valid UTF-8 boundary
///
/// # Panics
///
/// Panics if `index` is greater than the length of `os` in encoded bytes.
pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
    let (first, second) = os.as_encoded_bytes().split_at(index);
    // SAFETY:
    // - Both halves came from `as_encoded_bytes`
    // - The caller guarantees that `index` is at a valid UTF-8 boundary
    unsafe {
        (
            OsStr::from_encoded_bytes_unchecked(first),
            OsStr::from_encoded_bytes_unchecked(second),
        )
    }
}