// clap_lex/ext.rs

use std::ffi::OsStr;

3/// String-like methods for [`OsStr`]
4pub trait OsStrExt: private::Sealed {
5    /// Converts to a string slice.
6    ///
7    /// The `Utf8Error` is guaranteed to have a valid UTF8 boundary
8    /// in its `valid_up_to()`
9    fn try_str(&self) -> Result<&str, std::str::Utf8Error>;
10    /// Returns `true` if the given pattern matches a sub-slice of
11    /// this string slice.
12    ///
13    /// Returns `false` if it does not.
14    ///
15    /// # Examples
16    ///
17    /// ```rust
18    /// use clap_lex::OsStrExt as _;
19    /// let bananas = std::ffi::OsStr::new("bananas");
20    ///
21    /// assert!(bananas.contains("nana"));
22    /// assert!(!bananas.contains("apples"));
23    /// ```
24    fn contains(&self, needle: &str) -> bool;
25    /// Returns the byte index of the first character of this string slice that
26    /// matches the pattern.
27    ///
28    /// Returns [`None`] if the pattern doesn't match.
29    ///
30    /// # Examples
31    ///
32    /// ```rust
33    /// use clap_lex::OsStrExt as _;
34    /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard Gepardi");
35    ///
36    /// assert_eq!(s.find("L"), Some(0));
37    /// assert_eq!(s.find("é"), Some(14));
38    /// assert_eq!(s.find("par"), Some(17));
39    /// ```
40    ///
41    /// Not finding the pattern:
42    ///
43    /// ```rust
44    /// use clap_lex::OsStrExt as _;
45    /// let s = std::ffi::OsStr::new("Löwe 老虎 Léopard");
46    ///
47    /// assert_eq!(s.find("1"), None);
48    /// ```
49    fn find(&self, needle: &str) -> Option<usize>;
50    /// Returns a string slice with the prefix removed.
51    ///
52    /// If the string starts with the pattern `prefix`, returns substring after the prefix, wrapped
53    /// in `Some`.
54    ///
55    /// If the string does not start with `prefix`, returns `None`.
56    ///
57    /// # Examples
58    ///
59    /// ```
60    /// use std::ffi::OsStr;
61    /// use clap_lex::OsStrExt as _;
62    /// assert_eq!(OsStr::new("foo:bar").strip_prefix("foo:"), Some(OsStr::new("bar")));
63    /// assert_eq!(OsStr::new("foo:bar").strip_prefix("bar"), None);
64    /// assert_eq!(OsStr::new("foofoo").strip_prefix("foo"), Some(OsStr::new("foo")));
65    /// ```
66    fn strip_prefix(&self, prefix: &str) -> Option<&OsStr>;
67    /// Returns `true` if the given pattern matches a prefix of this
68    /// string slice.
69    ///
70    /// Returns `false` if it does not.
71    ///
72    /// # Examples
73    ///
74    /// ```
75    /// use clap_lex::OsStrExt as _;
76    /// let bananas = std::ffi::OsStr::new("bananas");
77    ///
78    /// assert!(bananas.starts_with("bana"));
79    /// assert!(!bananas.starts_with("nana"));
80    /// ```
81    fn starts_with(&self, prefix: &str) -> bool;
82    /// An iterator over substrings of this string slice, separated by
83    /// characters matched by a pattern.
84    ///
85    /// # Examples
86    ///
87    /// Simple patterns:
88    ///
89    /// ```
90    /// use std::ffi::OsStr;
91    /// use clap_lex::OsStrExt as _;
92    /// let v: Vec<_> = OsStr::new("Mary had a little lamb").split(" ").collect();
93    /// assert_eq!(v, [OsStr::new("Mary"), OsStr::new("had"), OsStr::new("a"), OsStr::new("little"), OsStr::new("lamb")]);
94    ///
95    /// let v: Vec<_> = OsStr::new("").split("X").collect();
96    /// assert_eq!(v, [OsStr::new("")]);
97    ///
98    /// let v: Vec<_> = OsStr::new("lionXXtigerXleopard").split("X").collect();
99    /// assert_eq!(v, [OsStr::new("lion"), OsStr::new(""), OsStr::new("tiger"), OsStr::new("leopard")]);
100    ///
101    /// let v: Vec<_> = OsStr::new("lion::tiger::leopard").split("::").collect();
102    /// assert_eq!(v, [OsStr::new("lion"), OsStr::new("tiger"), OsStr::new("leopard")]);
103    /// ```
104    ///
105    /// If a string contains multiple contiguous separators, you will end up
106    /// with empty strings in the output:
107    ///
108    /// ```
109    /// use std::ffi::OsStr;
110    /// use clap_lex::OsStrExt as _;
111    /// let x = OsStr::new("||||a||b|c");
112    /// let d: Vec<_> = x.split("|").collect();
113    ///
114    /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
115    /// ```
116    ///
117    /// Contiguous separators are separated by the empty string.
118    ///
119    /// ```
120    /// use std::ffi::OsStr;
121    /// use clap_lex::OsStrExt as _;
122    /// let x = OsStr::new("(///)");
123    /// let d: Vec<_> = x.split("/").collect();
124    ///
125    /// assert_eq!(d, &[OsStr::new("("), OsStr::new(""), OsStr::new(""), OsStr::new(")")]);
126    /// ```
127    ///
128    /// Separators at the start or end of a string are neighbored
129    /// by empty strings.
130    ///
131    /// ```
132    /// use std::ffi::OsStr;
133    /// use clap_lex::OsStrExt as _;
134    /// let d: Vec<_> = OsStr::new("010").split("0").collect();
135    /// assert_eq!(d, &[OsStr::new(""), OsStr::new("1"), OsStr::new("")]);
136    /// ```
137    ///
138    /// When the empty string is used as a separator, it panics
139    ///
140    /// ```should_panic
141    /// use std::ffi::OsStr;
142    /// use clap_lex::OsStrExt as _;
143    /// let f: Vec<_> = OsStr::new("rust").split("").collect();
144    /// assert_eq!(f, &[OsStr::new(""), OsStr::new("r"), OsStr::new("u"), OsStr::new("s"), OsStr::new("t"), OsStr::new("")]);
145    /// ```
146    ///
147    /// Contiguous separators can lead to possibly surprising behavior
148    /// when whitespace is used as the separator. This code is correct:
149    ///
150    /// ```
151    /// use std::ffi::OsStr;
152    /// use clap_lex::OsStrExt as _;
153    /// let x = OsStr::new("    a  b c");
154    /// let d: Vec<_> = x.split(" ").collect();
155    ///
156    /// assert_eq!(d, &[OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new(""), OsStr::new("a"), OsStr::new(""), OsStr::new("b"), OsStr::new("c")]);
157    /// ```
158    ///
159    /// It does _not_ give you:
160    ///
161    /// ```,ignore
162    /// assert_eq!(d, &[OsStr::new("a"), OsStr::new("b"), OsStr::new("c")]);
163    /// ```
164    ///
165    /// Use [`split_whitespace`] for this behavior.
166    ///
167    /// [`split_whitespace`]: str::split_whitespace
168    fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n>;
169    /// Splits the string on the first occurrence of the specified delimiter and
170    /// returns prefix before delimiter and suffix after delimiter.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// use std::ffi::OsStr;
176    /// use clap_lex::OsStrExt as _;
177    /// assert_eq!(OsStr::new("cfg").split_once("="), None);
178    /// assert_eq!(OsStr::new("cfg=").split_once("="), Some((OsStr::new("cfg"), OsStr::new(""))));
179    /// assert_eq!(OsStr::new("cfg=foo").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo"))));
180    /// assert_eq!(OsStr::new("cfg=foo=bar").split_once("="), Some((OsStr::new("cfg"), OsStr::new("foo=bar"))));
181    /// ```
182    fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)>;
183}
184
185impl OsStrExt for OsStr {
186    fn try_str(&self) -> Result<&str, std::str::Utf8Error> {
187        let bytes = self.as_encoded_bytes();
188        std::str::from_utf8(bytes)
189    }
190
191    fn contains(&self, needle: &str) -> bool {
192        self.find(needle).is_some()
193    }
194
195    fn find(&self, needle: &str) -> Option<usize> {
196        let bytes = self.as_encoded_bytes();
197        (0..=self.len().checked_sub(needle.len())?)
198            .find(|&x| bytes[x..].starts_with(needle.as_bytes()))
199    }
200
201    fn strip_prefix(&self, prefix: &str) -> Option<&OsStr> {
202        let bytes = self.as_encoded_bytes();
203        bytes.strip_prefix(prefix.as_bytes()).map(|s| {
204            // SAFETY:
205            // - This came from `as_encoded_bytes`
206            // - Since `prefix` is `&str`, any split will be along UTF-8 boundary
207            unsafe { OsStr::from_encoded_bytes_unchecked(s) }
208        })
209    }
210    fn starts_with(&self, prefix: &str) -> bool {
211        let bytes = self.as_encoded_bytes();
212        bytes.starts_with(prefix.as_bytes())
213    }
214
215    fn split<'s, 'n>(&'s self, needle: &'n str) -> Split<'s, 'n> {
216        assert_ne!(needle, "");
217        Split {
218            haystack: Some(self),
219            needle,
220        }
221    }
222
223    fn split_once(&self, needle: &'_ str) -> Option<(&OsStr, &OsStr)> {
224        let start = self.find(needle)?;
225        let end = start + needle.len();
226        let haystack = self.as_encoded_bytes();
227        let first = &haystack[0..start];
228        let second = &haystack[end..];
229        // SAFETY:
230        // - This came from `as_encoded_bytes`
231        // - Since `needle` is `&str`, any split will be along UTF-8 boundary
232        unsafe {
233            Some((
234                OsStr::from_encoded_bytes_unchecked(first),
235                OsStr::from_encoded_bytes_unchecked(second),
236            ))
237        }
238    }
239}
240
// Sealing support for `OsStrExt`: the module is private, so code outside this crate
// cannot name `Sealed` and therefore cannot implement a trait that requires it.
mod private {
    /// Marker supertrait; it has no methods of its own.
    pub trait Sealed {}

    impl Sealed for std::ffi::OsStr {}
}

/// Iterator over the pieces of an [`OsStr`] that are separated by a `&str` needle.
///
/// Yields `&OsStr` items borrowed from the original string.
#[derive(Clone, Debug)]
pub struct Split<'s, 'n> {
    /// Part of the input that has not been yielded yet; `None` once iteration is over.
    haystack: Option<&'s OsStr>,
    /// Separator the input is split on.
    needle: &'n str,
}

252impl<'s> Iterator for Split<'s, '_> {
253    type Item = &'s OsStr;
254
255    fn next(&mut self) -> Option<Self::Item> {
256        let haystack = self.haystack?;
257        if let Some((first, second)) = haystack.split_once(self.needle) {
258            if !haystack.is_empty() {
259                debug_assert_ne!(haystack, second);
260            }
261            self.haystack = Some(second);
262            Some(first)
263        } else {
264            self.haystack = None;
265            Some(haystack)
266        }
267    }
268}
269
/// Split an `OsStr` into the part before byte offset `index` and the part from `index` on
///
/// # Safety
///
/// `index` must be at a valid UTF-8 boundary
///
/// # Panics
///
/// Panics if `index` is greater than the length of `os` in bytes.
pub(crate) unsafe fn split_at(os: &OsStr, index: usize) -> (&OsStr, &OsStr) {
    // Slicing is safe (it only panics when out of range); just the re-wrapping below
    // relies on the caller's guarantee.
    let (first, second) = os.as_encoded_bytes().split_at(index);
    // SAFETY:
    // - Both halves came from `as_encoded_bytes`
    // - The caller guarantees `index` is at a valid UTF-8 boundary
    unsafe {
        (
            OsStr::from_encoded_bytes_unchecked(first),
            OsStr::from_encoded_bytes_unchecked(second),
        )
    }
}