const_format_proc_macros/format_str/
parsing.rs

1use super::{FmtArg, FmtStrComponent, FormatStr, ParseError, ParseErrorKind, WhichArg};
2
3use crate::{
4    formatting::{FormattingFlags, IsAlternate, NumberFormatting},
5    parse_utils::StrRawness,
6};
7
#[cfg(test)]
impl FmtStrComponent {
    /// Test-only constructor for a literal-text component.
    ///
    /// The text is copied from `s` and paired with `StrRawness::dummy()`.
    pub(super) fn str(s: &str) -> Self {
        let text = String::from(s);
        FmtStrComponent::Str(text, StrRawness::dummy())
    }

    /// Test-only constructor for an argument component.
    ///
    /// Wraps `which_arg` and `formatting` in a `FmtArg` whose rawness is
    /// `StrRawness::dummy()`.
    pub(super) fn arg(which_arg: WhichArg, formatting: FormattingFlags) -> Self {
        let rawness = StrRawness::dummy();
        FmtStrComponent::Arg(FmtArg {
            which_arg,
            formatting,
            rawness,
        })
    }
}
21
22impl FmtArg {
23    fn new(which_arg: WhichArg, formatting: FormattingFlags, rawness: StrRawness) -> Self {
24        Self {
25            which_arg,
26            formatting,
27            rawness,
28        }
29    }
30}
31
32#[allow(dead_code)]
33impl WhichArg {
34    pub(super) fn ident(s: &str) -> Self {
35        Self::Ident(s.to_string())
36    }
37}
38
39/////////////////////////////////////
40
#[cfg(test)]
impl std::str::FromStr for FormatStr {
    type Err = ParseError;

    /// Test-only convenience: parses `input` as a format string using
    /// `StrRawness::dummy()` for the rawness of every component.
    fn from_str(input: &str) -> Result<Self, Self::Err> {
        let rawness = StrRawness::dummy();
        parse_format_str(input, rawness)
    }
}
49
50impl FormatStr {
51    pub fn parse(input: &str, rawness: StrRawness) -> Result<FormatStr, ParseError> {
52        parse_format_str(input, rawness)
53    }
54}
55
56fn parse_format_str(input: &str, rawness: StrRawness) -> Result<FormatStr, ParseError> {
57    let mut components = Vec::<FmtStrComponent>::new();
58
59    let mut arg_start = 0;
60
61    loop {
62        let open_pos = input.find_from('{', arg_start);
63
64        let str = &input[arg_start..open_pos.unwrap_or(input.len())];
65        components.push_arg_str(parse_mid_str(str, arg_start)?, rawness);
66
67        if let Some(open_pos) = open_pos {
68            let after_open = open_pos + 1;
69            if input[after_open..].starts_with('{') {
70                components.push_arg_str("{".to_string(), rawness);
71
72                arg_start = open_pos + 2;
73            } else if let Some(close_pos) = input.find_from('}', after_open) {
74                let after_close = close_pos + 1;
75
76                let arg = parse_fmt_arg(&input[after_open..close_pos], after_open, rawness)?;
77                components.push(FmtStrComponent::Arg(arg));
78
79                arg_start = after_close;
80            } else {
81                return Err(ParseError {
82                    pos: open_pos,
83                    kind: ParseErrorKind::UnclosedArg,
84                });
85            }
86        } else {
87            break;
88        }
89    }
90
91    Ok(FormatStr { list: components })
92}
93
94/// Parses the text between arguments, to unescape `}}` into `}`
95fn parse_mid_str(str: &str, starts_at: usize) -> Result<String, ParseError> {
96    let mut buffer = String::with_capacity(str.len());
97
98    let mut starts_pos = 0;
99    let bytes = str.as_bytes();
100
101    while let Some(close_pos) = str.find_from('}', starts_pos) {
102        let after_close = close_pos + 1;
103        if bytes.get(after_close) == Some(&b'}') {
104            buffer.push_str(&str[starts_pos..after_close]);
105            starts_pos = after_close + 1;
106        } else {
107            return Err(ParseError {
108                pos: starts_at + close_pos,
109                kind: ParseErrorKind::InvalidClosedArg,
110            });
111        }
112    }
113    buffer.push_str(&str[starts_pos..]);
114
115    Ok(buffer)
116}
117
118/// Parses the format arguments (`{:?}`, `{foo:}`, `{0}`, etc).
119///
120/// `starts_at` is the offset of `input` in the formatting string.
121fn parse_fmt_arg(input: &str, starts_at: usize, rawness: StrRawness) -> Result<FmtArg, ParseError> {
122    let colon = input.find(':');
123
124    let which_arg_str = &input[..colon.unwrap_or(input.len())];
125    let formatting_str = colon.map_or("", |x| &input[x + 1..]);
126    let formatting_starts_at = colon.map_or(input.len(), |x| starts_at + x + 1);
127
128    Ok(FmtArg::new(
129        parse_which_arg(which_arg_str, starts_at)?,
130        parse_formatting(formatting_str, formatting_starts_at)?,
131        rawness,
132    ))
133}
134
135/// Parses the name of the argument in `{foo}`, `{}`, `{bar:?}`
136///
137/// `starts_at` is the offset of `input` in the formatting string.
138fn parse_which_arg(input: &str, starts_at: usize) -> Result<WhichArg, ParseError> {
139    if input.is_empty() {
140        Ok(WhichArg::Positional(None))
141    } else if input.as_bytes()[0].is_ascii_digit() {
142        match input.parse::<usize>() {
143            Ok(number) => Ok(WhichArg::Positional(Some(number))),
144            Err(_) => Err(ParseError {
145                pos: starts_at,
146                kind: ParseErrorKind::NotANumber {
147                    what: input.to_string(),
148                },
149            }),
150        }
151    } else {
152        parse_ident(input, starts_at)
153    }
154}
155
156/// Parses the `?` and other formatters inside formatting arguments (`{}`).
157///
158/// `starts_at` is the offset of `input` in the formatting string.
159fn parse_formatting(input: &str, starts_at: usize) -> Result<FormattingFlags, ParseError> {
160    match input {
161        "#" => return Ok(FormattingFlags::display(IsAlternate::Yes)),
162        "" => return Ok(FormattingFlags::display(IsAlternate::No)),
163        _ => {}
164    }
165
166    let mut bytes = input.as_bytes();
167
168    let make_error = || ParseError {
169        pos: starts_at,
170        kind: ParseErrorKind::UnknownFormatting {
171            what: input.to_string(),
172        },
173    };
174
175    if let [before @ .., b'?'] = bytes {
176        bytes = before;
177    }
178
179    let mut num_fmt = NumberFormatting::Decimal;
180    let mut is_alternate = IsAlternate::No;
181
182    for byte in bytes {
183        match byte {
184            b'b' if num_fmt.is_regular() => num_fmt = NumberFormatting::Binary,
185            b'x' if num_fmt.is_regular() => num_fmt = NumberFormatting::LowerHexadecimal,
186            b'X' if num_fmt.is_regular() => num_fmt = NumberFormatting::Hexadecimal,
187            b'#' => is_alternate = IsAlternate::Yes,
188            _ => return Err(make_error()),
189        }
190    }
191    Ok(FormattingFlags::debug(num_fmt, is_alternate))
192}
193
194/// Parses an identifier in a formatting argument.
195///
196/// `starts_at` is the offset of `input` in the formatting string.
197fn parse_ident(ident_str: &str, starts_at: usize) -> Result<WhichArg, ParseError> {
198    if is_ident(ident_str) {
199        Ok(WhichArg::Ident(ident_str.to_string()))
200    } else {
201        Err(ParseError {
202            pos: starts_at,
203            kind: ParseErrorKind::NotAnIdent {
204                what: ident_str.to_string(),
205            },
206        })
207    }
208}
209
210////////////////////////////////////////////////////////////////////////////////
211
212fn is_ident(s: &str) -> bool {
213    use unicode_xid::UnicodeXID;
214
215    if s.is_empty() || s == "_" {
216        return false;
217    }
218
219    let mut chars = s.chars();
220    let first = chars.next().unwrap();
221
222    // For some reason '_' is not considered a valid character for the stard of an ident
223    (first.is_xid_start() || first == '_') && chars.all(|c| c.is_xid_continue())
224}
225
226////////////////////////////////////////////////////////////////////////////////
227
228trait VecExt {
229    fn push_arg_str(&mut self, str: String, rawness: StrRawness);
230}
231
232impl VecExt for Vec<FmtStrComponent> {
233    fn push_arg_str(&mut self, str: String, rawness: StrRawness) {
234        if !str.is_empty() {
235            self.push(FmtStrComponent::Str(str, rawness));
236        }
237    }
238}
239
/// Extension trait for searching a string from a given byte offset.
trait StrExt {
    /// Finds the first occurrence of `c` at or after byte offset `from`.
    ///
    /// Returns the byte offset of the match relative to the start of the
    /// whole string, or `None` when there is no match. `None` is also
    /// returned, rather than panicking, when `from` is past the end of the
    /// string or does not fall on a character boundary.
    fn find_from(&self, c: char, from: usize) -> Option<usize>;
}

impl StrExt for str {
    fn find_from(&self, c: char, from: usize) -> Option<usize> {
        // `get` yields `None` for an out-of-range or non-boundary offset,
        // where indexing with `self[from..]` would panic.
        let tail = self.get(from..)?;
        tail.find(c).map(|offset| from + offset)
    }
}
248}