toml_write/
string.rs

1#[derive(Copy, Clone, Debug)]
2pub struct TomlStringBuilder<'s> {
3    decoded: &'s str,
4    metrics: ValueMetrics,
5}
6
7impl<'s> TomlStringBuilder<'s> {
8    pub fn new(decoded: &'s str) -> Self {
9        Self {
10            decoded,
11            metrics: ValueMetrics::calculate(decoded),
12        }
13    }
14
15    pub fn as_default(&self) -> TomlString<'s> {
16        self.as_basic_pretty()
17            .or_else(|| self.as_literal())
18            .or_else(|| self.as_ml_basic_pretty())
19            .or_else(|| self.as_ml_literal())
20            .unwrap_or_else(|| {
21                if self.metrics.newline {
22                    self.as_ml_basic()
23                } else {
24                    self.as_basic()
25                }
26            })
27    }
28
29    pub fn as_literal(&self) -> Option<TomlString<'s>> {
30        if self.metrics.escape_codes
31            || 0 < self.metrics.max_seq_single_quotes
32            || self.metrics.newline
33        {
34            None
35        } else {
36            Some(TomlString {
37                decoded: self.decoded,
38                encoding: Encoding::LiteralString,
39                newline: self.metrics.newline,
40            })
41        }
42    }
43
44    pub fn as_ml_literal(&self) -> Option<TomlString<'s>> {
45        if self.metrics.escape_codes || 2 < self.metrics.max_seq_single_quotes {
46            None
47        } else {
48            Some(TomlString {
49                decoded: self.decoded,
50                encoding: Encoding::MlLiteralString,
51                newline: self.metrics.newline,
52            })
53        }
54    }
55
56    pub fn as_basic_pretty(&self) -> Option<TomlString<'s>> {
57        if self.metrics.escape_codes
58            || self.metrics.escape
59            || 0 < self.metrics.max_seq_double_quotes
60            || self.metrics.newline
61        {
62            None
63        } else {
64            Some(self.as_basic())
65        }
66    }
67
68    pub fn as_ml_basic_pretty(&self) -> Option<TomlString<'s>> {
69        if self.metrics.escape_codes
70            || self.metrics.escape
71            || 2 < self.metrics.max_seq_double_quotes
72        {
73            None
74        } else {
75            Some(self.as_ml_basic())
76        }
77    }
78
79    pub fn as_basic(&self) -> TomlString<'s> {
80        TomlString {
81            decoded: self.decoded,
82            encoding: Encoding::BasicString,
83            newline: self.metrics.newline,
84        }
85    }
86
87    pub fn as_ml_basic(&self) -> TomlString<'s> {
88        TomlString {
89            decoded: self.decoded,
90            encoding: Encoding::MlBasicString,
91            newline: self.metrics.newline,
92        }
93    }
94}
95
96#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
97pub struct TomlString<'s> {
98    decoded: &'s str,
99    encoding: Encoding,
100    newline: bool,
101}
102
103impl crate::WriteTomlValue for TomlString<'_> {
104    fn write_toml_value<W: crate::TomlWrite + ?Sized>(&self, writer: &mut W) -> core::fmt::Result {
105        write_toml_value(self.decoded, Some(self.encoding), self.newline, writer)
106    }
107}
108
109#[derive(Copy, Clone, Debug)]
110pub struct TomlKeyBuilder<'s> {
111    decoded: &'s str,
112    metrics: KeyMetrics,
113}
114
115impl<'s> TomlKeyBuilder<'s> {
116    pub fn new(decoded: &'s str) -> Self {
117        Self {
118            decoded,
119            metrics: KeyMetrics::calculate(decoded),
120        }
121    }
122
123    pub fn as_default(&self) -> TomlKey<'s> {
124        self.as_unquoted()
125            .or_else(|| self.as_basic_pretty())
126            .or_else(|| self.as_literal())
127            .unwrap_or_else(|| self.as_basic())
128    }
129
130    pub fn as_unquoted(&self) -> Option<TomlKey<'s>> {
131        if self.metrics.unquoted {
132            Some(TomlKey {
133                decoded: self.decoded,
134                encoding: None,
135            })
136        } else {
137            None
138        }
139    }
140
141    pub fn as_literal(&self) -> Option<TomlKey<'s>> {
142        if self.metrics.escape_codes || self.metrics.single_quotes {
143            None
144        } else {
145            Some(TomlKey {
146                decoded: self.decoded,
147                encoding: Some(Encoding::LiteralString),
148            })
149        }
150    }
151
152    pub fn as_basic_pretty(&self) -> Option<TomlKey<'s>> {
153        if self.metrics.escape_codes || self.metrics.escape || self.metrics.double_quotes {
154            None
155        } else {
156            Some(self.as_basic())
157        }
158    }
159
160    pub fn as_basic(&self) -> TomlKey<'s> {
161        TomlKey {
162            decoded: self.decoded,
163            encoding: Some(Encoding::BasicString),
164        }
165    }
166}
167
168#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
169pub struct TomlKey<'s> {
170    decoded: &'s str,
171    encoding: Option<Encoding>,
172}
173
174impl crate::WriteTomlKey for TomlKey<'_> {
175    fn write_toml_key<W: crate::TomlWrite + ?Sized>(&self, writer: &mut W) -> core::fmt::Result {
176        let newline = false;
177        write_toml_value(self.decoded, self.encoding, newline, writer)
178    }
179}
180
/// The quoted string forms this module can emit.
///
/// Variant order matters: the derived `PartialOrd`/`Ord` follow it.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[repr(u8)]
#[allow(clippy::enum_variant_names)]
enum Encoding {
    /// `'...'`
    LiteralString = 0,
    /// `"..."`
    BasicString = 1,
    /// `'''...'''`
    MlLiteralString = 2,
    /// `"""..."""`
    MlBasicString = 3,
}

impl Encoding {}
192
193fn write_toml_value<W: crate::TomlWrite + ?Sized>(
194    decoded: &str,
195    encoding: Option<Encoding>,
196    newline: bool,
197    writer: &mut W,
198) -> core::fmt::Result {
199    let delimiter = match encoding {
200        Some(Encoding::LiteralString) => "'",
201        Some(Encoding::BasicString) => "\"",
202        Some(Encoding::MlLiteralString) => "'''",
203        Some(Encoding::MlBasicString) => "\"\"\"",
204        None => "",
205    };
206    let escaped = match encoding {
207        Some(Encoding::LiteralString) | Some(Encoding::MlLiteralString) => false,
208        Some(Encoding::BasicString) | Some(Encoding::MlBasicString) => true,
209        None => false,
210    };
211    let is_ml = match encoding {
212        Some(Encoding::LiteralString) | Some(Encoding::BasicString) => false,
213        Some(Encoding::MlLiteralString) | Some(Encoding::MlBasicString) => true,
214        None => false,
215    };
216    let newline_prefix = newline && is_ml;
217
218    write!(writer, "{delimiter}")?;
219    if newline_prefix {
220        writer.newline()?;
221    }
222    if escaped {
223        // ```bnf
224        // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
225        // wschar =  %x20  ; Space
226        // wschar =/ %x09  ; Horizontal tab
227        // escape = %x5C                   ; \
228        // ```
229        let max_seq_double_quotes = if is_ml { 2 } else { 0 };
230        let mut stream = decoded;
231        while !stream.is_empty() {
232            let mut unescaped_end = 0;
233            let mut escaped = None;
234            let mut seq_double_quotes = 0;
235            for (i, b) in stream.as_bytes().iter().enumerate() {
236                if *b == b'"' {
237                    seq_double_quotes += 1;
238                    if max_seq_double_quotes < seq_double_quotes {
239                        escaped = Some(r#"\""#);
240                        break;
241                    }
242                } else {
243                    seq_double_quotes = 0;
244                }
245
246                match *b {
247                    0x8 => {
248                        escaped = Some(r#"\b"#);
249                        break;
250                    }
251                    0x9 => {
252                        escaped = Some(r#"\t"#);
253                        break;
254                    }
255                    0xa => {
256                        if !is_ml {
257                            escaped = Some(r#"\n"#);
258                            break;
259                        }
260                    }
261                    0xc => {
262                        escaped = Some(r#"\f"#);
263                        break;
264                    }
265                    0xd => {
266                        escaped = Some(r#"\r"#);
267                        break;
268                    }
269                    0x22 => {} // double quote handled earlier
270                    0x5c => {
271                        escaped = Some(r#"\\"#);
272                        break;
273                    }
274                    c if c <= 0x1f || c == 0x7f => {
275                        break;
276                    }
277                    _ => {}
278                }
279
280                unescaped_end = i + 1;
281            }
282            let unescaped = &stream[0..unescaped_end];
283            let escaped_str = escaped.unwrap_or("");
284            let end = unescaped_end + if escaped.is_some() { 1 } else { 0 };
285            stream = &stream[end..];
286            write!(writer, "{unescaped}{escaped_str}")?;
287            if escaped.is_none() && !stream.is_empty() {
288                let b = stream.as_bytes().first().unwrap();
289                write!(writer, "\\u{:04X}", *b as u32)?;
290                stream = &stream[1..];
291            }
292        }
293    } else {
294        write!(writer, "{decoded}")?;
295    }
296    write!(writer, "{delimiter}")?;
297    Ok(())
298}
299
/// Facts about a decoded string value that determine which TOML string
/// encodings can represent it without escaping.
#[derive(Copy, Clone, Debug)]
struct ValueMetrics {
    /// Longest run of consecutive `'` bytes, saturating at `u8::MAX`.
    max_seq_single_quotes: u8,
    /// Longest run of consecutive `"` bytes, saturating at `u8::MAX`.
    max_seq_double_quotes: u8,
    /// An ASCII control character other than tab or newline is present.
    escape_codes: bool,
    /// A backslash is present.
    escape: bool,
    /// A `\n` is present.
    newline: bool,
}

impl ValueMetrics {
    /// Metrics of the empty string: no quotes, nothing to escape, no newline.
    fn new() -> Self {
        Self {
            max_seq_single_quotes: 0,
            max_seq_double_quotes: 0,
            escape_codes: false,
            escape: false,
            newline: false,
        }
    }

    /// Scans `s` byte by byte and records its metrics.
    ///
    /// Quote-run counters saturate instead of overflowing, so inputs with
    /// 256 or more consecutive quotes no longer panic under overflow checks;
    /// a saturated run still reads as "longer than two".
    fn calculate(s: &str) -> Self {
        let mut metrics = Self::new();

        let mut prev_single_quotes: u8 = 0;
        let mut prev_double_quotes: u8 = 0;
        for byte in s.as_bytes() {
            if *byte == b'\'' {
                prev_single_quotes = prev_single_quotes.saturating_add(1);
                metrics.max_seq_single_quotes =
                    metrics.max_seq_single_quotes.max(prev_single_quotes);
            } else {
                prev_single_quotes = 0;
            }
            if *byte == b'"' {
                prev_double_quotes = prev_double_quotes.saturating_add(1);
                metrics.max_seq_double_quotes =
                    metrics.max_seq_double_quotes.max(prev_double_quotes);
            } else {
                prev_double_quotes = 0;
            }

            // ```bnf
            // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
            //
            // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
            // wschar =  %x20  ; Space
            // wschar =/ %x09  ; Horizontal tab
            // escape = %x5C                   ; \
            // ```
            match *byte {
                b'\\' => metrics.escape = true,
                // Escape codes are needed if any ascii control
                // characters are present, including \b \f \r.
                b'\t' => {} // always allowed; remaining neutral on this
                b'\n' => metrics.newline = true,
                c if c <= 0x1f || c == 0x7f => metrics.escape_codes = true,
                _ => {}
            }
        }

        metrics
    }
}
363
/// Facts about a decoded key that determine which TOML key encodings can
/// represent it.
#[derive(Copy, Clone, Debug)]
struct KeyMetrics {
    /// The key is non-empty and made only of `A-Z`, `a-z`, `0-9`, `-`, `_`.
    unquoted: bool,
    /// A `'` is present.
    single_quotes: bool,
    /// A `"` is present.
    double_quotes: bool,
    /// An ASCII control character other than tab is present.
    escape_codes: bool,
    /// A backslash is present.
    escape: bool,
}

impl KeyMetrics {
    /// Starting point for a scan: optimistic about `unquoted`, nothing else
    /// seen yet.
    fn new() -> Self {
        Self {
            unquoted: true,
            single_quotes: false,
            double_quotes: false,
            escape_codes: false,
            escape: false,
        }
    }

    /// Scans `s` byte by byte and records its metrics.
    fn calculate(s: &str) -> Self {
        // An empty key can never be written unquoted.
        let mut found = Self {
            unquoted: !s.is_empty(),
            ..Self::new()
        };

        for &byte in s.as_bytes() {
            // ```bnf
            // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
            //
            // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
            //
            // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
            // wschar =  %x20  ; Space
            // wschar =/ %x09  ; Horizontal tab
            // escape = %x5C                   ; \
            // ```
            let bare = byte.is_ascii_alphanumeric() || byte == b'-' || byte == b'_';
            found.unquoted &= bare;

            match byte {
                b'\'' => found.single_quotes = true,
                b'"' => found.double_quotes = true,
                b'\\' => found.escape = true,
                // Tab is always allowed.
                b'\t' => {}
                // Any other ascii control character needs an escape code,
                // including \b \f \r and newline.
                0x00..=0x1f | 0x7f => found.escape_codes = true,
                _ => {}
            }
        }

        found
    }
}
418}