toml_writer/
string.rs

1/// Describes how a TOML string (key or value) should be formatted.
2///
3/// # Example
4///
5/// ```rust
6/// # #[cfg(feature = "alloc")] {
7/// # use toml_writer::ToTomlValue as _;
8/// let string = "Hello
9/// world!
10/// ";
11/// let string = toml_writer::TomlStringBuilder::new(string).as_default();
12/// let string = string.to_toml_value();
13/// assert_eq!(string, r#""""
14/// Hello
15/// world!
16/// """"#);
17/// # }
18/// ```
19#[derive(Copy, Clone, Debug)]
20pub struct TomlStringBuilder<'s> {
21    decoded: &'s str,
22    metrics: ValueMetrics,
23}
24
25impl<'s> TomlStringBuilder<'s> {
26    pub fn new(decoded: &'s str) -> Self {
27        Self {
28            decoded,
29            metrics: ValueMetrics::calculate(decoded),
30        }
31    }
32
33    pub fn as_default(&self) -> TomlString<'s> {
34        self.as_basic_pretty()
35            .or_else(|| self.as_literal())
36            .or_else(|| self.as_ml_basic_pretty())
37            .or_else(|| self.as_ml_literal())
38            .unwrap_or_else(|| {
39                if self.metrics.newline {
40                    self.as_ml_basic()
41                } else {
42                    self.as_basic()
43                }
44            })
45    }
46
47    pub fn as_literal(&self) -> Option<TomlString<'s>> {
48        if self.metrics.escape_codes
49            || 0 < self.metrics.max_seq_single_quotes
50            || self.metrics.newline
51        {
52            None
53        } else {
54            Some(TomlString {
55                decoded: self.decoded,
56                encoding: Encoding::LiteralString,
57                newline: self.metrics.newline,
58            })
59        }
60    }
61
62    pub fn as_ml_literal(&self) -> Option<TomlString<'s>> {
63        if self.metrics.escape_codes || 2 < self.metrics.max_seq_single_quotes {
64            None
65        } else {
66            Some(TomlString {
67                decoded: self.decoded,
68                encoding: Encoding::MlLiteralString,
69                newline: self.metrics.newline,
70            })
71        }
72    }
73
74    pub fn as_basic_pretty(&self) -> Option<TomlString<'s>> {
75        if self.metrics.escape_codes
76            || self.metrics.escape
77            || 0 < self.metrics.max_seq_double_quotes
78            || self.metrics.newline
79        {
80            None
81        } else {
82            Some(self.as_basic())
83        }
84    }
85
86    pub fn as_ml_basic_pretty(&self) -> Option<TomlString<'s>> {
87        if self.metrics.escape_codes
88            || self.metrics.escape
89            || 2 < self.metrics.max_seq_double_quotes
90        {
91            None
92        } else {
93            Some(self.as_ml_basic())
94        }
95    }
96
97    pub fn as_basic(&self) -> TomlString<'s> {
98        TomlString {
99            decoded: self.decoded,
100            encoding: Encoding::BasicString,
101            newline: self.metrics.newline,
102        }
103    }
104
105    pub fn as_ml_basic(&self) -> TomlString<'s> {
106        TomlString {
107            decoded: self.decoded,
108            encoding: Encoding::MlBasicString,
109            newline: self.metrics.newline,
110        }
111    }
112}
113
114#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
115pub struct TomlString<'s> {
116    decoded: &'s str,
117    encoding: Encoding,
118    newline: bool,
119}
120
121impl crate::WriteTomlValue for TomlString<'_> {
122    fn write_toml_value<W: crate::TomlWrite + ?Sized>(&self, writer: &mut W) -> core::fmt::Result {
123        write_toml_value(self.decoded, Some(self.encoding), self.newline, writer)
124    }
125}
126
127#[derive(Copy, Clone, Debug)]
128pub struct TomlKeyBuilder<'s> {
129    decoded: &'s str,
130    metrics: KeyMetrics,
131}
132
133impl<'s> TomlKeyBuilder<'s> {
134    pub fn new(decoded: &'s str) -> Self {
135        Self {
136            decoded,
137            metrics: KeyMetrics::calculate(decoded),
138        }
139    }
140
141    pub fn as_default(&self) -> TomlKey<'s> {
142        self.as_unquoted()
143            .or_else(|| self.as_basic_pretty())
144            .or_else(|| self.as_literal())
145            .unwrap_or_else(|| self.as_basic())
146    }
147
148    pub fn as_unquoted(&self) -> Option<TomlKey<'s>> {
149        if self.metrics.unquoted {
150            Some(TomlKey {
151                decoded: self.decoded,
152                encoding: None,
153            })
154        } else {
155            None
156        }
157    }
158
159    pub fn as_literal(&self) -> Option<TomlKey<'s>> {
160        if self.metrics.escape_codes || self.metrics.single_quotes {
161            None
162        } else {
163            Some(TomlKey {
164                decoded: self.decoded,
165                encoding: Some(Encoding::LiteralString),
166            })
167        }
168    }
169
170    pub fn as_basic_pretty(&self) -> Option<TomlKey<'s>> {
171        if self.metrics.escape_codes || self.metrics.escape || self.metrics.double_quotes {
172            None
173        } else {
174            Some(self.as_basic())
175        }
176    }
177
178    pub fn as_basic(&self) -> TomlKey<'s> {
179        TomlKey {
180            decoded: self.decoded,
181            encoding: Some(Encoding::BasicString),
182        }
183    }
184}
185
186#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
187pub struct TomlKey<'s> {
188    decoded: &'s str,
189    encoding: Option<Encoding>,
190}
191
192impl crate::WriteTomlKey for TomlKey<'_> {
193    fn write_toml_key<W: crate::TomlWrite + ?Sized>(&self, writer: &mut W) -> core::fmt::Result {
194        let newline = false;
195        write_toml_value(self.decoded, self.encoding, newline, writer)
196    }
197}
198
/// The quoted TOML string flavours this module can emit.
///
/// Variant order is significant: it drives the derived ordering and the
/// `u8` discriminants.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
#[repr(u8)]
// Every variant name ends in `String` on purpose.
#[allow(clippy::enum_variant_names)]
enum Encoding {
    /// `'...'`
    LiteralString = 0,
    /// `"..."`
    BasicString = 1,
    /// `'''...'''`
    MlLiteralString = 2,
    /// `"""..."""`
    MlBasicString = 3,
}

impl Encoding {}
210
211fn write_toml_value<W: crate::TomlWrite + ?Sized>(
212    decoded: &str,
213    encoding: Option<Encoding>,
214    newline: bool,
215    writer: &mut W,
216) -> core::fmt::Result {
217    let delimiter = match encoding {
218        Some(Encoding::LiteralString) => "'",
219        Some(Encoding::BasicString) => "\"",
220        Some(Encoding::MlLiteralString) => "'''",
221        Some(Encoding::MlBasicString) => "\"\"\"",
222        None => "",
223    };
224    let escaped = match encoding {
225        Some(Encoding::LiteralString) | Some(Encoding::MlLiteralString) => false,
226        Some(Encoding::BasicString) | Some(Encoding::MlBasicString) => true,
227        None => false,
228    };
229    let is_ml = match encoding {
230        Some(Encoding::LiteralString) | Some(Encoding::BasicString) => false,
231        Some(Encoding::MlLiteralString) | Some(Encoding::MlBasicString) => true,
232        None => false,
233    };
234    let newline_prefix = newline && is_ml;
235
236    write!(writer, "{delimiter}")?;
237    if newline_prefix {
238        writer.newline()?;
239    }
240    if escaped {
241        // ```bnf
242        // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
243        // wschar =  %x20  ; Space
244        // wschar =/ %x09  ; Horizontal tab
245        // escape = %x5C                   ; \
246        // ```
247        let max_seq_double_quotes = if is_ml { 2 } else { 0 };
248        let mut stream = decoded;
249        while !stream.is_empty() {
250            let mut unescaped_end = 0;
251            let mut escaped = None;
252            let mut seq_double_quotes = 0;
253            for (i, b) in stream.as_bytes().iter().enumerate() {
254                if *b == b'"' {
255                    seq_double_quotes += 1;
256                    if max_seq_double_quotes < seq_double_quotes {
257                        escaped = Some(r#"\""#);
258                        break;
259                    }
260                } else {
261                    seq_double_quotes = 0;
262                }
263
264                match *b {
265                    0x8 => {
266                        escaped = Some(r#"\b"#);
267                        break;
268                    }
269                    0x9 => {
270                        escaped = Some(r#"\t"#);
271                        break;
272                    }
273                    0xa => {
274                        if !is_ml {
275                            escaped = Some(r#"\n"#);
276                            break;
277                        }
278                    }
279                    0xc => {
280                        escaped = Some(r#"\f"#);
281                        break;
282                    }
283                    0xd => {
284                        escaped = Some(r#"\r"#);
285                        break;
286                    }
287                    0x22 => {} // double quote handled earlier
288                    0x5c => {
289                        escaped = Some(r#"\\"#);
290                        break;
291                    }
292                    c if c <= 0x1f || c == 0x7f => {
293                        break;
294                    }
295                    _ => {}
296                }
297
298                unescaped_end = i + 1;
299            }
300            let unescaped = &stream[0..unescaped_end];
301            let escaped_str = escaped.unwrap_or("");
302            let end = unescaped_end + if escaped.is_some() { 1 } else { 0 };
303            stream = &stream[end..];
304            write!(writer, "{unescaped}{escaped_str}")?;
305            if escaped.is_none() && !stream.is_empty() {
306                let b = stream.as_bytes().first().unwrap();
307                write!(writer, "\\u{:04X}", *b as u32)?;
308                stream = &stream[1..];
309            }
310        }
311    } else {
312        write!(writer, "{decoded}")?;
313    }
314    write!(writer, "{delimiter}")?;
315    Ok(())
316}
317
/// Facts about a string value that decide which TOML string encodings can
/// represent it.
#[derive(Copy, Clone, Debug)]
struct ValueMetrics {
    /// Longest run of consecutive `'` bytes, saturating at `u8::MAX`.
    max_seq_single_quotes: u8,
    /// Longest run of consecutive `"` bytes, saturating at `u8::MAX`.
    max_seq_double_quotes: u8,
    /// An ASCII control character other than tab or newline is present.
    escape_codes: bool,
    /// A backslash is present.
    escape: bool,
    /// A newline (`\n`) is present.
    newline: bool,
}

impl ValueMetrics {
    /// Returns the metrics of an empty string.
    fn new() -> Self {
        Self {
            max_seq_single_quotes: 0,
            max_seq_double_quotes: 0,
            escape_codes: false,
            escape: false,
            newline: false,
        }
    }

    /// Scans `s` byte by byte and records its metrics.
    ///
    /// Quote-run lengths saturate at `u8::MAX`; callers only compare them
    /// against small thresholds, so the exact length of a long run is
    /// irrelevant.
    fn calculate(s: &str) -> Self {
        let mut metrics = Self::new();

        let mut prev_single_quotes: u8 = 0;
        let mut prev_double_quotes: u8 = 0;
        for byte in s.as_bytes() {
            if *byte == b'\'' {
                // Saturate: a plain `+= 1` overflows on runs of 256 or more
                // quotes (panic in debug builds, wrap-around in release).
                prev_single_quotes = prev_single_quotes.saturating_add(1);
                metrics.max_seq_single_quotes =
                    metrics.max_seq_single_quotes.max(prev_single_quotes);
            } else {
                prev_single_quotes = 0;
            }
            if *byte == b'"' {
                prev_double_quotes = prev_double_quotes.saturating_add(1);
                metrics.max_seq_double_quotes =
                    metrics.max_seq_double_quotes.max(prev_double_quotes);
            } else {
                prev_double_quotes = 0;
            }

            // ```bnf
            // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
            //
            // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
            // wschar =  %x20  ; Space
            // wschar =/ %x09  ; Horizontal tab
            // escape = %x5C                   ; \
            // ```
            match *byte {
                b'\\' => metrics.escape = true,
                // Escape codes are needed if any ascii control
                // characters are present, including \b \f \r.
                b'\t' => {} // always allowed; remaining neutral on this
                b'\n' => metrics.newline = true,
                c if c <= 0x1f || c == 0x7f => metrics.escape_codes = true,
                _ => {}
            }
        }

        metrics
    }
}
381
/// Facts about a key that decide which TOML key forms can represent it.
#[derive(Copy, Clone, Debug)]
struct KeyMetrics {
    /// The key is non-empty and made only of `A-Z`, `a-z`, `0-9`, `-`, `_`.
    unquoted: bool,
    /// A `'` is present.
    single_quotes: bool,
    /// A `"` is present.
    double_quotes: bool,
    /// An ASCII control character other than tab is present.
    escape_codes: bool,
    /// A backslash is present.
    escape: bool,
}

impl KeyMetrics {
    /// Returns the starting metrics: `unquoted` set, every other flag clear.
    fn new() -> Self {
        Self {
            unquoted: true,
            single_quotes: false,
            double_quotes: false,
            escape_codes: false,
            escape: false,
        }
    }

    /// Scans `s` byte by byte and records its metrics.
    fn calculate(s: &str) -> Self {
        // An empty key can never be written unquoted.
        let mut found = Self {
            unquoted: !s.is_empty(),
            ..Self::new()
        };

        for byte in s.bytes() {
            // ```bnf
            // unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
            //
            // literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
            //
            // basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
            // wschar =  %x20  ; Space
            // wschar =/ %x09  ; Horizontal tab
            // escape = %x5C                   ; \
            // ```
            found.unquoted &= byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_');

            match byte {
                b'\'' => found.single_quotes = true,
                b'"' => found.double_quotes = true,
                b'\\' => found.escape = true,
                // Tab is always allowed; every other ASCII control character
                // (including \b \f \r and newline) needs an escape code.
                b'\t' => {}
                c if c <= 0x1f || c == 0x7f => found.escape_codes = true,
                _ => {}
            }
        }

        found
    }
}
436}