toml_parser/decoder/
string.rs

1use core::ops::RangeInclusive;
2
3use winnow::stream::ContainsToken as _;
4use winnow::stream::Offset as _;
5use winnow::stream::Stream as _;
6
7use crate::decoder::StringBuilder;
8use crate::lexer::APOSTROPHE;
9use crate::lexer::ML_BASIC_STRING_DELIM;
10use crate::lexer::ML_LITERAL_STRING_DELIM;
11use crate::lexer::QUOTATION_MARK;
12use crate::lexer::WSCHAR;
13use crate::ErrorSink;
14use crate::Expected;
15use crate::ParseError;
16use crate::Raw;
17use crate::Span;
18
/// Error description reported by the decoders in this file when pushing decoded content to the
/// output `StringBuilder` fails.
const ALLOCATION_ERROR: &str = "could not allocate for string";
20
21/// Parse literal string
22///
23/// ```bnf
24/// ;; Literal String
25///
26/// literal-string = apostrophe *literal-char apostrophe
27///
28/// apostrophe = %x27 ; ' apostrophe
29///
30/// literal-char = %x09 / %x20-26 / %x28-7E / non-ascii
31/// ```
32pub(crate) fn decode_literal_string<'i>(
33    raw: Raw<'i>,
34    output: &mut dyn StringBuilder<'i>,
35    error: &mut dyn ErrorSink,
36) {
37    const INVALID_STRING: &str = "invalid literal string";
38
39    output.clear();
40
41    let s = raw.as_str();
42    let s = if let Some(stripped) = s.strip_prefix(APOSTROPHE as char) {
43        stripped
44    } else {
45        error.report_error(
46            ParseError::new(INVALID_STRING)
47                .with_context(Span::new_unchecked(0, raw.len()))
48                .with_expected(&[Expected::Literal("'")])
49                .with_unexpected(Span::new_unchecked(0, 0)),
50        );
51        s
52    };
53    let s = if let Some(stripped) = s.strip_suffix(APOSTROPHE as char) {
54        stripped
55    } else {
56        error.report_error(
57            ParseError::new(INVALID_STRING)
58                .with_context(Span::new_unchecked(0, raw.len()))
59                .with_expected(&[Expected::Literal("'")])
60                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
61        );
62        s
63    };
64
65    for (i, b) in s.as_bytes().iter().enumerate() {
66        if !LITERAL_CHAR.contains_token(b) {
67            let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes());
68            error.report_error(
69                ParseError::new(INVALID_STRING)
70                    .with_context(Span::new_unchecked(0, raw.len()))
71                    .with_expected(&[Expected::Description("non-single-quote visible characters")])
72                    .with_unexpected(Span::new_unchecked(offset, offset)),
73            );
74        }
75    }
76
77    if !output.push_str(s) {
78        error.report_error(
79            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
80        );
81    }
82}
83
84/// `literal-char = %x09 / %x20-26 / %x28-7E / non-ascii`
85const LITERAL_CHAR: (
86    u8,
87    RangeInclusive<u8>,
88    RangeInclusive<u8>,
89    RangeInclusive<u8>,
90) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
91
/// Byte-level stand-in for `non-ascii = %x80-D7FF / %xE000-10FFFF`
///
/// The grammar is written in terms of code points while the decoders scan bytes, so this
/// accepts every byte with the high bit set:
/// - ASCII is 0xxxxxxx
/// - First byte for UTF-8 is 11xxxxxx
/// - Subsequent UTF-8 bytes are 10xxxxxx
const NON_ASCII: RangeInclusive<u8> = 0x80..=u8::MAX;
97
98/// Parse multi-line literal string
99///
100/// ```bnf
101/// ;; Multiline Literal String
102///
103/// ml-literal-string = ml-literal-string-delim [ newline ] ml-literal-body
104///                     ml-literal-string-delim
105/// ml-literal-string-delim = 3apostrophe
106/// ml-literal-body = *mll-content *( mll-quotes 1*mll-content ) [ mll-quotes ]
107///
108/// mll-content = mll-char / newline
109/// mll-quotes = 1*2apostrophe
110/// ```
111pub(crate) fn decode_ml_literal_string<'i>(
112    raw: Raw<'i>,
113    output: &mut dyn StringBuilder<'i>,
114    error: &mut dyn ErrorSink,
115) {
116    const INVALID_STRING: &str = "invalid multi-line literal string";
117    output.clear();
118
119    let s = raw.as_str();
120    let s = if let Some(stripped) = s.strip_prefix(ML_LITERAL_STRING_DELIM) {
121        stripped
122    } else {
123        error.report_error(
124            ParseError::new(INVALID_STRING)
125                .with_context(Span::new_unchecked(0, raw.len()))
126                .with_expected(&[Expected::Literal("'")])
127                .with_unexpected(Span::new_unchecked(0, 0)),
128        );
129        s
130    };
131    let s = strip_start_newline(s);
132    let s = if let Some(stripped) = s.strip_suffix(ML_LITERAL_STRING_DELIM) {
133        stripped
134    } else {
135        error.report_error(
136            ParseError::new(INVALID_STRING)
137                .with_context(Span::new_unchecked(0, raw.len()))
138                .with_expected(&[Expected::Literal("'")])
139                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
140        );
141        s.trim_end_matches('\'')
142    };
143
144    for (i, b) in s.as_bytes().iter().enumerate() {
145        if *b == b'\'' || *b == b'\n' {
146        } else if *b == b'\r' {
147            if s.as_bytes().get(i + 1) != Some(&b'\n') {
148                let offset = (&s.as_bytes()[i + 1..]).offset_from(&raw.as_bytes());
149                error.report_error(
150                    ParseError::new("carriage return must be followed by newline")
151                        .with_context(Span::new_unchecked(0, raw.len()))
152                        .with_expected(&[Expected::Literal("\n")])
153                        .with_unexpected(Span::new_unchecked(offset, offset)),
154                );
155            }
156        } else if !MLL_CHAR.contains_token(b) {
157            let offset = (&s.as_bytes()[i..]).offset_from(&raw.as_bytes());
158            error.report_error(
159                ParseError::new(INVALID_STRING)
160                    .with_context(Span::new_unchecked(0, raw.len()))
161                    .with_expected(&[Expected::Description("non-single-quote characters")])
162                    .with_unexpected(Span::new_unchecked(offset, offset)),
163            );
164        }
165    }
166
167    if !output.push_str(s) {
168        error.report_error(
169            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
170        );
171    }
172}
173
174/// `mll-char = %x09 / %x20-26 / %x28-7E / non-ascii`
175const MLL_CHAR: (
176    u8,
177    RangeInclusive<u8>,
178    RangeInclusive<u8>,
179    RangeInclusive<u8>,
180) = (0x9, 0x20..=0x26, 0x28..=0x7E, NON_ASCII);
181
182/// Parse basic string
183///
184/// ```bnf
185/// ;; Basic String
186///
187/// basic-string = quotation-mark *basic-char quotation-mark
188///
189/// basic-char = basic-unescaped / escaped
190///
191/// escaped = escape escape-seq-char
192/// ```
193pub(crate) fn decode_basic_string<'i>(
194    raw: Raw<'i>,
195    output: &mut dyn StringBuilder<'i>,
196    error: &mut dyn ErrorSink,
197) {
198    const INVALID_STRING: &str = "invalid basic string";
199    output.clear();
200
201    let s = raw.as_str();
202    let s = if let Some(stripped) = s.strip_prefix(QUOTATION_MARK as char) {
203        stripped
204    } else {
205        error.report_error(
206            ParseError::new(INVALID_STRING)
207                .with_context(Span::new_unchecked(0, raw.len()))
208                .with_expected(&[Expected::Literal("\"")])
209                .with_unexpected(Span::new_unchecked(0, 0)),
210        );
211        s
212    };
213    let mut s = if let Some(stripped) = s.strip_suffix(QUOTATION_MARK as char) {
214        stripped
215    } else {
216        error.report_error(
217            ParseError::new(INVALID_STRING)
218                .with_context(Span::new_unchecked(0, raw.len()))
219                .with_expected(&[Expected::Literal("\"")])
220                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
221        );
222        s
223    };
224
225    let segment = basic_unescaped(&mut s);
226    if !output.push_str(segment) {
227        error.report_error(
228            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
229        );
230    }
231    while !s.is_empty() {
232        if s.starts_with("\\") {
233            let _ = s.next_token();
234
235            let c = escape_seq_char(&mut s, raw, error);
236            if !output.push_char(c) {
237                error.report_error(
238                    ParseError::new(ALLOCATION_ERROR)
239                        .with_unexpected(Span::new_unchecked(0, raw.len())),
240                );
241            }
242        } else {
243            let invalid = basic_invalid(&mut s);
244            let start = invalid.offset_from(&raw.as_str());
245            let end = start + invalid.len();
246            error.report_error(
247                ParseError::new(INVALID_STRING)
248                    .with_context(Span::new_unchecked(0, raw.len()))
249                    .with_expected(&[
250                        Expected::Description("non-double-quote visible characters"),
251                        Expected::Literal("\\"),
252                    ])
253                    .with_unexpected(Span::new_unchecked(start, end)),
254            );
255            let _ = output.push_str(invalid);
256        }
257
258        let segment = basic_unescaped(&mut s);
259        if !output.push_str(segment) {
260            let start = segment.offset_from(&raw.as_str());
261            let end = start + segment.len();
262            error.report_error(
263                ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)),
264            );
265        }
266    }
267}
268
269/// `basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii`
270fn basic_unescaped<'i>(stream: &mut &'i str) -> &'i str {
271    let offset = stream
272        .as_bytes()
273        .offset_for(|b| !BASIC_UNESCAPED.contains_token(b))
274        .unwrap_or(stream.len());
275    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
276    unsafe {
277        stream.next_slice_unchecked(offset)
278    }
279    #[cfg(not(feature = "unsafe"))]
280    stream.next_slice(offset)
281}
282
283fn basic_invalid<'i>(stream: &mut &'i str) -> &'i str {
284    let offset = stream
285        .as_bytes()
286        .offset_for(|b| (BASIC_UNESCAPED, ESCAPE).contains_token(b))
287        .unwrap_or(stream.len());
288    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
289    unsafe {
290        stream.next_slice_unchecked(offset)
291    }
292    #[cfg(not(feature = "unsafe"))]
293    stream.next_slice(offset)
294}
295
296/// `basic-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii`
297#[allow(clippy::type_complexity)]
298const BASIC_UNESCAPED: (
299    (u8, u8),
300    u8,
301    RangeInclusive<u8>,
302    RangeInclusive<u8>,
303    RangeInclusive<u8>,
304) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
305
/// The backslash that introduces an escape sequence
///
/// `escape = %x5C                    ; \`
const ESCAPE: u8 = 0x5C;
308
309/// ```bnf
310/// escape-seq-char =  %x22         ; "    quotation mark  U+0022
311/// escape-seq-char =/ %x5C         ; \    reverse solidus U+005C
312/// escape-seq-char =/ %x62         ; b    backspace       U+0008
313/// escape-seq-char =/ %x66         ; f    form feed       U+000C
314/// escape-seq-char =/ %x6E         ; n    line feed       U+000A
315/// escape-seq-char =/ %x72         ; r    carriage return U+000D
316/// escape-seq-char =/ %x74         ; t    tab             U+0009
317/// escape-seq-char =/ %x75 4HEXDIG ; uXXXX                U+XXXX
318/// escape-seq-char =/ %x55 8HEXDIG ; UXXXXXXXX            U+XXXXXXXX
319/// ```
320fn escape_seq_char(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) -> char {
321    const EXPECTED_ESCAPES: &[Expected] = &[
322        Expected::Literal("b"),
323        Expected::Literal("f"),
324        Expected::Literal("n"),
325        Expected::Literal("r"),
326        Expected::Literal("\\"),
327        Expected::Literal("\""),
328        Expected::Literal("u"),
329        Expected::Literal("U"),
330    ];
331
332    let start = stream.checkpoint();
333    let Some(id) = stream.next_token() else {
334        let offset = stream.offset_from(&raw.as_str());
335        error.report_error(
336            ParseError::new("missing escaped value")
337                .with_context(Span::new_unchecked(0, raw.len()))
338                .with_expected(EXPECTED_ESCAPES)
339                .with_unexpected(Span::new_unchecked(offset, offset)),
340        );
341        return '\\';
342    };
343    match id {
344        'b' => '\u{8}',
345        'f' => '\u{c}',
346        'n' => '\n',
347        'r' => '\r',
348        't' => '\t',
349        'u' => hexescape(stream, 4, raw, error),
350        'U' => hexescape(stream, 8, raw, error),
351        '\\' => '\\',
352        '"' => '"',
353        _ => {
354            stream.reset(&start);
355            let offset = stream.offset_from(&raw.as_str());
356            error.report_error(
357                ParseError::new("missing escaped value")
358                    .with_context(Span::new_unchecked(0, raw.len()))
359                    .with_expected(EXPECTED_ESCAPES)
360                    .with_unexpected(Span::new_unchecked(offset, offset)),
361            );
362            '\\'
363        }
364    }
365}
366
367fn hexescape(
368    stream: &mut &str,
369    num_digits: usize,
370    raw: Raw<'_>,
371    error: &mut dyn ErrorSink,
372) -> char {
373    let offset = stream
374        .as_bytes()
375        .offset_for(|b| !HEXDIG.contains_token(b))
376        .unwrap_or_else(|| stream.eof_offset())
377        .min(num_digits);
378    #[cfg(feature = "unsafe")] // SAFETY: HEXDIG ensure `offset` is along UTF-8 boundary
379    let value = unsafe { stream.next_slice_unchecked(offset) };
380    #[cfg(not(feature = "unsafe"))]
381    let value = stream.next_slice(offset);
382
383    if value.len() != num_digits {
384        let offset = stream.offset_from(&raw.as_str());
385        error.report_error(
386            ParseError::new("too few unicode value digits")
387                .with_context(Span::new_unchecked(0, raw.len()))
388                .with_expected(&[Expected::Description("unicode hexadecimal value")])
389                .with_unexpected(Span::new_unchecked(offset, offset)),
390        );
391        return '�';
392    }
393
394    let Some(value) = u32::from_str_radix(value, 16).ok().and_then(char::from_u32) else {
395        let offset = value.offset_from(&raw.as_str());
396        error.report_error(
397            ParseError::new("invalid value")
398                .with_context(Span::new_unchecked(0, raw.len()))
399                .with_expected(&[Expected::Description("unicode hexadecimal value")])
400                .with_unexpected(Span::new_unchecked(offset, offset)),
401        );
402        return '�';
403    };
404
405    value
406}
407
408/// `HEXDIG = DIGIT / "A" / "B" / "C" / "D" / "E" / "F"`
409const HEXDIG: (RangeInclusive<u8>, RangeInclusive<u8>, RangeInclusive<u8>) =
410    (DIGIT, b'A'..=b'F', b'a'..=b'f');
411
/// ASCII decimal digits
///
/// `DIGIT = %x30-39 ; 0-9`
const DIGIT: RangeInclusive<u8> = 0x30..=0x39;
414
/// Remove a single leading newline (`\n` or `\r\n`) from `s`, if there is one
///
/// Anything else, including a lone `\r`, is left untouched.
fn strip_start_newline(s: &str) -> &str {
    if let Some(rest) = s.strip_prefix('\n') {
        rest
    } else if let Some(rest) = s.strip_prefix("\r\n") {
        rest
    } else {
        s
    }
}
420
421/// Parse multi-line basic string
422///
423/// ```bnf
424/// ;; Multiline Basic String
425///
426/// ml-basic-string = ml-basic-string-delim [ newline ] ml-basic-body
427///                   ml-basic-string-delim
428/// ml-basic-string-delim = 3quotation-mark
429///
430/// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
431///
432/// mlb-content = mlb-char / newline / mlb-escaped-nl
433/// mlb-char = mlb-unescaped / escaped
434/// mlb-quotes = 1*2quotation-mark
435/// ```
436pub(crate) fn decode_ml_basic_string<'i>(
437    raw: Raw<'i>,
438    output: &mut dyn StringBuilder<'i>,
439    error: &mut dyn ErrorSink,
440) {
441    const INVALID_STRING: &str = "invalid multi-line basic string";
442
443    let s = raw.as_str();
444    let s = if let Some(stripped) = s.strip_prefix(ML_BASIC_STRING_DELIM) {
445        stripped
446    } else {
447        error.report_error(
448            ParseError::new(INVALID_STRING)
449                .with_context(Span::new_unchecked(0, raw.len()))
450                .with_expected(&[Expected::Literal("\"")])
451                .with_unexpected(Span::new_unchecked(0, 0)),
452        );
453        s
454    };
455    let s = strip_start_newline(s);
456    let mut s = if let Some(stripped) = s.strip_suffix(ML_BASIC_STRING_DELIM) {
457        stripped
458    } else {
459        error.report_error(
460            ParseError::new(INVALID_STRING)
461                .with_context(Span::new_unchecked(0, raw.len()))
462                .with_expected(&[Expected::Literal("\"")])
463                .with_unexpected(Span::new_unchecked(raw.len(), raw.len())),
464        );
465        s
466    };
467
468    let segment = mlb_unescaped(&mut s);
469    if !output.push_str(segment) {
470        error.report_error(
471            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
472        );
473    }
474    while !s.is_empty() {
475        if s.starts_with("\\") {
476            let _ = s.next_token();
477
478            if s.as_bytes()
479                .first()
480                .map(|b| (WSCHAR, b'\r', b'\n').contains_token(b))
481                .unwrap_or(false)
482            {
483                mlb_escaped_nl(&mut s, raw, error);
484            } else {
485                let c = escape_seq_char(&mut s, raw, error);
486                if !output.push_char(c) {
487                    error.report_error(
488                        ParseError::new(ALLOCATION_ERROR)
489                            .with_unexpected(Span::new_unchecked(0, raw.len())),
490                    );
491                }
492            }
493        } else if s.starts_with("\r") {
494            let offset = if s.starts_with("\r\n") {
495                "\r\n".len()
496            } else {
497                let start = s.offset_from(&raw.as_str()) + 1;
498                error.report_error(
499                    ParseError::new("carriage return must be followed by newline")
500                        .with_context(Span::new_unchecked(0, raw.len()))
501                        .with_expected(&[Expected::Literal("\n")])
502                        .with_unexpected(Span::new_unchecked(start, start)),
503                );
504                "\r".len()
505            };
506            #[cfg(feature = "unsafe")]
507            // SAFETY: Newlines ensure `offset` is along UTF-8 boundary
508            let newline = unsafe { s.next_slice_unchecked(offset) };
509            #[cfg(not(feature = "unsafe"))]
510            let newline = s.next_slice(offset);
511            if !output.push_str(newline) {
512                let start = newline.offset_from(&raw.as_str());
513                let end = start + newline.len();
514                error.report_error(
515                    ParseError::new(ALLOCATION_ERROR)
516                        .with_unexpected(Span::new_unchecked(start, end)),
517                );
518            }
519        } else {
520            let invalid = mlb_invalid(&mut s);
521            let start = invalid.offset_from(&raw.as_str());
522            let end = start + invalid.len();
523            error.report_error(
524                ParseError::new(INVALID_STRING)
525                    .with_context(Span::new_unchecked(0, raw.len()))
526                    .with_expected(&[Expected::Literal("\\"), Expected::Description("characters")])
527                    .with_unexpected(Span::new_unchecked(start, end)),
528            );
529            let _ = output.push_str(invalid);
530        }
531
532        let segment = mlb_unescaped(&mut s);
533        if !output.push_str(segment) {
534            let start = segment.offset_from(&raw.as_str());
535            let end = start + segment.len();
536            error.report_error(
537                ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(start, end)),
538            );
539        }
540    }
541}
542
/// Consume the remainder of a line-ending backslash (the backslash itself is already consumed)
///
/// ```bnf
/// mlb-escaped-nl = escape ws newline *( wschar / newline )
/// ```
///
/// Nothing is produced; the matched whitespace and newlines are only removed from `stream`.
/// A missing newline after the optional whitespace, and any carriage return that is not
/// followed by a line feed, are reported to `error` with spans that are offsets from the start
/// of `raw`.
fn mlb_escaped_nl(stream: &mut &str, raw: Raw<'_>, error: &mut dyn ErrorSink) {
    const INVALID_STRING: &str = "invalid multi-line basic string";
    // `ws`: optional whitespace between the backslash and the newline
    let ws_offset = stream
        .as_bytes()
        .offset_for(|b| !WSCHAR.contains_token(b))
        .unwrap_or(stream.len());
    #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary
    unsafe {
        stream.next_slice_unchecked(ws_offset);
    }
    #[cfg(not(feature = "unsafe"))]
    stream.next_slice(ws_offset);

    // `newline`: the mandatory line ending, either LF or CRLF
    let start = stream.checkpoint();
    match stream.next_token() {
        Some('\n') => {}
        Some('\r') => {
            if stream.as_bytes().first() == Some(&b'\n') {
                let _ = stream.next_token();
            } else {
                // Bare CR: it stays consumed and the error points just past it
                let start = stream.offset_from(&raw.as_str());
                let end = start;
                error.report_error(
                    ParseError::new("carriage return must be followed by newline")
                        .with_context(Span::new_unchecked(0, raw.len()))
                        .with_expected(&[Expected::Literal("\n")])
                        .with_unexpected(Span::new_unchecked(start, end)),
                );
            }
        }
        _ => {
            // Not a newline (or end of input): un-consume the token and report where the newline
            // was expected
            stream.reset(&start);

            let start = stream.offset_from(&raw.as_str());
            let end = start;
            error.report_error(
                ParseError::new(INVALID_STRING)
                    .with_context(Span::new_unchecked(0, raw.len()))
                    .with_expected(&[Expected::Literal("\n")])
                    .with_unexpected(Span::new_unchecked(start, end)),
            );
        }
    }

    // `*( wschar / newline )`: keep consuming whitespace and newlines; a pass that consumes
    // nothing ends the loop
    loop {
        let start_offset = stream.offset_from(&raw.as_str());

        // Run of whitespace and line feeds
        let offset = stream
            .as_bytes()
            .offset_for(|b| !(WSCHAR, b'\n').contains_token(b))
            .unwrap_or(stream.len());
        #[cfg(feature = "unsafe")] // SAFETY: WSCHAR ensure `offset` is along UTF-8 boundary
        unsafe {
            stream.next_slice_unchecked(offset);
        }
        #[cfg(not(feature = "unsafe"))]
        stream.next_slice(offset);

        // Carriage returns are handled separately so a bare one can be reported; it is consumed
        // either way
        if stream.starts_with("\r") {
            let offset = if stream.starts_with("\r\n") {
                "\r\n".len()
            } else {
                let start = stream.offset_from(&raw.as_str()) + 1;
                error.report_error(
                    ParseError::new("carriage return must be followed by newline")
                        .with_context(Span::new_unchecked(0, raw.len()))
                        .with_expected(&[Expected::Literal("\n")])
                        .with_unexpected(Span::new_unchecked(start, start)),
                );
                "\r".len()
            };
            #[cfg(feature = "unsafe")]
            // SAFETY: Newlines ensure `offset` is along UTF-8 boundary
            let _ = unsafe { stream.next_slice_unchecked(offset) };
            #[cfg(not(feature = "unsafe"))]
            let _ = stream.next_slice(offset);
        }

        let end_offset = stream.offset_from(&raw.as_str());
        if start_offset == end_offset {
            break;
        }
    }
}
630
631/// `mlb-unescaped` extended with `mlb-quotes` and `LF`
632///
633/// **warning:** `newline` is not validated
634///
635/// ```bnf
636/// ml-basic-body = *mlb-content *( mlb-quotes 1*mlb-content ) [ mlb-quotes ]
637///
638/// mlb-content = mlb-char / newline / mlb-escaped-nl
639/// mlb-char = mlb-unescaped / escaped
640/// mlb-quotes = 1*2quotation-mark
641/// mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii
642/// ```
643fn mlb_unescaped<'i>(stream: &mut &'i str) -> &'i str {
644    let offset = stream
645        .as_bytes()
646        .offset_for(|b| !(MLB_UNESCAPED, b'"', b'\n').contains_token(b))
647        .unwrap_or(stream.len());
648    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
649    unsafe {
650        stream.next_slice_unchecked(offset)
651    }
652    #[cfg(not(feature = "unsafe"))]
653    stream.next_slice(offset)
654}
655
656fn mlb_invalid<'i>(stream: &mut &'i str) -> &'i str {
657    let offset = stream
658        .as_bytes()
659        .offset_for(|b| (MLB_UNESCAPED, b'"', b'\n', ESCAPE, '\r').contains_token(b))
660        .unwrap_or(stream.len());
661    #[cfg(feature = "unsafe")] // SAFETY: BASIC_UNESCAPED ensure `offset` is along UTF-8 boundary
662    unsafe {
663        stream.next_slice_unchecked(offset)
664    }
665    #[cfg(not(feature = "unsafe"))]
666    stream.next_slice(offset)
667}
668
669/// `mlb-unescaped = wschar / %x21 / %x23-5B / %x5D-7E / non-ascii`
670#[allow(clippy::type_complexity)]
671const MLB_UNESCAPED: (
672    (u8, u8),
673    u8,
674    RangeInclusive<u8>,
675    RangeInclusive<u8>,
676    RangeInclusive<u8>,
677) = (WSCHAR, 0x21, 0x23..=0x5B, 0x5D..=0x7E, NON_ASCII);
678
679/// Parse unquoted key
680///
681/// ```bnf
682/// unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _
683/// ```
684pub(crate) fn decode_unquoted_key<'i>(
685    raw: Raw<'i>,
686    output: &mut dyn StringBuilder<'i>,
687    error: &mut dyn ErrorSink,
688) {
689    let s = raw.as_str();
690
691    if s.is_empty() {
692        error.report_error(
693            ParseError::new("unquoted keys cannot be empty")
694                .with_context(Span::new_unchecked(0, s.len()))
695                .with_expected(&[
696                    Expected::Description("letters"),
697                    Expected::Description("numbers"),
698                    Expected::Literal("-"),
699                    Expected::Literal("_"),
700                ])
701                .with_unexpected(Span::new_unchecked(0, s.len())),
702        );
703    }
704
705    for (i, b) in s.as_bytes().iter().enumerate() {
706        if !UNQUOTED_CHAR.contains_token(b) {
707            error.report_error(
708                ParseError::new("invalid unquoted key")
709                    .with_context(Span::new_unchecked(0, s.len()))
710                    .with_expected(&[
711                        Expected::Description("letters"),
712                        Expected::Description("numbers"),
713                        Expected::Literal("-"),
714                        Expected::Literal("_"),
715                    ])
716                    .with_unexpected(Span::new_unchecked(i, i)),
717            );
718        }
719    }
720
721    if !output.push_str(s) {
722        error.report_error(
723            ParseError::new(ALLOCATION_ERROR).with_unexpected(Span::new_unchecked(0, raw.len())),
724        );
725    }
726}
727
/// Bytes allowed in an unquoted key
///
/// `unquoted-key = 1*( ALPHA / DIGIT / %x2D / %x5F ) ; A-Z / a-z / 0-9 / - / _`
const UNQUOTED_CHAR: (
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    RangeInclusive<u8>,
    u8,
    u8,
) = (
    0x41..=0x5A, // A-Z
    0x61..=0x7A, // a-z
    0x30..=0x39, // 0-9
    0x2D,        // -
    0x5F,        // _
);
736
737#[cfg(test)]
738#[cfg(feature = "std")]
739mod test {
740    use super::*;
741    use crate::decoder::Encoding;
742
743    use alloc::borrow::Cow;
744
745    use snapbox::assert_data_eq;
746    use snapbox::prelude::*;
747    use snapbox::str;
748
749    #[test]
750    fn literal_string() {
751        let cases = [
752            (
753                r"'C:\Users\nodejs\templates'",
754                str![[r#"C:\Users\nodejs\templates"#]].raw(),
755                str![[r#"
756[]
757
758"#]]
759                .raw(),
760            ),
761            (
762                r"'\\ServerX\admin$\system32\'",
763                str![[r#"\\ServerX\admin$\system32\"#]].raw(),
764                str![[r#"
765[]
766
767"#]]
768                .raw(),
769            ),
770            (
771                r#"'Tom "Dubs" Preston-Werner'"#,
772                str![[r#"Tom "Dubs" Preston-Werner"#]].raw(),
773                str![[r#"
774[]
775
776"#]]
777                .raw(),
778            ),
779            (
780                r"'<\i\c*\s*>'",
781                str![[r#"<\i\c*\s*>"#]].raw(),
782                str![[r#"
783[]
784
785"#]]
786                .raw(),
787            ),
788        ];
789        for (input, expected, expected_error) in cases {
790            let mut error = Vec::new();
791            let mut actual = Cow::Borrowed("");
792            decode_literal_string(
793                Raw::new_unchecked(input, Some(Encoding::LiteralString), Default::default()),
794                &mut actual,
795                &mut error,
796            );
797            assert_data_eq!(actual.as_ref(), expected);
798            assert_data_eq!(error.to_debug(), expected_error);
799        }
800    }
801
802    #[test]
803    fn ml_literal_string() {
804        let cases = [
805            (
806                r"'''I [dw]on't need \d{2} apples'''",
807                str![[r#"I [dw]on't need \d{2} apples"#]].raw(),
808                str![[r#"
809[]
810
811"#]]
812                .raw(),
813            ),
814            (
815                r#"''''one_quote''''"#,
816                str!["'one_quote'"].raw(),
817                str![[r#"
818[]
819
820"#]]
821                .raw(),
822            ),
823            (
824                r#"'''
825The first newline is
826trimmed in raw strings.
827   All other whitespace
828   is preserved.
829'''"#,
830                str![[r#"
831The first newline is
832trimmed in raw strings.
833   All other whitespace
834   is preserved.
835
836"#]]
837                .raw(),
838                str![[r#"
839[]
840
841"#]]
842                .raw(),
843            ),
844        ];
845        for (input, expected, expected_error) in cases {
846            let mut error = Vec::new();
847            let mut actual = Cow::Borrowed("");
848            decode_ml_literal_string(
849                Raw::new_unchecked(input, Some(Encoding::MlLiteralString), Default::default()),
850                &mut actual,
851                &mut error,
852            );
853            assert_data_eq!(actual.as_ref(), expected);
854            assert_data_eq!(error.to_debug(), expected_error);
855        }
856    }
857
858    #[test]
859    fn basic_string() {
860        let cases = [
861            (
862                r#""""#,
863                str![""].raw(),
864                str![[r#"
865[]
866
867"#]]
868                .raw(),
869            ),
870            (
871                r#""content\"trailing""#,
872                str![[r#"content"trailing"#]].raw(),
873                str![[r#"
874[]
875
876"#]]
877                .raw(),
878            ),
879            (
880                r#""content\""#,
881                str![[r#"content\"#]].raw(),
882                str![[r#"
883[
884    ParseError {
885        context: Some(
886            0..10,
887        ),
888        description: "missing escaped value",
889        expected: Some(
890            [
891                Literal(
892                    "b",
893                ),
894                Literal(
895                    "f",
896                ),
897                Literal(
898                    "n",
899                ),
900                Literal(
901                    "r",
902                ),
903                Literal(
904                    "\\",
905                ),
906                Literal(
907                    "\"",
908                ),
909                Literal(
910                    "u",
911                ),
912                Literal(
913                    "U",
914                ),
915            ],
916        ),
917        unexpected: Some(
918            9..9,
919        ),
920    },
921]
922
923"#]]
924                .raw(),
925            ),
926            (
927                r#""content
928trailing""#,
929                str![[r#"
930content
931trailing
932"#]]
933                .raw(),
934                str![[r#"
935[
936    ParseError {
937        context: Some(
938            0..18,
939        ),
940        description: "invalid basic string",
941        expected: Some(
942            [
943                Description(
944                    "non-double-quote visible characters",
945                ),
946                Literal(
947                    "\\",
948                ),
949            ],
950        ),
951        unexpected: Some(
952            8..9,
953        ),
954    },
955]
956
957"#]]
958                .raw(),
959            ),
960            (
961                r#""I'm a string. \"You can quote me\". Name\tJos\u00E9\nLocation\tSF. \U0002070E""#,
962                str![[r#"
963I'm a string. "You can quote me". Name	José
964Location	SF. 𠜎
965"#]]
966                .raw(),
967                str![[r#"
968[]
969
970"#]]
971                .raw(),
972            ),
973        ];
974        for (input, expected, expected_error) in cases {
975            let mut error = Vec::new();
976            let mut actual = Cow::Borrowed("");
977            decode_basic_string(
978                Raw::new_unchecked(input, Some(Encoding::BasicString), Default::default()),
979                &mut actual,
980                &mut error,
981            );
982            assert_data_eq!(actual.as_ref(), expected);
983            assert_data_eq!(error.to_debug(), expected_error);
984        }
985    }
986
987    #[test]
988    fn ml_basic_string() {
989        let cases = [
990            (
991                r#""""
992Roses are red
993Violets are blue""""#,
994                str![[r#"
995Roses are red
996Violets are blue
997"#]]
998                .raw(),
999                str![[r#"
1000[]
1001
1002"#]]
1003                .raw(),
1004            ),
1005            (
1006                r#"""" \""" """"#,
1007                str![[r#" """ "#]].raw(),
1008                str![[r#"
1009[]
1010
1011"#]]
1012                .raw(),
1013            ),
1014            (
1015                r#"""" \\""""#,
1016                str![[r#" \"#]].raw(),
1017                str![[r#"
1018[]
1019
1020"#]]
1021                .raw(),
1022            ),
1023            (
1024                r#""""
1025The quick brown \
1026
1027
1028  fox jumps over \
1029    the lazy dog.""""#,
1030                str!["The quick brown fox jumps over the lazy dog."].raw(),
1031                str![[r#"
1032[]
1033
1034"#]]
1035                .raw(),
1036            ),
1037            (
1038                r#""""\
1039       The quick brown \
1040       fox jumps over \
1041       the lazy dog.\
1042       """"#,
1043                str!["The quick brown fox jumps over the lazy dog."].raw(),
1044                str![[r#"
1045[]
1046
1047"#]]
1048                .raw(),
1049            ),
1050            (
1051                r#""""\
1052       """"#,
1053                str![""].raw(),
1054                str![[r#"
1055[]
1056
1057"#]]
1058                .raw(),
1059            ),
1060            (
1061                r#""""
1062\
1063  \
1064""""#,
1065                str![""].raw(),
1066                str![[r#"
1067[]
1068
1069"#]]
1070                .raw(),
1071            ),
1072            (
1073                r#""""  """#,
1074                str![[r#"  """#]].raw(),
1075                str![[r#"
1076[
1077    ParseError {
1078        context: Some(
1079            0..7,
1080        ),
1081        description: "invalid multi-line basic string",
1082        expected: Some(
1083            [
1084                Literal(
1085                    "\"",
1086                ),
1087            ],
1088        ),
1089        unexpected: Some(
1090            7..7,
1091        ),
1092    },
1093]
1094
1095"#]]
1096                .raw(),
1097            ),
1098            (
1099                r#""""  \""""#,
1100                str![[r#"  \"#]].raw(),
1101                str![[r#"
1102[
1103    ParseError {
1104        context: Some(
1105            0..9,
1106        ),
1107        description: "missing escaped value",
1108        expected: Some(
1109            [
1110                Literal(
1111                    "b",
1112                ),
1113                Literal(
1114                    "f",
1115                ),
1116                Literal(
1117                    "n",
1118                ),
1119                Literal(
1120                    "r",
1121                ),
1122                Literal(
1123                    "\\",
1124                ),
1125                Literal(
1126                    "\"",
1127                ),
1128                Literal(
1129                    "u",
1130                ),
1131                Literal(
1132                    "U",
1133                ),
1134            ],
1135        ),
1136        unexpected: Some(
1137            6..6,
1138        ),
1139    },
1140]
1141
1142"#]]
1143                .raw(),
1144            ),
1145        ];
1146        for (input, expected, expected_error) in cases {
1147            let mut error = Vec::new();
1148            let mut actual = Cow::Borrowed("");
1149            decode_ml_basic_string(
1150                Raw::new_unchecked(input, Some(Encoding::MlBasicString), Default::default()),
1151                &mut actual,
1152                &mut error,
1153            );
1154            assert_data_eq!(actual.as_ref(), expected);
1155            assert_data_eq!(error.to_debug(), expected_error);
1156        }
1157    }
1158
1159    #[test]
1160    fn unquoted_keys() {
1161        let cases = [
1162            (
1163                "a",
1164                str!["a"].raw(),
1165                str![[r#"
1166[]
1167
1168"#]]
1169                .raw(),
1170            ),
1171            (
1172                "hello",
1173                str!["hello"].raw(),
1174                str![[r#"
1175[]
1176
1177"#]]
1178                .raw(),
1179            ),
1180            (
1181                "-",
1182                str!["-"].raw(),
1183                str![[r#"
1184[]
1185
1186"#]]
1187                .raw(),
1188            ),
1189            (
1190                "_",
1191                str!["_"].raw(),
1192                str![[r#"
1193[]
1194
1195"#]]
1196                .raw(),
1197            ),
1198            (
1199                "-hello-world-",
1200                str!["-hello-world-"].raw(),
1201                str![[r#"
1202[]
1203
1204"#]]
1205                .raw(),
1206            ),
1207            (
1208                "_hello_world_",
1209                str!["_hello_world_"].raw(),
1210                str![[r#"
1211[]
1212
1213"#]]
1214                .raw(),
1215            ),
1216            (
1217                "",
1218                str![""].raw(),
1219                str![[r#"
1220[
1221    ParseError {
1222        context: Some(
1223            0..0,
1224        ),
1225        description: "unquoted keys cannot be empty",
1226        expected: Some(
1227            [
1228                Description(
1229                    "letters",
1230                ),
1231                Description(
1232                    "numbers",
1233                ),
1234                Literal(
1235                    "-",
1236                ),
1237                Literal(
1238                    "_",
1239                ),
1240            ],
1241        ),
1242        unexpected: Some(
1243            0..0,
1244        ),
1245    },
1246]
1247
1248"#]]
1249                .raw(),
1250            ),
1251        ];
1252
1253        for (input, expected, expected_error) in cases {
1254            let mut error = Vec::new();
1255            let mut actual = Cow::Borrowed("");
1256            decode_unquoted_key(
1257                Raw::new_unchecked(input, None, Default::default()),
1258                &mut actual,
1259                &mut error,
1260            );
1261            assert_data_eq!(actual.as_ref(), expected);
1262            assert_data_eq!(error.to_debug(), expected_error);
1263        }
1264    }
1265}