1use dada_util::Map;
2
3use dada_ir_ast::{
4 ast::{Identifier, LiteralKind},
5 diagnostic::{Diagnostic, Level},
6 span::{Anchor, Offset, Span},
7};
8
/// Interned text of a literal token (e.g. the digits of an integer literal
/// or the decoded contents of a string literal).
#[salsa::interned(debug)]
pub struct TokenText<'db> {
    #[return_ref]
    pub text: String,
}
16
/// A single token produced by [`tokenize`].
#[derive(Clone)]
pub struct Token<'input, 'db> {
    /// Source location of the token.
    pub span: Span<'db>,
    /// Highest-ranked category of text skipped immediately before this
    /// token, if any (see [`Skipped`]).
    pub skipped: Option<Skipped>,
    /// What kind of token this is.
    pub kind: TokenKind<'input, 'db>,
}
23
24impl std::fmt::Debug for Token<'_, '_> {
25 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
26 f.debug_struct("Token")
27 .field("span", &"...")
28 .field("skipped", &self.skipped)
29 .field("kind", &self.kind)
30 .finish()
31 }
32}
33
/// Category of text skipped before a token.
///
/// Declaration order matters: the derived `Ord` makes later variants outrank
/// earlier ones, and `Tokenizer::accumulate_skipped` keeps only the maximum
/// category seen since the last token.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Skipped {
    /// Whitespace other than newlines.
    Whitespace,

    /// At least one newline.
    Newline,

    /// A `#` comment.
    Comment,
}
46
/// The kinds of tokens produced by the tokenizer.
#[derive(Clone, Debug)]
pub enum TokenKind<'input, 'db> {
    /// An alphanumeric/`_` run that is not a keyword.
    Identifier(Identifier<'db>),

    /// A reserved word such as `fn` or `let`.
    Keyword(Keyword),

    /// A bracketed group; `text` is the interior text, excluding the
    /// delimiters themselves.
    Delimited {
        delimiter: Delimiter,
        text: &'input str,
    },

    /// A single operator character, e.g. `+`. Multi-character operators
    /// (see `operator::Op`) are presumably assembled from consecutive
    /// `OpChar` tokens by a later stage — TODO confirm.
    OpChar(char),

    /// An integer or string literal with its (interned) text.
    Literal(LiteralKind, TokenText<'db>),

    /// A lexing error carried as a token, so later stages can keep going.
    Error(Diagnostic),
}
70
/// Defines the keyword enum together with:
///
/// * a `Display` impl rendering the keyword in backticks (e.g. `` `fn` ``),
/// * a `STRINGS` table pairing each keyword's source text with its variant.
macro_rules! keywords {
    (pub enum $Keyword:ident {
        $($kw:ident = $kwstr:expr,)*
    }) => {
        #[derive(Copy, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, Debug)]
        pub enum $Keyword {
            $($kw,)*
        }

        impl std::fmt::Display for $Keyword {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                let s = match self {
                    $(Self::$kw => $kwstr,)*
                };
                // Keywords are displayed in backticks, matching diagnostics.
                write!(f, "`{}`", s)
            }
        }

        impl $Keyword {
            /// `(source text, keyword)` pairs, in declaration order.
            const STRINGS: &'static [(&'static str, $Keyword)] = &[
                $(($kwstr, $Keyword::$kw),)*
            ];
        }
    }
}
96
// The reserved words of the language, each paired with its source text.
keywords! {
    pub enum Keyword {
        As = "as",
        Async = "async",
        Await = "await",
        Box = "box",
        Boxed = "boxed",
        Class = "class",
        Crate = "crate",
        Dyn = "dyn",
        Else = "else",
        Enum = "enum",
        Export = "export",
        False = "false",
        Fn = "fn",
        If = "if",
        Is = "is",
        Lent = "lent",
        Let = "let",
        Give = "give",
        Given = "given",
        Match = "match",
        Matches = "matches",
        Mod = "mod",
        Mut = "mut",
        My = "my",
        Our = "our",
        Owned = "owned",
        Perm = "perm",
        Place = "place",
        Pub = "pub",
        Ref = "ref",
        Return = "return",
        Self_ = "self",
        Share = "share",
        Shared = "shared",
        Struct = "struct",
        Tracked = "tracked",
        True = "true",
        Type = "type",
        Unique = "unique",
        Unsafe = "unsafe",
        Use = "use",
        Where = "where",
    }
}
143
144impl Keyword {
145 fn map() -> &'static Map<String, Keyword> {
146 static MAP: std::sync::OnceLock<Map<String, Keyword>> = std::sync::OnceLock::new();
147 MAP.get_or_init(|| {
148 let mut map = Map::default();
149 for (upper_str, kw) in Keyword::STRINGS {
150 map.insert(upper_str.to_string(), *kw);
151 }
152 map
153 })
154 }
155}
156
pub mod operator {
    /// An operator, stored as the sequence of characters that spell it
    /// (e.g. `->` is `['-', '>']`).
    #[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord)]
    pub struct Op(&'static [char]);

    impl std::ops::Deref for Op {
        type Target = [char];

        /// Expose the underlying character slice.
        fn deref(&self) -> &Self::Target {
            self.0
        }
    }

    impl std::fmt::Display for Op {
        /// Render the operator by writing its characters back-to-back.
        fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
            self.0.iter().try_for_each(|ch| write!(f, "{ch}"))
        }
    }

    pub const PLUS: Op = Op(&['+']);
    pub const MINUS: Op = Op(&['-']);
    pub const STAR: Op = Op(&['*']);
    pub const SLASH: Op = Op(&['/']);
    #[expect(dead_code)]
    pub const AND: Op = Op(&['&']);
    pub const ANDAND: Op = Op(&['&', '&']);
    #[expect(dead_code)]
    pub const PIPE: Op = Op(&['|']);
    pub const PIPEPIPE: Op = Op(&['|', '|']);
    pub const LESSTHAN: Op = Op(&['<']);
    pub const LESSTHANEQ: Op = Op(&['<', '=']);
    pub const GREATERTHAN: Op = Op(&['>']);
    pub const GREATERTHANEQ: Op = Op(&['>', '=']);
    pub const EQ: Op = Op(&['=']);
    pub const EQEQ: Op = Op(&['=', '=']);
    pub const ARROW: Op = Op(&['-', '>']);
    pub const DOT: Op = Op(&['.']);
    pub const COLON: Op = Op(&[':']);
    pub const BANG: Op = Op(&['!']);
    pub const COMMA: Op = Op(&[',']);
}
201
/// The three kinds of paired delimiters the tokenizer groups on.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
pub enum Delimiter {
    Parentheses,
    SquareBrackets,
    CurlyBraces,
}

impl Delimiter {
    /// The opening character, e.g. `(`.
    pub fn open_char(self) -> char {
        // `chars()` always yields exactly two ASCII characters: open, close.
        self.chars().chars().next().unwrap()
    }

    /// The closing character, e.g. `)`.
    pub fn close_char(self) -> char {
        self.chars().chars().nth(1).unwrap()
    }

    /// Both characters of the pair as a two-byte string, open then close.
    pub fn chars(self) -> &'static str {
        match self {
            Delimiter::Parentheses => "()",
            Delimiter::SquareBrackets => "[]",
            Delimiter::CurlyBraces => "{}",
        }
    }
}
234
/// Tokenize `input` into a flat list of tokens.
///
/// `input_offset` is the byte offset of `input` within the source identified
/// by `anchor`; all spans on the returned tokens are expressed relative to
/// that anchor (see `Tokenizer::span`).
pub fn tokenize<'input, 'db>(
    db: &'db dyn crate::Db,
    anchor: Anchor<'db>,
    input_offset: Offset,
    input: &'input str,
) -> Vec<Token<'input, 'db>> {
    Tokenizer {
        db,
        anchor,
        input,
        chars: input.char_indices().peekable(),
        tokens: vec![],
        kws: Keyword::map(),
        error_start: None,
        input_offset,
        skipped_accum: None,
    }
    .tokenize()
}
254
/// Internal state for a single tokenization pass; see [`tokenize`].
struct Tokenizer<'input, 'db> {
    db: &'db dyn crate::Db,
    /// Anchor the input belongs to; all spans reference it.
    anchor: Anchor<'db>,
    /// The text being tokenized.
    input: &'input str,
    /// Peekable `(byte_offset, char)` iterator over `input`.
    chars: CharIndices<'input>,
    /// Tokens produced so far.
    tokens: Vec<Token<'input, 'db>>,
    /// Shared, lazily-built keyword lookup table.
    kws: &'static Map<String, Keyword>,
    /// Byte offset of `input` within the anchored source.
    input_offset: Offset,
    /// Start of an in-progress run of unrecognized characters, if any;
    /// flushed as a single error token by `clear_accumulated`.
    error_start: Option<usize>,
    /// Highest-ranked category of text skipped since the last token.
    skipped_accum: Option<Skipped>,
}
266
267impl<'input, 'db> Tokenizer<'input, 'db> {
    /// Main loop: dispatch on each character, then flush any trailing
    /// unrecognized-character run before returning the token list.
    fn tokenize(mut self) -> Vec<Token<'input, 'db>> {
        while let Some((index, ch)) = self.chars.next() {
            match ch {
                '#' => self.comment(index),

                // Identifiers/keywords start with a letter or `_`.
                _ if ch.is_alphabetic() || ch == '_' => self.identifier(index, ch),

                // Bracketed groups become single `Delimited` tokens.
                '{' => self.delimited(index, Delimiter::CurlyBraces, '}'),
                '[' => self.delimited(index, Delimiter::SquareBrackets, ']'),
                '(' => self.delimited(index, Delimiter::Parentheses, ')'),

                _ if ch.is_ascii_digit() => self.integer(index, ch),

                '"' => self.string_literal(index),

                '\n' => {
                    self.accumulate_skipped(Skipped::Newline);
                }

                _ if ch.is_whitespace() => {
                    self.accumulate_skipped(Skipped::Whitespace);
                }

                _ if is_op_char(ch) => self.ops(index, ch),

                _ => {
                    // Unrecognized character: start an error run (or extend
                    // the current one); it is reported when the next
                    // recognizable token begins.
                    if self.error_start.is_none() {
                        self.error_start = Some(index);
                    }
                }
            }
        }

        // Flush an error run that reaches end of input; any pending skipped
        // category is irrelevant at EOF and is discarded.
        let _skipped = self.clear_accumulated(self.input.len());

        self.tokens
    }
316
317 fn accumulate_skipped(&mut self, skipped: Skipped) {
318 self.skipped_accum = std::cmp::max(self.skipped_accum, Some(skipped));
319 }
320
321 fn clear_accumulated(&mut self, index: usize) -> Option<Skipped> {
325 if let Some(start) = self.error_start {
326 self.error_start = None;
327
328 let span = self.span(start, index);
329 self.tokens.push(Token {
330 span,
331 skipped: None,
332 kind: TokenKind::Error(
333 Diagnostic::error(self.db, span, "unrecognized characters(s)").label(
334 self.db,
335 Level::Error,
336 span,
337 "I don't know how to interpret these characters",
338 ),
339 ),
340 });
341 }
342
343 self.skipped_accum.take()
344 }
345
    /// Build a span for the byte range `start..end` of `input`, translated
    /// into the anchored coordinate system via `input_offset`.
    fn span(&self, start: usize, end: usize) -> Span<'db> {
        assert!(end >= start);
        Span {
            anchor: self.anchor,
            start: self.input_offset + start,
            end: self.input_offset + end,
        }
    }
354
    /// Skip a `#` comment through the end of the line. The terminating
    /// newline is consumed here, so it never reaches the main loop and is
    /// not recorded as `Skipped::Newline`; the comment as a whole is
    /// recorded as `Skipped::Comment`.
    fn comment(&mut self, index: usize) {
        // Flush any pending error run ending at the `#`. Note that any
        // previously accumulated skip category is discarded here and
        // replaced by `Comment` (the highest-ranked category anyway).
        let _skipped = self.clear_accumulated(index);
        self.accumulate_skipped(Skipped::Comment);

        for (_index, ch) in &mut self.chars {
            if ch == '\n' {
                return;
            }
        }
    }
365
    /// Lex an identifier or keyword beginning at byte `start`, whose first
    /// character `ch` has already been consumed.
    ///
    /// Consumes subsequent alphanumeric/`_` characters, then emits either a
    /// `Keyword` token (if the text appears in the keyword table) or an
    /// `Identifier` token.
    fn identifier(&mut self, start: usize, ch: char) {
        let skipped = self.clear_accumulated(start);

        // `end` is one past the last byte of the identifier.
        let mut end = start + ch.len_utf8();

        while let Some(&(index, ch)) = self.chars.peek() {
            if ch.is_alphanumeric() || ch == '_' {
                end = index + ch.len_utf8();
                self.chars.next();
            } else {
                break;
            }
        }

        let span = self.span(start, end);

        let text = &self.input[start..end];
        if let Some(kw) = self.kws.get(text) {
            self.tokens.push(Token {
                span,
                skipped,
                kind: TokenKind::Keyword(*kw),
            });
        } else {
            let identifier = Identifier::new(self.db, text.to_string());
            self.tokens.push(Token {
                span,
                skipped,
                kind: TokenKind::Identifier(identifier),
            })
        }
    }
398
    /// Lex an integer literal beginning at byte `start`, whose first digit
    /// `ch` has already been consumed. Accepts ASCII digits and `_`
    /// separators; the raw text (separators included) is interned as the
    /// literal's text.
    fn integer(&mut self, start: usize, ch: char) {
        let skipped = self.clear_accumulated(start);

        // `end` is one past the last byte of the literal.
        let mut end = start + ch.len_utf8();

        while let Some(&(index, ch)) = self.chars.peek() {
            if ch.is_ascii_digit() || ch == '_' {
                end = index + ch.len_utf8();
                self.chars.next();
            } else {
                break;
            }
        }

        let span = self.span(start, end);

        let text = &self.input[start..end];
        let token_text = TokenText::new(self.db, text.to_string());
        self.tokens.push(Token {
            span,
            skipped,
            kind: TokenKind::Literal(LiteralKind::Integer, token_text),
        });
    }
423
    /// Process one escape sequence; called just after a `\` was consumed at
    /// byte offset `backslash_offset`. The decoded character is appended to
    /// `content`.
    ///
    /// Recognized escapes: `\"`, `\\`, `\n`, `\r`, `\t`, `\{`, `\}`. An
    /// unrecognized escape is kept verbatim (backslash included) and an
    /// error token is emitted; a `\` at end of input likewise keeps the
    /// backslash and reports an error.
    fn escape_sequence(&mut self, backslash_offset: usize, content: &mut String) {
        if let Some((index, escape)) = self.chars.next() {
            match escape {
                '"' => content.push('"'),
                '\\' => content.push('\\'),
                'n' => content.push('\n'),
                'r' => content.push('\r'),
                't' => content.push('\t'),
                '{' => content.push('{'),
                '}' => content.push('}'),
                _ => {
                    // Keep the invalid escape verbatim so the literal's text
                    // still reflects the source.
                    content.push('\\');
                    content.push(escape);

                    let span = self.span(index, index + escape.len_utf8());
                    self.tokens.push(Token {
                        span,
                        skipped: None,
                        kind: TokenKind::Error(Diagnostic::error(
                            self.db,
                            span,
                            format!("invalid escape `\\{escape}`"),
                        )),
                    });
                }
            }
        } else {
            // Input ended right after the backslash.
            content.push('\\');

            let span = self.span(backslash_offset, backslash_offset + '\\'.len_utf8());
            self.tokens.push(Token {
                span,
                skipped: None,
                kind: TokenKind::Error(Diagnostic::error(
                    self.db,
                    span,
                    "`\\` must be followed by an escape character",
                )),
            });
        }
    }
467
    /// Push a string-literal token covering `span`.
    ///
    /// `content` is the escape-processed text gathered while scanning, and
    /// is used as-is for ordinary single-line strings. For the other forms
    /// the text is re-derived from the raw input between the quotes
    /// (`quote_len` bytes on each side):
    ///
    /// * `raw == true`: the first character inside the quotes (the `\`
    ///   raw-string marker) is dropped, then escapes are processed.
    ///   NOTE(review): the newline that follows the marker is retained and
    ///   no dedent is applied on this path — confirm that is intended.
    /// * text starting with a newline: treated as a multiline literal,
    ///   dedented (see `dedent_multiline`), then escape-processed.
    fn emit_string_literal(
        &mut self,
        span: Span<'db>,
        skipped: Option<Skipped>,
        content: String,
        quote_len: usize,
        raw: bool,
    ) {
        // Recover the literal's interior from the input using the span.
        let raw_start = (span.start - self.input_offset).as_usize() + quote_len;
        let raw_end = (span.end - self.input_offset).as_usize() - quote_len;
        let raw_content = &self.input[raw_start..raw_end];

        let final_content = if raw {
            let after_marker = &raw_content[1..];
            process_escape_sequences(after_marker)
        } else if raw_content.starts_with('\n') {
            let dedented = dedent_multiline(raw_content);
            process_escape_sequences(&dedented)
        } else {
            content
        };

        let token_text = TokenText::new(self.db, final_content);
        self.tokens.push(Token {
            span,
            skipped,
            kind: TokenKind::Literal(LiteralKind::String, token_text),
        });
    }
513
    /// Emit a best-effort string literal running from `start` to the end of
    /// the input, followed by an error token with `message` over the same
    /// span. Emitting both lets later stages still see a literal token while
    /// the missing terminator is reported.
    fn emit_unterminated_string(
        &mut self,
        start: usize,
        skipped: Option<Skipped>,
        content: String,
        message: &str,
    ) {
        let span = self.span(start, self.input.len());
        let token_text = TokenText::new(self.db, content);
        self.tokens.push(Token {
            span,
            skipped,
            kind: TokenKind::Literal(LiteralKind::String, token_text),
        });
        self.tokens.push(Token {
            span,
            skipped: None,
            kind: TokenKind::Error(Diagnostic::error(self.db, span, message)),
        });
    }
537
    /// Lex a string literal whose opening `"` (at byte `start`) was already
    /// consumed. Handles four shapes:
    ///
    /// * `""` — empty string (unless a third `"` follows, making it a
    ///   triple-quote opener);
    /// * `"""…"""` — triple-quoted literal, delegated to
    ///   `triple_quoted_string_literal`;
    /// * `"\` immediately followed by a newline — "raw" literal; the `\`
    ///   marker is consumed here and skipped again when the final text is
    ///   extracted in `emit_string_literal`;
    /// * ordinary `"…"` with escape sequences.
    ///
    /// If the closing quote is never found, a best-effort literal plus an
    /// error token are emitted.
    fn string_literal(&mut self, start: usize) {
        let skipped = self.clear_accumulated(start);

        if let Some(&(_, '"')) = self.chars.peek() {
            self.chars.next();
            if let Some(&(_, '"')) = self.chars.peek() {
                self.chars.next();
                return self.triple_quoted_string_literal(start, skipped);
            }

            // Exactly two quotes: the empty string literal.
            self.emit_string_literal(
                self.span(start, start + 2),
                skipped,
                String::new(),
                1,
                false,
            );
            return;
        }

        // A `\` directly after the quote marks a raw string only if a
        // newline follows it; peek two characters ahead via a cloned
        // iterator so nothing is consumed on the non-raw path.
        let raw = if let Some(&(_, '\\')) = self.chars.peek() {
            let mut lookahead = self.chars.clone();
            lookahead.next();
            matches!(lookahead.next(), Some((_, '\n')))
        } else {
            false
        };

        if raw {
            // Consume the `\` marker.
            self.chars.next();
        }

        let mut processed_content = String::new();

        while let Some((end, ch)) = self.chars.next() {
            if ch == '"' {
                self.emit_string_literal(
                    self.span(start, end + ch.len_utf8()),
                    skipped,
                    processed_content,
                    1,
                    raw,
                );
                return;
            }

            if ch == '\\' {
                self.escape_sequence(end, &mut processed_content);
            } else {
                processed_content.push(ch);
            }
        }

        self.emit_unterminated_string(
            start,
            skipped,
            processed_content,
            "missing end quote for string",
        );
    }
608
    /// Lex the body of a triple-quoted (`"""`) string; the opening quotes
    /// have already been consumed.
    ///
    /// Quote characters are pushed into the content tentatively; when a
    /// third consecutive quote is seen, the two tentative quotes are popped
    /// and the literal is emitted. The final text is re-derived in
    /// `emit_string_literal` (`quote_len == 3`), which dedents when the
    /// content begins with a newline.
    fn triple_quoted_string_literal(&mut self, start: usize, skipped: Option<Skipped>) {
        let mut processed_content = String::new();

        while let Some((end, ch)) = self.chars.next() {
            if ch == '"' {
                // Tentatively part of the content; may be the terminator.
                processed_content.push('"');
                if let Some(&(_, '"')) = self.chars.peek() {
                    self.chars.next();
                    processed_content.push('"');
                    if let Some(&(third_idx, '"')) = self.chars.peek() {
                        self.chars.next();
                        // Three in a row: drop the two tentative quotes.
                        processed_content.pop();
                        processed_content.pop();
                        self.emit_string_literal(
                            self.span(start, third_idx + '"'.len_utf8()),
                            skipped,
                            processed_content,
                            3,
                            false,
                        );
                        return;
                    }
                }
            } else if ch == '\\' {
                self.escape_sequence(end, &mut processed_content);
            } else {
                processed_content.push(ch);
            }
        }

        self.emit_unterminated_string(
            start,
            skipped,
            processed_content,
            "missing end quotes for triple-quoted string",
        );
    }
653
    /// Lex a delimited group whose open delimiter (at byte `start`) was
    /// already consumed; `close` is the matching close character.
    ///
    /// Nested brackets of all three kinds are tracked with a stack, and a
    /// single `Delimited` token is emitted whose `text` excludes the outer
    /// delimiters. On a mismatched closer — or end of input — a
    /// "missing `close`" error is reported spanning from `start` to the end
    /// of the input.
    ///
    /// NOTE(review): string literals and comments inside the group are not
    /// treated specially here, so a bracket character inside a string could
    /// unbalance the stack — confirm this is handled acceptably upstream.
    fn delimited(&mut self, start: usize, delim: Delimiter, close: char) {
        let skipped = self.clear_accumulated(start);
        let mut close_stack = vec![close];

        while let Some((end, ch)) = self.chars.next() {
            match ch {
                '{' => close_stack.push('}'),
                '[' => close_stack.push(']'),
                '(' => close_stack.push(')'),
                '}' | ']' | ')' => {
                    if ch == *close_stack.last().unwrap() {
                        close_stack.pop();
                        if close_stack.is_empty() {
                            // All closers are 1 byte, so `end + 1` is the
                            // end of the group.
                            assert!(ch.len_utf8() == 1);
                            self.tokens.push(Token {
                                span: self.span(start, end + 1),
                                skipped,
                                kind: TokenKind::Delimited {
                                    delimiter: delim,
                                    text: &self.input[start + 1..end],
                                },
                            });
                            return;
                        }
                    } else {
                        // Mismatched closer: bail out and report below.
                        break;
                    }
                }
                _ => {}
            }
        }

        let end = self.input.len();
        let span = self.span(start, end);
        self.tokens.push(Token {
            span,
            skipped: None,
            kind: TokenKind::Error(Diagnostic::error(
                self.db,
                span,
                format!("missing `{close}`"),
            )),
        });
    }
703
704 fn ops(&mut self, start: usize, ch: char) {
705 let skipped = self.clear_accumulated(start);
706 self.tokens.push(Token {
707 span: self.span(start, start + ch.len_utf8()),
708 skipped,
709 kind: TokenKind::OpChar(ch),
710 });
711 }
712}
713
/// Is `ch` one of the punctuation characters that can appear in an operator?
pub fn is_op_char(ch: char) -> bool {
    // The full set of operator characters recognized by the tokenizer.
    const OP_CHARS: &str = "+-*/%=!<>&|:,.;?";
    OP_CHARS.contains(ch)
}
734
/// Peekable `(byte_offset, char)` iterator over the input text.
type CharIndices<'input> = std::iter::Peekable<std::str::CharIndices<'input>>;
736
/// Dedent the contents of a multiline string literal.
///
/// `raw` is the text between the quotes and begins with a newline (the
/// caller checks this). The leading newline and the final line (which holds
/// only the indentation before the closing quotes) are removed, then the
/// longest common leading-whitespace prefix of the remaining lines is
/// stripped.
///
/// Fixes over the previous version:
/// * lines containing only whitespace (e.g. a "blank" line with trailing
///   spaces) no longer count toward the common prefix, so they cannot
///   defeat the dedent; such lines are emptied in the output;
/// * slicing uses checked `str::get`, so a common prefix measured in bytes
///   can never panic on a non-char-boundary (possible with multibyte
///   whitespace).
fn dedent_multiline(raw: &str) -> String {
    // Drop the newline that immediately follows the opening quotes.
    let content = raw.strip_prefix('\n').unwrap_or(raw);

    // Drop the final line: it is only the indentation of the closing quotes.
    let content = match content.rfind('\n') {
        Some(pos) => &content[..pos],
        // Single-line content: nothing to dedent against.
        None => return content.to_string(),
    };

    let lines: Vec<&str> = content.split('\n').collect();

    // Common indentation (in bytes) over lines with any non-whitespace text.
    let common_prefix = lines
        .iter()
        .filter(|line| !line.trim_start().is_empty())
        .map(|line| line.len() - line.trim_start().len())
        .min()
        .unwrap_or(0);

    lines
        .iter()
        // Whitespace-only lines shorter than the prefix become empty.
        .map(|line| line.get(common_prefix..).unwrap_or(""))
        .collect::<Vec<_>>()
        .join("\n")
}
790
/// Resolve backslash escape sequences in `raw`, returning the decoded text.
///
/// Recognized escapes: `\"`, `\\`, `\n`, `\r`, `\t`, `\{`, `\}`. Any other
/// escape — and a trailing lone backslash — is kept verbatim; invalid
/// escapes are diagnosed earlier, during scanning.
fn process_escape_sequences(raw: &str) -> String {
    // Map a recognized escape character to its replacement.
    let decode = |esc: char| -> Option<char> {
        match esc {
            '"' => Some('"'),
            '\\' => Some('\\'),
            'n' => Some('\n'),
            'r' => Some('\r'),
            't' => Some('\t'),
            '{' => Some('{'),
            '}' => Some('}'),
            _ => None,
        }
    };

    let mut out = String::with_capacity(raw.len());
    let mut rest = raw.chars();

    while let Some(ch) = rest.next() {
        if ch != '\\' {
            out.push(ch);
            continue;
        }
        match rest.next() {
            Some(esc) => match decode(esc) {
                Some(decoded) => out.push(decoded),
                None => {
                    // Unknown escape: keep it verbatim.
                    out.push('\\');
                    out.push(esc);
                }
            },
            // Trailing lone backslash: keep it.
            None => out.push('\\'),
        }
    }

    out
}