// furiosa_mapping/parser/lexer.rs

1use proc_macro2::{Literal, TokenStream, TokenTree};
2use std::collections::VecDeque;
3use std::fmt;
4
/// Mode for the lexer, determining how expressions are tokenized.
///
/// `PartialEq`/`Eq`/`Hash` are derived so modes can be compared directly
/// (e.g. in tests or diagnostics) instead of only via `matches!`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LexerMode {
    /// Parse mapping expressions (for `m!` macro)
    Mapping,
    /// Parse index expressions with expression capture (for `i!` macro)
    Index,
}
13
/// A single lexical token produced by [`Lexer`].
#[derive(Debug, Clone)]
pub enum Token {
    // --- Literals ---
    /// A bare identifier.
    Symbol(String),
    /// A non-negative integer literal.
    Nat(usize),
    /// A captured Rust expression: in `Index` mode, everything after a `:`
    /// up to the next top-level `,` (or end of input).
    Expr(proc_macro2::TokenStream),
    /// The verbatim contents of a `{ ... }` group, passed through unlexed.
    Escaped(proc_macro2::TokenStream),

    // --- Operators & Punctuation ---
    Slash,   // /
    Percent, // %
    Eq,      // =
    Hash,    // #
    Comma,   // ,
    Colon,   // :

    // --- Delimiters ---
    // Groups are flattened by the lexer into explicit open/close tokens.
    LParen,   // (
    RParen,   // )
    LBracket, // [
    RBracket, // ]
}
36
/// Errors produced while tokenizing the macro input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum LexicalError {
    /// A literal that is neither a natural number nor a synthetic
    /// group-close marker.
    InvalidToken(String),
    /// A punctuation character the grammar does not recognize.
    UnrecognizedToken(String),
}

impl fmt::Display for LexicalError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            LexicalError::InvalidToken(s) => write!(f, "Invalid token: {}", s),
            LexicalError::UnrecognizedToken(s) => write!(f, "Unrecognized token: {}", s),
        }
    }
}

// With `Debug` + `Display` in place, implementing `Error` lets callers
// propagate lexical errors via `?` / `Box<dyn Error>`.
impl std::error::Error for LexicalError {}
51
/// Lexer for tokenizing input TokenStream.
///
/// Implements [`Iterator`], yielding `(start, Token, end)` triples
/// (span offsets are currently dummy zeros — see the `next` impl).
#[derive(Debug)]
pub struct Lexer {
    /// Iterator over TokenTree.
    iter: proc_macro2::token_stream::IntoIter,
    /// Pending TokenTrees to be processed. Serves two purposes: groups are
    /// flattened into it (inner trees plus a synthetic close marker), and
    /// tokens read one step too far are pushed back onto its front.
    pending: VecDeque<TokenTree>,
    /// Lexer mode (Mapping or Index)
    mode: LexerMode,
    /// Whether a Colon was seen in Index mode
    after_colon: bool,
}
64
65impl Lexer {
66    /// Creates a new Lexer from the given TokenStream with the specified mode.
67    pub fn new(input: TokenStream, mode: LexerMode) -> Self {
68        Lexer {
69            iter: input.into_iter(),
70            pending: VecDeque::new(),
71            mode,
72            after_colon: false,
73        }
74    }
75
76    fn next_tree(&mut self) -> Option<TokenTree> {
77        self.pending.pop_front().or_else(|| self.iter.next())
78    }
79
80    fn capture_expr(&mut self, first: TokenTree) -> proc_macro2::TokenStream {
81        let mut tokens = vec![first];
82
83        loop {
84            match self.next_tree() {
85                None => break,
86                Some(tree) => {
87                    if let TokenTree::Punct(ref p) = tree
88                        && p.as_char() == ','
89                    {
90                        // Push comma back for the next token
91                        self.pending.push_front(tree);
92                        break;
93                    }
94                    tokens.push(tree);
95                }
96            }
97        }
98
99        tokens.into_iter().collect()
100    }
101}
102
impl Iterator for Lexer {
    // lalrpop-style triple: (start offset, token, end offset).
    type Item = Result<(usize, Token, usize), LexicalError>;

    fn next(&mut self) -> Option<Self::Item> {
        let tree = self.next_tree()?;
        // Spans are not tracked yet; every token reports a dummy (0, 0) range.
        let (span_start, span_end) = (0, 0);

        // In Index mode, after we see Colon, capture the expression
        if matches!(self.mode, LexerMode::Index) && self.after_colon {
            self.after_colon = false;
            let expr = self.capture_expr(tree);
            return Some(Ok((span_start, Token::Expr(expr), span_end)));
        }

        let token = match tree {
            TokenTree::Ident(ident) => Token::Symbol(ident.to_string()),

            TokenTree::Literal(lit) => {
                let s = lit.to_string();
                if let Ok(n) = s.parse::<usize>() {
                    Token::Nat(n)
                // `Literal::string(")")` prints with its quotes, i.e. `")"`;
                // these are the synthetic close markers injected for groups
                // in the Group arm below.
                } else if s == "\")\"" {
                    Token::RParen
                } else if s == "\"]\"" {
                    Token::RBracket
                } else {
                    return Some(Err(LexicalError::InvalidToken(s)));
                }
            }

            TokenTree::Punct(punct) => {
                let ch = punct.as_char();

                match ch {
                    '/' => Token::Slash,
                    '%' => Token::Percent,
                    '#' => Token::Hash,
                    ',' => Token::Comma,
                    ':' => {
                        // In Index mode a colon arms expression capture for
                        // the following `next()` call (see top of fn).
                        if matches!(self.mode, LexerMode::Index) {
                            self.after_colon = true;
                        }
                        Token::Colon
                    }
                    '=' => Token::Eq,
                    _ => return Some(Err(LexicalError::UnrecognizedToken(ch.to_string()))),
                }
            }

            TokenTree::Group(group) => {
                // proc_macro2 delivers delimited groups as single trees;
                // flatten them so the grammar sees explicit open/close tokens.
                let (open_token, close_lit_string) = match group.delimiter() {
                    proc_macro2::Delimiter::Parenthesis => (Token::LParen, ")"),
                    proc_macro2::Delimiter::Bracket => (Token::LBracket, "]"),
                    proc_macro2::Delimiter::Brace => {
                        // `{ ... }` escapes lexing: contents pass through verbatim.
                        let inner_stream: proc_macro2::TokenStream = group.stream();
                        return Some(Ok((span_start, Token::Escaped(inner_stream), span_end)));
                    }
                    proc_macro2::Delimiter::None => {
                        // Invisible delimiters (e.g. from macro expansion):
                        // splice the inner tokens in place and retry.
                        let inner_stream: Vec<TokenTree> = group.stream().into_iter().collect();
                        for token in inner_stream.into_iter().rev() {
                            self.pending.push_front(token);
                        }
                        return self.next();
                    }
                };

                // Queue a synthetic string literal (e.g. `")"`) carrying the
                // group's closing span, so the matching close token is emitted
                // after the group's contents.
                // NOTE(review): `capture_expr` stops only at `,`, so an Index
                // expression started inside a group would swallow this marker;
                // presumably `:` expressions only occur at top level — confirm.
                let mut fake_close = TokenTree::Literal(Literal::string(close_lit_string));
                fake_close.set_span(group.span_close());
                self.pending.push_front(fake_close);

                // Push inner trees in reverse so they pop in source order,
                // ahead of the close marker queued above.
                let inner_stream: Vec<TokenTree> = group.stream().into_iter().collect();
                for token in inner_stream.into_iter().rev() {
                    self.pending.push_front(token);
                }

                open_token
            }
        };

        Some(Ok((span_start, token, span_end)))
    }
}