From 7add446d14052c9a4cd9b6d53e108b7acb2cdd07 Mon Sep 17 00:00:00 2001 From: Jesse Braham Date: Sun, 26 Jan 2025 11:29:04 +0100 Subject: [PATCH] Remove currently unhandled tokens, improvements to lexer module --- onihime/src/lexer/error.rs | 78 +++++++++++---- onihime/src/lexer/mod.rs | 197 ++++++++++++++++--------------------- onihime/src/lexer/token.rs | 37 +++---- 3 files changed, 164 insertions(+), 148 deletions(-) diff --git a/onihime/src/lexer/error.rs b/onihime/src/lexer/error.rs index ef6e3c7..8fa2f67 100644 --- a/onihime/src/lexer/error.rs +++ b/onihime/src/lexer/error.rs @@ -1,38 +1,74 @@ -/// Errors during lexical analysis. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum LexerError { +use std::fmt; + +/// Kinds of errors which can occur during lexical analysis. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum LexerErrorKind { /// An invalid character literal was encountered. InvalidChar, - /// An invalid keyword literal was encountered. + /// An invalid keyword was encountered. InvalidKeyword, /// An invalid number literal was encountered. InvalidNumber, - /// An invalid token was encountered. - InvalidToken, - /// An unclosed string was encountered. + /// An invalid symbol was encountered. + InvalidSymbol, + /// An unclosed string literal was encountered. UnclosedString, - /// Invalid UTF-8 sequence was encountered. 
- Utf8Error(std::str::Utf8Error), } -impl From for LexerError { - fn from(err: std::str::Utf8Error) -> Self { - Self::Utf8Error(err) +#[cfg(not(tarpaulin_include))] +impl fmt::Display for LexerErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use LexerErrorKind::*; + + match self { + InvalidChar => write!(f, "Invalid character literal"), + InvalidKeyword => write!(f, "Invalid keyword"), + InvalidNumber => write!(f, "Invalid number literal"), + InvalidSymbol => write!(f, "Invalid symbol"), + UnclosedString => write!(f, "Unclosed string literal"), + } + } +} + +/// Errors which occur during lexical analysis. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LexerError { + /// The kind of lexer error. + pub kind: LexerErrorKind, + /// Additional context regarding the lexer error. + pub context: Option, +} + +impl LexerError { + /// Construct a new instance of a lexer error. + #[must_use] + pub const fn new(kind: LexerErrorKind) -> Self { + Self { + kind, + context: None, + } + } + + /// Provide additional context for a lexer error. 
+ #[must_use] + pub fn with_context(mut self, f: impl FnOnce() -> C) -> Self + where + C: fmt::Display, + { + self.context = Some(f().to_string()); + self } } impl std::error::Error for LexerError {} #[cfg(not(tarpaulin_include))] -impl std::fmt::Display for LexerError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - LexerError::InvalidChar => write!(f, "An invalid character literal was encountered"), - LexerError::InvalidKeyword => write!(f, "An invalid keyword literal was encountered"), - LexerError::InvalidNumber => write!(f, "An invalid number literal was encountered"), - LexerError::InvalidToken => write!(f, "An invalid token was encountered"), - LexerError::UnclosedString => write!(f, "An unclosed string was encountered"), - LexerError::Utf8Error(err) => write!(f, "{err}"), +impl fmt::Display for LexerError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(ref context) = self.context { + write!(f, "{}: {}", self.kind, context) + } else { + write!(f, "{}", self.kind) } } } diff --git a/onihime/src/lexer/mod.rs b/onihime/src/lexer/mod.rs index 78c589f..03984e1 100644 --- a/onihime/src/lexer/mod.rs +++ b/onihime/src/lexer/mod.rs @@ -9,14 +9,14 @@ use std::{ iter::Peekable, - ops::Range, str::{self, Chars}, }; pub use self::{ - error::LexerError, + error::{LexerError, LexerErrorKind}, token::{Token, TokenKind}, }; +use crate::Span; mod error; mod token; @@ -31,7 +31,7 @@ pub struct Lexer<'a> { } impl<'a> Lexer<'a> { - /// Construct a new lexer instance. + /// Construct a new instance of a lexer. #[must_use] pub fn new(source: &'a str) -> Self { Self { @@ -52,8 +52,8 @@ impl<'a> Lexer<'a> { /// Return the span of the current [Token]. #[inline] #[must_use] - pub fn span(&self) -> Range { - self.token_start..self.cursor + pub fn span(&self) -> Span { + Span::new(self.token_start, self.cursor) } /// Return the next [Token] in the input stream. 
@@ -62,7 +62,7 @@ impl<'a> Lexer<'a> { self.token_start = self.cursor; let Some(c) = self.advance() else { - return Ok(None); // EOF reached + return Ok(None); // EOF reached, no input left to tokenize }; let kind = match c { @@ -78,36 +78,17 @@ impl<'a> Lexer<'a> { '{' => TokenKind::OpenBrace, '}' => TokenKind::CloseBrace, - // Dispatch: - '#' => match self.advance() { - Some('{') => TokenKind::OpenHashBrace, - Some('_') => TokenKind::Discard, - _ => { - self.read_word(); // Recover - return Err(LexerError::InvalidToken); - } - }, - - // Macros: - '\'' => TokenKind::Quote, - '`' => TokenKind::BackQuote, - ',' if self.peek().is_some_and(|c| *c == '@') => { - self.advance(); // '@' - TokenKind::CommaAt - } - ',' => TokenKind::Comma, - // Literals: '\\' => self.read_char()?, ':' => self.read_keyword()?, '0'..='9' => self.read_number(c)?, '+' | '-' if self.peek().is_some_and(|c| c.is_ascii_digit()) => self.read_number(c)?, '"' => self.read_string()?, - _ if is_symbol_prefix(c) => { + _ if is_symbol_prefix(&c) => { self.read_word(); - match str::from_utf8(self.slice())? 
{ - "true" | "false" => TokenKind::Bool, - "nil" => TokenKind::Nil, + match self.slice() { + b"true" | b"false" => TokenKind::Bool, + b"nil" => TokenKind::Nil, _ => TokenKind::Symbol, } } @@ -115,11 +96,11 @@ impl<'a> Lexer<'a> { // Invalid tokens: _ => { self.read_word(); // Recover - return Err(LexerError::InvalidToken); + return Err(LexerError::new(LexerErrorKind::InvalidSymbol)); } }; - Ok(Some(Token::new(kind))) + Ok(Some(Token::new(kind, self.span()))) } #[inline] @@ -140,12 +121,12 @@ impl<'a> Lexer<'a> { } fn read_word(&mut self) { - self.take_while(|c| !is_separator(*c)); + self.take_while(|c| !is_separator(c)); } fn read_comment(&mut self) -> TokenKind { self.take_while(|c| *c != '\n'); - TokenKind::LineComment + TokenKind::Comment } fn read_whitespace(&mut self) -> TokenKind { @@ -155,16 +136,19 @@ impl<'a> Lexer<'a> { fn read_char(&mut self) -> Result { // NOTE: We have already consumed the initial '\' when this function is invoked - let c = if self.peek().is_some_and(|c| !is_separator(*c)) { + + let c = if self.peek().is_some_and(|c| !is_separator(c)) { self.advance().unwrap() // SAFETY: This will never panic } else { - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); }; match c { - 'u' if self.peek().is_some_and(|c| !is_separator(*c)) => self.complete_unicode_escape(), - 'x' if self.peek().is_some_and(|c| !is_separator(*c)) => self.complete_ascii_escape(), - _ if self.peek().is_some_and(|c| !is_separator(*c)) => Err(LexerError::InvalidChar), + 'u' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_unicode_escape(), + 'x' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_ascii_escape(), + _ if self.peek().is_some_and(|c| !is_separator(c)) => { + Err(LexerError::new(LexerErrorKind::InvalidChar)) + } _ => Ok(TokenKind::Char), } } @@ -177,7 +161,7 @@ impl<'a> Lexer<'a> { self.advance(); } else { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return 
Err(LexerError::new(LexerErrorKind::InvalidChar)); } // Expect a single hexadecimal digit: @@ -185,14 +169,14 @@ impl<'a> Lexer<'a> { self.advance(); } else { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); } // We should be at the end of the literal now, i.e. next char should be a // separator: - if self.peek().is_some_and(|c| !is_separator(*c)) { + if self.peek().is_some_and(|c| !is_separator(c)) { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); } Ok(TokenKind::Char) @@ -203,21 +187,21 @@ impl<'a> Lexer<'a> { // Expect between 1 and 6 hexadecimal digits: let mut count = 0; - while self.peek().is_some_and(|c| !is_separator(*c)) && count < 6 { + while self.peek().is_some_and(|c| !is_separator(c)) && count < 6 { match self.advance() { Some(c) if c.is_ascii_hexdigit() => count += 1, _ => { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); } }; } // If no hexadecimal digits were found, or digits were found but we are still // not at the end of the literal, then the literal is invalid: - if count == 0 || self.peek().is_some_and(|c| !is_separator(*c)) { + if count == 0 || self.peek().is_some_and(|c| !is_separator(c)) { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); } Ok(TokenKind::Char) @@ -225,12 +209,13 @@ impl<'a> Lexer<'a> { fn read_keyword(&mut self) -> Result { // NOTE: We have already consumed the initial ':' when this function is invoked - if self.peek().is_some_and(|c| is_symbol_prefix(*c)) { + + if self.peek().is_some_and(|c| !is_separator(c)) { self.read_word(); Ok(TokenKind::Keyword) } else { self.read_word(); // Recover - Err(LexerError::InvalidKeyword) + Err(LexerError::new(LexerErrorKind::InvalidKeyword)) } } @@ -244,14 +229,14 @@ impl<'a> 
Lexer<'a> { } } - while self.peek().is_some_and(|c| !is_separator(*c)) { + while self.peek().is_some_and(|c| !is_separator(c)) { match self.advance() { Some(c) if c.is_ascii_digit() => {} Some('.') => return self.complete_decimal(), Some('/') => return self.complete_ratio(), _ => { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } } } @@ -261,26 +246,27 @@ impl<'a> Lexer<'a> { fn read_number_radix(&mut self, radix: u32) -> Result { // NOTE: We have already consumed the initial '0' when this function is invoked + self.advance(); // Base prefix (i.e. 'b'/'B', 'o'/'O', 'x'/'X') let mut digit_found = false; while let Some(c) = self.peek() { match c { - _ if is_separator(*c) => break, + _ if is_separator(c) => break, _ if c.is_digit(radix) => { - digit_found = true; self.advance(); + digit_found = true; } _ => { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } }; } if !digit_found { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } Ok(TokenKind::Integer) @@ -289,18 +275,19 @@ impl<'a> Lexer<'a> { fn complete_decimal(&mut self) -> Result { // NOTE: We have already consumed the leading digits and '.' 
when this function // is invoked + let mut digit_found = false; let mut exp_found = false; let mut sign_found = false; - while self.peek().is_some_and(|c| !is_separator(*c)) { + while self.peek().is_some_and(|c| !is_separator(c)) { match self.advance() { Some(c) if c.is_ascii_digit() => digit_found = true, Some('e') | Some('E') if digit_found && !exp_found => exp_found = true, Some('+') | Some('-') if exp_found && !sign_found => sign_found = true, Some(_) => { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } None => unreachable!(), }; @@ -312,16 +299,17 @@ impl<'a> Lexer<'a> { fn complete_ratio(&mut self) -> Result { // NOTE: We have already consumed the leading digits and '/' when this function // is invoked + let mut sign_found = false; let mut digit_found = false; - while self.peek().is_some_and(|c| !is_separator(*c)) { + while self.peek().is_some_and(|c| !is_separator(c)) { match self.advance() { Some(c) if c.is_ascii_digit() => digit_found = true, Some('+') | Some('-') if !digit_found && !sign_found => sign_found = true, Some(_) => { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } None => unreachable!(), }; @@ -329,7 +317,7 @@ impl<'a> Lexer<'a> { if !digit_found { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } Ok(TokenKind::Ratio) @@ -337,6 +325,7 @@ impl<'a> Lexer<'a> { fn read_string(&mut self) -> Result { // NOTE: We have already consumed the initial '"' when this function is invoked + loop { match self.advance() { Some('"') => break, @@ -344,7 +333,7 @@ impl<'a> Lexer<'a> { self.advance(); // '"' } Some(_) => {} - None => return Err(LexerError::UnclosedString), + None => return Err(LexerError::new(LexerErrorKind::UnclosedString)), } } @@ -361,12 +350,12 @@ impl Iterator for Lexer<'_> { } #[inline] -fn 
is_separator(c: char) -> bool { +fn is_separator(c: &char) -> bool { c.is_ascii_whitespace() | matches!(c, '(' | ')' | '[' | ']' | '{' | '}' | ';') } #[inline] -fn is_symbol_prefix(c: char) -> bool { +fn is_symbol_prefix(c: &char) -> bool { c.is_alphabetic() | matches!( c, @@ -382,7 +371,7 @@ mod tests { fn empty() { let mut lexer = Lexer::new(""); assert_eq!(lexer.next(), None); - assert_eq!(lexer.span(), 0..0); + assert_eq!(lexer.span(), Span::default()); assert_eq!(lexer.slice(), &[]); } @@ -397,7 +386,7 @@ mod tests { Err(e) => Err(e), }); assert_eq!(kind, Some(token)); - assert_eq!(span, lexer.span()); + assert_eq!(span, lexer.span().into()); assert_eq!(slice.as_bytes(), lexer.slice()); } assert_eq!(lexer.next(), None); @@ -406,11 +395,11 @@ mod tests { } test!(line_comment: ";; foobar\nnil ; bar; baz" => [ - (Ok(TokenKind::LineComment), 0..9, ";; foobar"), + (Ok(TokenKind::Comment), 0..9, ";; foobar"), (Ok(TokenKind::Whitespace), 9..10, "\n"), (Ok(TokenKind::Nil), 10..13, "nil"), (Ok(TokenKind::Whitespace), 13..14, " "), - (Ok(TokenKind::LineComment), 14..24, "; bar; baz"), + (Ok(TokenKind::Comment), 14..24, "; bar; baz"), ]); test!(list: "(0 1.2 -3/4 +5.6e-7)" => [ @@ -449,33 +438,21 @@ mod tests { (Ok(TokenKind::CloseBracket), 11..12, "]"), ]); - test!(dispatch: "#{} #_() #_ 4" => [ - (Ok(TokenKind::OpenHashBrace), 0..2, "#{"), - (Ok(TokenKind::CloseBrace), 2..3, "}"), - (Ok(TokenKind::Whitespace), 3..4, " "), - (Ok(TokenKind::Discard), 4..6, "#_"), - (Ok(TokenKind::OpenParen), 6..7, "("), - (Ok(TokenKind::CloseParen), 7..8, ")"), - (Ok(TokenKind::Whitespace), 8..9, " "), - (Ok(TokenKind::Discard), 9..11, "#_"), - (Ok(TokenKind::Whitespace), 11..12, " "), - (Ok(TokenKind::Integer), 12..13, "4"), - ]); - - test!(err_invalid_dispatch: "#@" => [ - (Err(LexerError::InvalidToken), 0..2, "#@"), - ]); - - test!(keyword: ":m :x0 :this-is-an-keyword-too!" => [ + test!(keyword: ":m :0 :this-is-an-keyword-too! 
:đŸ˜»" => [ (Ok(TokenKind::Keyword), 0..2, ":m"), (Ok(TokenKind::Whitespace), 2..3, " "), - (Ok(TokenKind::Keyword), 3..6, ":x0"), - (Ok(TokenKind::Whitespace), 6..7, " "), - (Ok(TokenKind::Keyword), 7..31, ":this-is-an-keyword-too!"), + (Ok(TokenKind::Keyword), 3..5, ":0"), + (Ok(TokenKind::Whitespace), 5..6, " "), + (Ok(TokenKind::Keyword), 6..30, ":this-is-an-keyword-too!"), + (Ok(TokenKind::Whitespace), 30..31, " "), + (Ok(TokenKind::Keyword), 31..36, ":đŸ˜»"), ]); - test!(err_invalid_keyword: ":0" => [ - (Err(LexerError::InvalidKeyword), 0..2, ":0"), + test!(err_invalid_keyword: ": :;" => [ + (Err(LexerError::new(LexerErrorKind::InvalidKeyword)), 0..1, ":"), + (Ok(TokenKind::Whitespace), 1..2, " "), + (Err(LexerError::new(LexerErrorKind::InvalidKeyword)), 2..3, ":"), + (Ok(TokenKind::Comment), 3..4, ";"), ]); test!(char: r"\a \? \7 \λ \\ \u \x" => [ @@ -495,47 +472,47 @@ mod tests { ]); test!(err_invalid_char: r"\ \xF \x0 \x111 \uG \u2222222" => [ - (Err(LexerError::InvalidChar), 0..1, r"\"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 0..1, r"\"), (Ok(TokenKind::Whitespace), 1..2, " "), - (Err(LexerError::InvalidChar), 2..5, r"\xF"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 2..5, r"\xF"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::InvalidChar), 6..9, r"\x0"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 6..9, r"\x0"), (Ok(TokenKind::Whitespace), 9..10, " "), - (Err(LexerError::InvalidChar), 10..15, r"\x111"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 10..15, r"\x111"), (Ok(TokenKind::Whitespace), 15..16, " "), - (Err(LexerError::InvalidChar), 16..19, r"\uG"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 16..19, r"\uG"), (Ok(TokenKind::Whitespace), 19..20, " "), - (Err(LexerError::InvalidChar), 20..29, r"\u2222222"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 20..29, r"\u2222222"), ]); test!(err_invalid_integer: "0b012 0o8 0xFG 1N 0x" => [ - (Err(LexerError::InvalidNumber), 0..5, 
"0b012"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..5, "0b012"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::InvalidNumber), 6..9, "0o8"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 6..9, "0o8"), (Ok(TokenKind::Whitespace), 9..10, " "), - (Err(LexerError::InvalidNumber), 10..14, "0xFG"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 10..14, "0xFG"), (Ok(TokenKind::Whitespace), 14..15, " "), - (Err(LexerError::InvalidNumber), 15..17, "1N"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 15..17, "1N"), (Ok(TokenKind::Whitespace), 17..18, " "), - (Err(LexerError::InvalidNumber), 18..20, "0x"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 18..20, "0x"), ]); test!(err_invalid_decimal: "1.2.3 4.e6 7.8+ 9.0+e1" => [ - (Err(LexerError::InvalidNumber), 0..5, "1.2.3"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..5, "1.2.3"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::InvalidNumber), 6..10, "4.e6"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 6..10, "4.e6"), (Ok(TokenKind::Whitespace), 10..11, " "), - (Err(LexerError::InvalidNumber), 11..15, "7.8+"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 11..15, "7.8+"), (Ok(TokenKind::Whitespace), 15..16, " "), - (Err(LexerError::InvalidNumber), 16..22, "9.0+e1"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 16..22, "9.0+e1"), ]); test!(err_invalid_ratio: "1/ -2/3+ 4/-" => [ - (Err(LexerError::InvalidNumber), 0..2, "1/"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..2, "1/"), (Ok(TokenKind::Whitespace), 2..3, " "), - (Err(LexerError::InvalidNumber), 3..8, "-2/3+"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 3..8, "-2/3+"), (Ok(TokenKind::Whitespace), 8..9, " "), - (Err(LexerError::InvalidNumber), 9..12, "4/-"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 9..12, "4/-"), ]); test!(string: "\"föö bar1\nbaz\" \"\" \"懄い 😍\"" => [ @@ -547,7 +524,7 @@ mod tests { 
]); test!(err_unclosed_string: "\"oops" => [ - (Err(LexerError::UnclosedString), 0..5, "\"oops"), + (Err(LexerError::new(LexerErrorKind::UnclosedString)), 0..5, "\"oops"), ]); test!(symbol: "+ rev fold0 nil? x str-cat 猫" => [ @@ -572,9 +549,9 @@ mod tests { #[test] fn $name(x in $input) { let mut lexer = Lexer::new(&x); - assert_eq!(lexer.next(), Some(Ok(Token { kind: TokenKind::$kind }))); + assert_eq!(lexer.next(), Some(Ok(Token::new(TokenKind::$kind, lexer.span())))); assert_eq!(lexer.slice(), x.as_bytes()); - assert_eq!(lexer.span(), 0..x.len()); + assert_eq!(lexer.span(), Span::new(0, x.len())); } } }; diff --git a/onihime/src/lexer/token.rs b/onihime/src/lexer/token.rs index cb43a34..52f64d5 100644 --- a/onihime/src/lexer/token.rs +++ b/onihime/src/lexer/token.rs @@ -1,12 +1,12 @@ +use crate::Span; + /// Kinds of tokens which are valid in Onihime source code. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum TokenKind { + /// Line comment, e.g. `; ...` + Comment, /// Whitespace, e.g. ' ', '\t', '\n' Whitespace, - /// Line comment, e.g. `; ...` - LineComment, - /// Discard, e.g. `#_ 4`, `#_( ... )` - Discard, /// Opening parenthesis, e.g. `(` OpenParen, @@ -20,8 +20,6 @@ pub enum TokenKind { OpenBracket, /// Closing bracket, e.g. `]` CloseBracket, - /// Opening hash-brace, e.g. `#{` - OpenHashBrace, /// Boolean, e.g. `true`, `false` Bool, @@ -41,15 +39,18 @@ pub enum TokenKind { Symbol, /// Nil, e.g. `nil` Nil, +} - /// Comma, e.g. `,` - Comma, - /// Comma followed by at sign, e.g. `,@` - CommaAt, - /// Backtick quote, e.g. `` ` `` - BackQuote, - /// Single quote, e.g. `'` - Quote, +impl TokenKind { + /// Returns `true` if the token type is an atom. + pub fn is_atom(&self) -> bool { + use TokenKind::*; + + matches!( + self, + Bool | Char | Keyword | Decimal | Integer | Ratio | String | Symbol | Nil + ) + } } /// A valid token found in Onihime source code. @@ -57,12 +58,14 @@ pub struct Token { /// Kind of token which was found. 
pub kind: TokenKind, + /// The token's span. + pub span: Span, } impl Token { - /// Construct a new instance of `Token`. + /// Construct a new instance of a token. #[must_use] - pub const fn new(kind: TokenKind) -> Self { - Self { kind } + pub const fn new(kind: TokenKind, span: Span) -> Self { + Self { kind, span } } }