From 7add446d14052c9a4cd9b6d53e108b7acb2cdd07 Mon Sep 17 00:00:00 2001 From: Jesse Braham Date: Sun, 26 Jan 2025 11:29:04 +0100 Subject: [PATCH] Remove currently unhandled tokens, improvements to lexer module --- onihime/src/lexer/error.rs | 78 +++++++++++---- onihime/src/lexer/mod.rs | 197 ++++++++++++++++--------------------- onihime/src/lexer/token.rs | 37 +++---- 3 files changed, 164 insertions(+), 148 deletions(-) diff --git a/onihime/src/lexer/error.rs b/onihime/src/lexer/error.rs index ef6e3c7..8fa2f67 100644 --- a/onihime/src/lexer/error.rs +++ b/onihime/src/lexer/error.rs @@ -1,38 +1,74 @@ -/// Errors during lexical analysis. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum LexerError { +use std::fmt; + +/// Kinds of errors which can occur during lexical analysis. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum LexerErrorKind { /// An invalid character literal was encountered. InvalidChar, - /// An invalid keyword literal was encountered. + /// An invalid keyword was encountered. InvalidKeyword, /// An invalid number literal was encountered. InvalidNumber, - /// An invalid token was encountered. - InvalidToken, - /// An unclosed string was encountered. + /// An invalid symbol was encountered. + InvalidSymbol, + /// An unclosed string literal was encountered. UnclosedString, - /// Invalid UTF-8 sequence was encountered. 
- Utf8Error(std::str::Utf8Error), } -impl From for LexerError { - fn from(err: std::str::Utf8Error) -> Self { - Self::Utf8Error(err) +#[cfg(not(tarpaulin_include))] +impl fmt::Display for LexerErrorKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + use LexerErrorKind::*; + + match self { + InvalidChar => write!(f, "Invalid character literal"), + InvalidKeyword => write!(f, "Invalid keyword"), + InvalidNumber => write!(f, "Invalid number literal"), + InvalidSymbol => write!(f, "Invalid symbol"), + UnclosedString => write!(f, "Unclosed string literal"), + } + } +} + +/// Errors which occur during lexical analysis. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct LexerError { + /// The kind of lexer error. + pub kind: LexerErrorKind, + /// Additional context regarding the lexer error. + pub context: Option, +} + +impl LexerError { + /// Construct a new instance of a lexer error. + #[must_use] + pub const fn new(kind: LexerErrorKind) -> Self { + Self { + kind, + context: None, + } + } + + /// Provide additional context for a lexer error. 
+ #[must_use] + pub fn with_context(mut self, f: impl FnOnce() -> C) -> Self + where + C: fmt::Display, + { + self.context = Some(f().to_string()); + self } } impl std::error::Error for LexerError {} #[cfg(not(tarpaulin_include))] -impl std::fmt::Display for LexerError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - LexerError::InvalidChar => write!(f, "An invalid character literal was encountered"), - LexerError::InvalidKeyword => write!(f, "An invalid keyword literal was encountered"), - LexerError::InvalidNumber => write!(f, "An invalid number literal was encountered"), - LexerError::InvalidToken => write!(f, "An invalid token was encountered"), - LexerError::UnclosedString => write!(f, "An unclosed string was encountered"), - LexerError::Utf8Error(err) => write!(f, "{err}"), +impl fmt::Display for LexerError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + if let Some(ref context) = self.context { + write!(f, "{}: {}", self.kind, context) + } else { + write!(f, "{}", self.kind) } } } diff --git a/onihime/src/lexer/mod.rs b/onihime/src/lexer/mod.rs index 78c589f..03984e1 100644 --- a/onihime/src/lexer/mod.rs +++ b/onihime/src/lexer/mod.rs @@ -9,14 +9,14 @@ use std::{ iter::Peekable, - ops::Range, str::{self, Chars}, }; pub use self::{ - error::LexerError, + error::{LexerError, LexerErrorKind}, token::{Token, TokenKind}, }; +use crate::Span; mod error; mod token; @@ -31,7 +31,7 @@ pub struct Lexer<'a> { } impl<'a> Lexer<'a> { - /// Construct a new lexer instance. + /// Construct a new instance of a lexer. #[must_use] pub fn new(source: &'a str) -> Self { Self { @@ -52,8 +52,8 @@ impl<'a> Lexer<'a> { /// Return the span of the current [Token]. #[inline] #[must_use] - pub fn span(&self) -> Range { - self.token_start..self.cursor + pub fn span(&self) -> Span { + Span::new(self.token_start, self.cursor) } /// Return the next [Token] in the input stream. 
@@ -62,7 +62,7 @@ impl<'a> Lexer<'a> { self.token_start = self.cursor; let Some(c) = self.advance() else { - return Ok(None); // EOF reached + return Ok(None); // EOF reached, no input left to tokenize }; let kind = match c { @@ -78,36 +78,17 @@ impl<'a> Lexer<'a> { '{' => TokenKind::OpenBrace, '}' => TokenKind::CloseBrace, - // Dispatch: - '#' => match self.advance() { - Some('{') => TokenKind::OpenHashBrace, - Some('_') => TokenKind::Discard, - _ => { - self.read_word(); // Recover - return Err(LexerError::InvalidToken); - } - }, - - // Macros: - '\'' => TokenKind::Quote, - '`' => TokenKind::BackQuote, - ',' if self.peek().is_some_and(|c| *c == '@') => { - self.advance(); // '@' - TokenKind::CommaAt - } - ',' => TokenKind::Comma, - // Literals: '\\' => self.read_char()?, ':' => self.read_keyword()?, '0'..='9' => self.read_number(c)?, '+' | '-' if self.peek().is_some_and(|c| c.is_ascii_digit()) => self.read_number(c)?, '"' => self.read_string()?, - _ if is_symbol_prefix(c) => { + _ if is_symbol_prefix(&c) => { self.read_word(); - match str::from_utf8(self.slice())? 
{ - "true" | "false" => TokenKind::Bool, - "nil" => TokenKind::Nil, + match self.slice() { + b"true" | b"false" => TokenKind::Bool, + b"nil" => TokenKind::Nil, _ => TokenKind::Symbol, } } @@ -115,11 +96,11 @@ impl<'a> Lexer<'a> { // Invalid tokens: _ => { self.read_word(); // Recover - return Err(LexerError::InvalidToken); + return Err(LexerError::new(LexerErrorKind::InvalidSymbol)); } }; - Ok(Some(Token::new(kind))) + Ok(Some(Token::new(kind, self.span()))) } #[inline] @@ -140,12 +121,12 @@ impl<'a> Lexer<'a> { } fn read_word(&mut self) { - self.take_while(|c| !is_separator(*c)); + self.take_while(|c| !is_separator(c)); } fn read_comment(&mut self) -> TokenKind { self.take_while(|c| *c != '\n'); - TokenKind::LineComment + TokenKind::Comment } fn read_whitespace(&mut self) -> TokenKind { @@ -155,16 +136,19 @@ impl<'a> Lexer<'a> { fn read_char(&mut self) -> Result { // NOTE: We have already consumed the initial '\' when this function is invoked - let c = if self.peek().is_some_and(|c| !is_separator(*c)) { + + let c = if self.peek().is_some_and(|c| !is_separator(c)) { self.advance().unwrap() // SAFETY: This will never panic } else { - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); }; match c { - 'u' if self.peek().is_some_and(|c| !is_separator(*c)) => self.complete_unicode_escape(), - 'x' if self.peek().is_some_and(|c| !is_separator(*c)) => self.complete_ascii_escape(), - _ if self.peek().is_some_and(|c| !is_separator(*c)) => Err(LexerError::InvalidChar), + 'u' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_unicode_escape(), + 'x' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_ascii_escape(), + _ if self.peek().is_some_and(|c| !is_separator(c)) => { + Err(LexerError::new(LexerErrorKind::InvalidChar)) + } _ => Ok(TokenKind::Char), } } @@ -177,7 +161,7 @@ impl<'a> Lexer<'a> { self.advance(); } else { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return 
Err(LexerError::new(LexerErrorKind::InvalidChar)); } // Expect a single hexadecimal digit: @@ -185,14 +169,14 @@ impl<'a> Lexer<'a> { self.advance(); } else { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); } // We should be at the end of the literal now, i.e. next char should be a // separator: - if self.peek().is_some_and(|c| !is_separator(*c)) { + if self.peek().is_some_and(|c| !is_separator(c)) { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); } Ok(TokenKind::Char) @@ -203,21 +187,21 @@ impl<'a> Lexer<'a> { // Expect between 1 and 6 hexadecimal digits: let mut count = 0; - while self.peek().is_some_and(|c| !is_separator(*c)) && count < 6 { + while self.peek().is_some_and(|c| !is_separator(c)) && count < 6 { match self.advance() { Some(c) if c.is_ascii_hexdigit() => count += 1, _ => { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); } }; } // If no hexadecimal digits were found, or digits were found but we are still // not at the end of the literal, then the literal is invalid: - if count == 0 || self.peek().is_some_and(|c| !is_separator(*c)) { + if count == 0 || self.peek().is_some_and(|c| !is_separator(c)) { self.read_word(); // Recover - return Err(LexerError::InvalidChar); + return Err(LexerError::new(LexerErrorKind::InvalidChar)); } Ok(TokenKind::Char) @@ -225,12 +209,13 @@ impl<'a> Lexer<'a> { fn read_keyword(&mut self) -> Result { // NOTE: We have already consumed the initial ':' when this function is invoked - if self.peek().is_some_and(|c| is_symbol_prefix(*c)) { + + if self.peek().is_some_and(|c| !is_separator(c)) { self.read_word(); Ok(TokenKind::Keyword) } else { self.read_word(); // Recover - Err(LexerError::InvalidKeyword) + Err(LexerError::new(LexerErrorKind::InvalidKeyword)) } } @@ -244,14 +229,14 @@ impl<'a> 
Lexer<'a> { } } - while self.peek().is_some_and(|c| !is_separator(*c)) { + while self.peek().is_some_and(|c| !is_separator(c)) { match self.advance() { Some(c) if c.is_ascii_digit() => {} Some('.') => return self.complete_decimal(), Some('/') => return self.complete_ratio(), _ => { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } } } @@ -261,26 +246,27 @@ impl<'a> Lexer<'a> { fn read_number_radix(&mut self, radix: u32) -> Result { // NOTE: We have already consumed the initial '0' when this function is invoked + self.advance(); // Base prefix (i.e. 'b'/'B', 'o'/'O', 'x'/'X') let mut digit_found = false; while let Some(c) = self.peek() { match c { - _ if is_separator(*c) => break, + _ if is_separator(c) => break, _ if c.is_digit(radix) => { - digit_found = true; self.advance(); + digit_found = true; } _ => { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } }; } if !digit_found { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } Ok(TokenKind::Integer) @@ -289,18 +275,19 @@ impl<'a> Lexer<'a> { fn complete_decimal(&mut self) -> Result { // NOTE: We have already consumed the leading digits and '.' 
when this function // is invoked + let mut digit_found = false; let mut exp_found = false; let mut sign_found = false; - while self.peek().is_some_and(|c| !is_separator(*c)) { + while self.peek().is_some_and(|c| !is_separator(c)) { match self.advance() { Some(c) if c.is_ascii_digit() => digit_found = true, Some('e') | Some('E') if digit_found && !exp_found => exp_found = true, Some('+') | Some('-') if exp_found && !sign_found => sign_found = true, Some(_) => { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } None => unreachable!(), }; @@ -312,16 +299,17 @@ impl<'a> Lexer<'a> { fn complete_ratio(&mut self) -> Result { // NOTE: We have already consumed the leading digits and '/' when this function // is invoked + let mut sign_found = false; let mut digit_found = false; - while self.peek().is_some_and(|c| !is_separator(*c)) { + while self.peek().is_some_and(|c| !is_separator(c)) { match self.advance() { Some(c) if c.is_ascii_digit() => digit_found = true, Some('+') | Some('-') if !digit_found && !sign_found => sign_found = true, Some(_) => { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } None => unreachable!(), }; @@ -329,7 +317,7 @@ impl<'a> Lexer<'a> { if !digit_found { self.read_word(); // Recover - return Err(LexerError::InvalidNumber); + return Err(LexerError::new(LexerErrorKind::InvalidNumber)); } Ok(TokenKind::Ratio) @@ -337,6 +325,7 @@ impl<'a> Lexer<'a> { fn read_string(&mut self) -> Result { // NOTE: We have already consumed the initial '"' when this function is invoked + loop { match self.advance() { Some('"') => break, @@ -344,7 +333,7 @@ impl<'a> Lexer<'a> { self.advance(); // '"' } Some(_) => {} - None => return Err(LexerError::UnclosedString), + None => return Err(LexerError::new(LexerErrorKind::UnclosedString)), } } @@ -361,12 +350,12 @@ impl Iterator for Lexer<'_> { } #[inline] -fn 
is_separator(c: char) -> bool { +fn is_separator(c: &char) -> bool { c.is_ascii_whitespace() | matches!(c, '(' | ')' | '[' | ']' | '{' | '}' | ';') } #[inline] -fn is_symbol_prefix(c: char) -> bool { +fn is_symbol_prefix(c: &char) -> bool { c.is_alphabetic() | matches!( c, @@ -382,7 +371,7 @@ mod tests { fn empty() { let mut lexer = Lexer::new(""); assert_eq!(lexer.next(), None); - assert_eq!(lexer.span(), 0..0); + assert_eq!(lexer.span(), Span::default()); assert_eq!(lexer.slice(), &[]); } @@ -397,7 +386,7 @@ mod tests { Err(e) => Err(e), }); assert_eq!(kind, Some(token)); - assert_eq!(span, lexer.span()); + assert_eq!(span, lexer.span().into()); assert_eq!(slice.as_bytes(), lexer.slice()); } assert_eq!(lexer.next(), None); @@ -406,11 +395,11 @@ mod tests { } test!(line_comment: ";; foobar\nnil ; bar; baz" => [ - (Ok(TokenKind::LineComment), 0..9, ";; foobar"), + (Ok(TokenKind::Comment), 0..9, ";; foobar"), (Ok(TokenKind::Whitespace), 9..10, "\n"), (Ok(TokenKind::Nil), 10..13, "nil"), (Ok(TokenKind::Whitespace), 13..14, " "), - (Ok(TokenKind::LineComment), 14..24, "; bar; baz"), + (Ok(TokenKind::Comment), 14..24, "; bar; baz"), ]); test!(list: "(0 1.2 -3/4 +5.6e-7)" => [ @@ -449,33 +438,21 @@ mod tests { (Ok(TokenKind::CloseBracket), 11..12, "]"), ]); - test!(dispatch: "#{} #_() #_ 4" => [ - (Ok(TokenKind::OpenHashBrace), 0..2, "#{"), - (Ok(TokenKind::CloseBrace), 2..3, "}"), - (Ok(TokenKind::Whitespace), 3..4, " "), - (Ok(TokenKind::Discard), 4..6, "#_"), - (Ok(TokenKind::OpenParen), 6..7, "("), - (Ok(TokenKind::CloseParen), 7..8, ")"), - (Ok(TokenKind::Whitespace), 8..9, " "), - (Ok(TokenKind::Discard), 9..11, "#_"), - (Ok(TokenKind::Whitespace), 11..12, " "), - (Ok(TokenKind::Integer), 12..13, "4"), - ]); - - test!(err_invalid_dispatch: "#@" => [ - (Err(LexerError::InvalidToken), 0..2, "#@"), - ]); - - test!(keyword: ":m :x0 :this-is-an-keyword-too!" => [ + test!(keyword: ":m :0 :this-is-an-keyword-too! 
:đŸ˜»" => [ (Ok(TokenKind::Keyword), 0..2, ":m"), (Ok(TokenKind::Whitespace), 2..3, " "), - (Ok(TokenKind::Keyword), 3..6, ":x0"), - (Ok(TokenKind::Whitespace), 6..7, " "), - (Ok(TokenKind::Keyword), 7..31, ":this-is-an-keyword-too!"), + (Ok(TokenKind::Keyword), 3..5, ":0"), + (Ok(TokenKind::Whitespace), 5..6, " "), + (Ok(TokenKind::Keyword), 6..30, ":this-is-an-keyword-too!"), + (Ok(TokenKind::Whitespace), 30..31, " "), + (Ok(TokenKind::Keyword), 31..36, ":đŸ˜»"), ]); - test!(err_invalid_keyword: ":0" => [ - (Err(LexerError::InvalidKeyword), 0..2, ":0"), + test!(err_invalid_keyword: ": :;" => [ + (Err(LexerError::new(LexerErrorKind::InvalidKeyword)), 0..1, ":"), + (Ok(TokenKind::Whitespace), 1..2, " "), + (Err(LexerError::new(LexerErrorKind::InvalidKeyword)), 2..3, ":"), + (Ok(TokenKind::Comment), 3..4, ";"), ]); test!(char: r"\a \? \7 \λ \\ \u \x" => [ @@ -495,47 +472,47 @@ mod tests { ]); test!(err_invalid_char: r"\ \xF \x0 \x111 \uG \u2222222" => [ - (Err(LexerError::InvalidChar), 0..1, r"\"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 0..1, r"\"), (Ok(TokenKind::Whitespace), 1..2, " "), - (Err(LexerError::InvalidChar), 2..5, r"\xF"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 2..5, r"\xF"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::InvalidChar), 6..9, r"\x0"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 6..9, r"\x0"), (Ok(TokenKind::Whitespace), 9..10, " "), - (Err(LexerError::InvalidChar), 10..15, r"\x111"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 10..15, r"\x111"), (Ok(TokenKind::Whitespace), 15..16, " "), - (Err(LexerError::InvalidChar), 16..19, r"\uG"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 16..19, r"\uG"), (Ok(TokenKind::Whitespace), 19..20, " "), - (Err(LexerError::InvalidChar), 20..29, r"\u2222222"), + (Err(LexerError::new(LexerErrorKind::InvalidChar)), 20..29, r"\u2222222"), ]); test!(err_invalid_integer: "0b012 0o8 0xFG 1N 0x" => [ - (Err(LexerError::InvalidNumber), 0..5, 
"0b012"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..5, "0b012"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::InvalidNumber), 6..9, "0o8"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 6..9, "0o8"), (Ok(TokenKind::Whitespace), 9..10, " "), - (Err(LexerError::InvalidNumber), 10..14, "0xFG"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 10..14, "0xFG"), (Ok(TokenKind::Whitespace), 14..15, " "), - (Err(LexerError::InvalidNumber), 15..17, "1N"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 15..17, "1N"), (Ok(TokenKind::Whitespace), 17..18, " "), - (Err(LexerError::InvalidNumber), 18..20, "0x"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 18..20, "0x"), ]); test!(err_invalid_decimal: "1.2.3 4.e6 7.8+ 9.0+e1" => [ - (Err(LexerError::InvalidNumber), 0..5, "1.2.3"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..5, "1.2.3"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::InvalidNumber), 6..10, "4.e6"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 6..10, "4.e6"), (Ok(TokenKind::Whitespace), 10..11, " "), - (Err(LexerError::InvalidNumber), 11..15, "7.8+"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 11..15, "7.8+"), (Ok(TokenKind::Whitespace), 15..16, " "), - (Err(LexerError::InvalidNumber), 16..22, "9.0+e1"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 16..22, "9.0+e1"), ]); test!(err_invalid_ratio: "1/ -2/3+ 4/-" => [ - (Err(LexerError::InvalidNumber), 0..2, "1/"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..2, "1/"), (Ok(TokenKind::Whitespace), 2..3, " "), - (Err(LexerError::InvalidNumber), 3..8, "-2/3+"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 3..8, "-2/3+"), (Ok(TokenKind::Whitespace), 8..9, " "), - (Err(LexerError::InvalidNumber), 9..12, "4/-"), + (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 9..12, "4/-"), ]); test!(string: "\"föö bar1\nbaz\" \"\" \"懄い 😍\"" => [ @@ -547,7 +524,7 @@ mod tests { 
]); test!(err_unclosed_string: "\"oops" => [ - (Err(LexerError::UnclosedString), 0..5, "\"oops"), + (Err(LexerError::new(LexerErrorKind::UnclosedString)), 0..5, "\"oops"), ]); test!(symbol: "+ rev fold0 nil? x str-cat 猫" => [ @@ -572,9 +549,9 @@ mod tests { #[test] fn $name(x in $input) { let mut lexer = Lexer::new(&x); - assert_eq!(lexer.next(), Some(Ok(Token { kind: TokenKind::$kind }))); + assert_eq!(lexer.next(), Some(Ok(Token::new(TokenKind::$kind, lexer.span())))); assert_eq!(lexer.slice(), x.as_bytes()); - assert_eq!(lexer.span(), 0..x.len()); + assert_eq!(lexer.span(), Span::new(0, x.len())); } } }; diff --git a/onihime/src/lexer/token.rs b/onihime/src/lexer/token.rs index cb43a34..52f64d5 100644 --- a/onihime/src/lexer/token.rs +++ b/onihime/src/lexer/token.rs @@ -1,12 +1,12 @@ +use crate::Span; + /// Kinds of tokens which are valid in Onihime source code. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum TokenKind { + /// Line comment, e.g. `; ...` + Comment, /// Whitespace, e.g. ' ', '\t', '\n' Whitespace, - /// Line comment, e.g. `; ...` - LineComment, - /// Discard, e.g. `#_ 4`, `#_( ... )` - Discard, /// Opening parenthesis, e.g. `(` OpenParen, @@ -20,8 +20,6 @@ pub enum TokenKind { OpenBracket, /// Closing bracket, e.g. `]` CloseBracket, - /// Opening hash-brace, e.g. `#{` - OpenHashBrace, /// Boolean, e.g. `true`, `false` Bool, @@ -41,15 +39,18 @@ pub enum TokenKind { Symbol, /// Nil, e.g. `nil` Nil, +} - /// Comma, e.g. `,` - Comma, - /// Comma followed by at sign, e.g. `,@` - CommaAt, - /// Backtick quote, e.g. `` ` `` - BackQuote, - /// Single quote, e.g. `'` - Quote, +impl TokenKind { + /// Returns `true` if the token type is an atom. + pub fn is_atom(&self) -> bool { + use TokenKind::*; + + matches!( + self, + Bool | Char | Keyword | Decimal | Integer | Ratio | String | Symbol | Nil + ) + } } /// A valid token found in Onihime source code. @@ -57,12 +58,14 @@ pub struct Token { /// Kind of token which was found. 
pub kind: TokenKind, + /// The token's span. + pub span: Span, } impl Token { - /// Construct a new instance of `Token`. + /// Construct a new instance of a token. #[must_use] - pub const fn new(kind: TokenKind) -> Self { - Self { kind } + pub const fn new(kind: TokenKind, span: Span) -> Self { + Self { kind, span } } }