Even more lexer improvements, increased test coverage too!
This commit is contained in:
		
							parent
							
								
									0839bd542c
								
							
						
					
					
						commit
						11917bb183
					
				@ -8,3 +8,6 @@ repository.workspace = true
 | 
			
		||||
license.workspace    = true
 | 
			
		||||
 | 
			
		||||
[dependencies]
 | 
			
		||||
 | 
			
		||||
[lints.rust]
 | 
			
		||||
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] }
 | 
			
		||||
 | 
			
		||||
@ -1,10 +1,8 @@
 | 
			
		||||
use crate::span::Span;
 | 
			
		||||
 | 
			
		||||
/// Errors that can occur during lexical analysis.
 | 
			
		||||
#[derive(Debug, Clone, PartialEq, Eq)]
 | 
			
		||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
 | 
			
		||||
pub enum LexerErrorKind {
 | 
			
		||||
    /// An invalid character was encountered.
 | 
			
		||||
    InvalidChar,
 | 
			
		||||
    /// An invalid escape sequence was encountered.
 | 
			
		||||
    InvalidEscape(char),
 | 
			
		||||
    /// An invalid number was encountered.
 | 
			
		||||
@ -18,7 +16,7 @@ pub enum LexerErrorKind {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Lexer error, with a start and end location.
 | 
			
		||||
#[derive(Debug, Clone, PartialEq)]
 | 
			
		||||
#[derive(Debug, Clone, PartialEq, Hash)]
 | 
			
		||||
pub struct LexerError {
 | 
			
		||||
    /// The type of error encountered.
 | 
			
		||||
    pub kind: LexerErrorKind,
 | 
			
		||||
@ -36,12 +34,12 @@ impl LexerError {
 | 
			
		||||
 | 
			
		||||
impl std::error::Error for LexerError {}
 | 
			
		||||
 | 
			
		||||
#[cfg(not(tarpaulin_include))]
 | 
			
		||||
impl std::fmt::Display for LexerError {
 | 
			
		||||
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
 | 
			
		||||
        use LexerErrorKind::*;
 | 
			
		||||
 | 
			
		||||
        match &self.kind {
 | 
			
		||||
            InvalidChar => write!(f, "Invalid character literal"),
 | 
			
		||||
            InvalidEscape(c) => write!(f, "Unknown escape sequence '\\{c}' in string"),
 | 
			
		||||
            InvalidNumber(n) => write!(f, "`{n}` is not a valid numeric literal"),
 | 
			
		||||
            InvalidString => write!(f, "Invalid string literal"),
 | 
			
		||||
 | 
			
		||||
@ -92,6 +92,14 @@ impl<'lexer> Lexer<'lexer> {
 | 
			
		||||
        Some(c)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Advance the lexer by one character, and then return the specified
 | 
			
		||||
    /// `TokenKind`:
 | 
			
		||||
    fn advance_and(&mut self, kind: TokenKind) -> TokenKind {
 | 
			
		||||
        self.advance();
 | 
			
		||||
 | 
			
		||||
        kind
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Read a word from the input until a separator is reached.
 | 
			
		||||
    fn read_word(&mut self) -> String {
 | 
			
		||||
        let mut word = String::new();
 | 
			
		||||
@ -135,13 +143,12 @@ impl<'lexer> Lexer<'lexer> {
 | 
			
		||||
        let kind = match c {
 | 
			
		||||
            '#' if matches!(self.peek(1), Some('|')) => {
 | 
			
		||||
                self.advance(); // '#'
 | 
			
		||||
                self.advance(); // '|#'
 | 
			
		||||
                self.advance(); // '|'
 | 
			
		||||
 | 
			
		||||
                let mut comment = String::new();
 | 
			
		||||
                while let Some(c) = self.advance() {
 | 
			
		||||
                    match c {
 | 
			
		||||
                        '|' if matches!(self.peek(0), Some('#')) => {
 | 
			
		||||
                            self.advance(); // '|'
 | 
			
		||||
                            self.advance(); // '#'
 | 
			
		||||
                            break;
 | 
			
		||||
                        }
 | 
			
		||||
@ -154,44 +161,29 @@ impl<'lexer> Lexer<'lexer> {
 | 
			
		||||
                TokenKind::BlockComment(comment.trim().into())
 | 
			
		||||
            }
 | 
			
		||||
            ';' => {
 | 
			
		||||
                while self.current().is_some_and(|c| c == ';') {
 | 
			
		||||
                    self.advance();
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                let mut comment = String::new();
 | 
			
		||||
                while let Some(c) = self.advance() {
 | 
			
		||||
                    match c {
 | 
			
		||||
                        ';' => continue,
 | 
			
		||||
                        '\n' => break,
 | 
			
		||||
                        c => {
 | 
			
		||||
                    if c == '\n' {
 | 
			
		||||
                        break;
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
                    comment.push(c);
 | 
			
		||||
                }
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                TokenKind::LineComment(comment.trim().into())
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            '(' => {
 | 
			
		||||
                self.advance();
 | 
			
		||||
                TokenKind::OpenParen
 | 
			
		||||
            }
 | 
			
		||||
            ')' => {
 | 
			
		||||
                self.advance();
 | 
			
		||||
                TokenKind::CloseParen
 | 
			
		||||
            }
 | 
			
		||||
            '{' => {
 | 
			
		||||
                self.advance();
 | 
			
		||||
                TokenKind::OpenBrace
 | 
			
		||||
            }
 | 
			
		||||
            '}' => {
 | 
			
		||||
                self.advance();
 | 
			
		||||
                TokenKind::CloseBrace
 | 
			
		||||
            }
 | 
			
		||||
            '[' => {
 | 
			
		||||
                self.advance();
 | 
			
		||||
                TokenKind::OpenBracket
 | 
			
		||||
            }
 | 
			
		||||
            ']' => {
 | 
			
		||||
                self.advance();
 | 
			
		||||
                TokenKind::CloseBracket
 | 
			
		||||
            }
 | 
			
		||||
            '(' => self.advance_and(TokenKind::OpenParen),
 | 
			
		||||
            ')' => self.advance_and(TokenKind::CloseParen),
 | 
			
		||||
            '{' => self.advance_and(TokenKind::OpenBrace),
 | 
			
		||||
            '}' => self.advance_and(TokenKind::CloseBrace),
 | 
			
		||||
            '[' => self.advance_and(TokenKind::OpenBracket),
 | 
			
		||||
            ']' => self.advance_and(TokenKind::CloseBracket),
 | 
			
		||||
 | 
			
		||||
            '#' if matches!(self.peek(1), Some('{')) => {
 | 
			
		||||
                self.advance(); // '#'
 | 
			
		||||
                self.advance(); // '{'
 | 
			
		||||
@ -207,6 +199,7 @@ impl<'lexer> Lexer<'lexer> {
 | 
			
		||||
                        Some('n') => '\n',
 | 
			
		||||
                        Some('e') => '\x1b',
 | 
			
		||||
                        Some(c) => {
 | 
			
		||||
                            self.read_word(); // Recover from the error
 | 
			
		||||
                            return Err(LexerError::new(
 | 
			
		||||
                                LexerErrorKind::InvalidEscape(c),
 | 
			
		||||
                                span.join(&self.span()),
 | 
			
		||||
@ -229,9 +222,9 @@ impl<'lexer> Lexer<'lexer> {
 | 
			
		||||
                };
 | 
			
		||||
 | 
			
		||||
                if self.advance() != Some('\'') {
 | 
			
		||||
                    self.read_word();
 | 
			
		||||
                    self.read_word(); // Recover from the error
 | 
			
		||||
                    return Err(LexerError::new(
 | 
			
		||||
                        LexerErrorKind::InvalidChar,
 | 
			
		||||
                        LexerErrorKind::UnclosedChar,
 | 
			
		||||
                        span.join(&self.span()),
 | 
			
		||||
                    ));
 | 
			
		||||
                }
 | 
			
		||||
@ -277,7 +270,7 @@ impl<'lexer> Lexer<'lexer> {
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                if self.current().is_some_and(|c| !c.is_separator()) {
 | 
			
		||||
                    self.read_word();
 | 
			
		||||
                    self.read_word(); // Recover from the error
 | 
			
		||||
                    return Err(LexerError::new(
 | 
			
		||||
                        LexerErrorKind::InvalidString,
 | 
			
		||||
                        span.join(&self.span()),
 | 
			
		||||
@ -339,13 +332,17 @@ mod tests {
 | 
			
		||||
        };
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    test!(block_comment: "#| foo\nbar |#", [
 | 
			
		||||
        Ok(TokenKind::BlockComment("foo\nbar".into()))
 | 
			
		||||
    test!(block_comment: "#| foo\nbar |#(- 1)", [
 | 
			
		||||
        Ok(TokenKind::BlockComment("foo\nbar".into())),
 | 
			
		||||
        Ok(TokenKind::OpenParen),
 | 
			
		||||
        Ok(TokenKind::Symbol(Symbol::from("-"))),
 | 
			
		||||
        Ok(TokenKind::Number(1.0)),
 | 
			
		||||
        Ok(TokenKind::CloseParen),
 | 
			
		||||
    ]);
 | 
			
		||||
 | 
			
		||||
    test!(line_comment: "; foo\n;; bar baz qux", [
 | 
			
		||||
    test!(line_comment: "; foo\n;; bar baz; qux", [
 | 
			
		||||
        Ok(TokenKind::LineComment("foo".into())),
 | 
			
		||||
        Ok(TokenKind::LineComment("bar baz qux".into())),
 | 
			
		||||
        Ok(TokenKind::LineComment("bar baz; qux".into())),
 | 
			
		||||
    ]);
 | 
			
		||||
 | 
			
		||||
    test!(hashset: "{{} true false}", [
 | 
			
		||||
@ -369,9 +366,21 @@ mod tests {
 | 
			
		||||
        Ok(TokenKind::CloseParen),
 | 
			
		||||
    ]);
 | 
			
		||||
 | 
			
		||||
    test!(char_literal: "'x' '\n'", [
 | 
			
		||||
    test!(vector: "[0 1 2]", [
 | 
			
		||||
        Ok(TokenKind::OpenBracket),
 | 
			
		||||
        Ok(TokenKind::Number(0.0)),
 | 
			
		||||
        Ok(TokenKind::Number(1.0)),
 | 
			
		||||
        Ok(TokenKind::Number(2.0)),
 | 
			
		||||
        Ok(TokenKind::CloseBracket),
 | 
			
		||||
    ]);
 | 
			
		||||
 | 
			
		||||
    test!(char_literal: r"'x' '\n' '\e' '\\' '\q' 'b", [
 | 
			
		||||
        Ok(TokenKind::Char('x')),
 | 
			
		||||
        Ok(TokenKind::Char('\n')),
 | 
			
		||||
        Ok(TokenKind::Char('\x1b')),
 | 
			
		||||
        Ok(TokenKind::Char('\\')),
 | 
			
		||||
        Err(LexerErrorKind::InvalidEscape('q')),
 | 
			
		||||
        Err(LexerErrorKind::UnclosedChar),
 | 
			
		||||
    ]);
 | 
			
		||||
 | 
			
		||||
    test!(lex: "(+ 14 25.5 333 (* 2 5))", [
 | 
			
		||||
 | 
			
		||||
@ -1,7 +1,7 @@
 | 
			
		||||
use std::{cmp::Ordering, iter, ops::Range, sync::Arc};
 | 
			
		||||
 | 
			
		||||
/// A location within some source text.
 | 
			
		||||
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
 | 
			
		||||
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
 | 
			
		||||
pub struct Location {
 | 
			
		||||
    line: usize,
 | 
			
		||||
    column: usize,
 | 
			
		||||
@ -25,7 +25,7 @@ impl PartialOrd for Location {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Some (optionally named) source text.
 | 
			
		||||
#[derive(Debug, Default, Clone, PartialEq, Eq)]
 | 
			
		||||
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
 | 
			
		||||
pub struct Source {
 | 
			
		||||
    name: Option<String>,
 | 
			
		||||
    contents: String,
 | 
			
		||||
@ -87,7 +87,7 @@ impl Source {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// A contiguous sequence of bytes within some source.
 | 
			
		||||
#[derive(Debug, Default, Clone)]
 | 
			
		||||
#[derive(Debug, Default, Clone, Hash)]
 | 
			
		||||
pub struct Span {
 | 
			
		||||
    bytes: Range<usize>,
 | 
			
		||||
    source: Arc<Source>,
 | 
			
		||||
 | 
			
		||||
		Loading…
	
		Reference in New Issue
	
	Block a user