diff --git a/onihime/src/lexer/error.rs b/onihime/src/lexer/error.rs index 8fa2f67..06c2252 100644 --- a/onihime/src/lexer/error.rs +++ b/onihime/src/lexer/error.rs @@ -1,5 +1,7 @@ use std::fmt; +use crate::Span; + /// Kinds of errors which can occur during lexical analysis. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum LexerErrorKind { @@ -35,6 +37,8 @@ impl fmt::Display for LexerErrorKind { pub struct LexerError { /// The kind of lexer error. pub kind: LexerErrorKind, + /// The span of the lexer error. + pub span: Span, /// Additional context regarding the lexer error. pub context: Option, } @@ -42,9 +46,10 @@ pub struct LexerError { impl LexerError { /// Construct a new instance of a lexer error. #[must_use] - pub const fn new(kind: LexerErrorKind) -> Self { + pub const fn new(kind: LexerErrorKind, span: Span) -> Self { Self { kind, + span, context: None, } } diff --git a/onihime/src/lexer/mod.rs b/onihime/src/lexer/mod.rs index 03984e1..758d6a5 100644 --- a/onihime/src/lexer/mod.rs +++ b/onihime/src/lexer/mod.rs @@ -96,7 +96,7 @@ impl<'a> Lexer<'a> { // Invalid tokens: _ => { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidSymbol)); + return Err(LexerError::new(LexerErrorKind::InvalidSymbol, self.span())); } }; @@ -140,14 +140,14 @@ impl<'a> Lexer<'a> { let c = if self.peek().is_some_and(|c| !is_separator(c)) { self.advance().unwrap() // SAFETY: This will never panic } else { - return Err(LexerError::new(LexerErrorKind::InvalidChar)); + return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span())); }; match c { 'u' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_unicode_escape(), 'x' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_ascii_escape(), _ if self.peek().is_some_and(|c| !is_separator(c)) => { - Err(LexerError::new(LexerErrorKind::InvalidChar)) + Err(LexerError::new(LexerErrorKind::InvalidChar, self.span())) } _ => Ok(TokenKind::Char), } @@ -161,7 +161,7 @@ impl<'a> Lexer<'a> { self.advance(); } else { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidChar)); + return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span())); } // Expect a single hexadecimal digit: @@ -169,14 +169,14 @@ impl<'a> Lexer<'a> { self.advance(); } else { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidChar)); + return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span())); } // We should be at the end of the literal now, i.e. next char should be a // separator: if self.peek().is_some_and(|c| !is_separator(c)) { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidChar)); + return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span())); } Ok(TokenKind::Char) @@ -192,7 +192,7 @@ impl<'a> Lexer<'a> { Some(c) if c.is_ascii_hexdigit() => count += 1, _ => { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidChar)); + return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span())); } }; } @@ -201,7 +201,7 @@ impl<'a> Lexer<'a> { // not at the end of the literal, then the literal is invalid: if count == 0 || self.peek().is_some_and(|c| !is_separator(c)) { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidChar)); + return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span())); } Ok(TokenKind::Char) @@ -215,7 +215,7 @@ impl<'a> Lexer<'a> { Ok(TokenKind::Keyword) } else { self.read_word(); // Recover - Err(LexerError::new(LexerErrorKind::InvalidKeyword)) + Err(LexerError::new(LexerErrorKind::InvalidKeyword, self.span())) } } @@ -236,7 +236,7 @@ impl<'a> Lexer<'a> { Some('/') => return self.complete_ratio(), _ => { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidNumber)); + return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span())); } } } @@ -259,14 +259,14 @@ impl<'a> Lexer<'a> { } _ => { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidNumber)); + return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span())); } }; } if !digit_found { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidNumber)); + return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span())); } Ok(TokenKind::Integer) @@ -287,7 +287,7 @@ impl<'a> Lexer<'a> { Some('+') | Some('-') if exp_found && !sign_found => sign_found = true, Some(_) => { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidNumber)); + return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span())); } None => unreachable!(), }; @@ -309,7 +309,7 @@ impl<'a> Lexer<'a> { Some('+') | Some('-') if !digit_found && !sign_found => sign_found = true, Some(_) => { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidNumber)); + return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span())); } None => unreachable!(), }; @@ -317,7 +317,7 @@ impl<'a> Lexer<'a> { if !digit_found { self.read_word(); // Recover - return Err(LexerError::new(LexerErrorKind::InvalidNumber)); + return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span())); } Ok(TokenKind::Ratio) @@ -333,7 +333,7 @@ impl<'a> Lexer<'a> { self.advance(); // '"' } Some(_) => {} - None => return Err(LexerError::new(LexerErrorKind::UnclosedString)), + None => return Err(LexerError::new(LexerErrorKind::UnclosedString, self.span())), } } @@ -375,17 +375,29 @@ mod tests { assert_eq!(lexer.slice(), &[]); } + // Okay... this kind of sucks. But, makes writing tests pretty easy :) + // + // Provide the name of the test, an input string to lex, and a list of expected + // results as parameters to the macro. A test case will be generated + // automagically as a result. macro_rules! test { ( $name:ident: $input:literal => $expected:expr ) => { #[test] fn $name() { let mut lexer = Lexer::new($input); - for (token, span, slice) in $expected { + for (result, span, slice) in $expected { let kind = lexer.next().map(|r| match r { Ok(t) => Ok(t.kind), - Err(e) => Err(e), + Err(mut e) => { + e.context = None; // Don't care about this for testing + Err(e) + } }); - assert_eq!(kind, Some(token)); + let result = match result { + Ok(t) => Ok(t), + Err(e) => Err(LexerError::new(e, lexer.span())), + }; + assert_eq!(kind, Some(result)); assert_eq!(span, lexer.span().into()); assert_eq!(slice.as_bytes(), lexer.slice()); } @@ -449,9 +461,9 @@ mod tests { ]); test!(err_invalid_keyword: ": :;" => [ - (Err(LexerError::new(LexerErrorKind::InvalidKeyword)), 0..1, ":"), + (Err(LexerErrorKind::InvalidKeyword), 0..1, ":"), (Ok(TokenKind::Whitespace), 1..2, " "), - (Err(LexerError::new(LexerErrorKind::InvalidKeyword)), 2..3, ":"), + (Err(LexerErrorKind::InvalidKeyword), 2..3, ":"), (Ok(TokenKind::Comment), 3..4, ";"), ]); @@ -472,47 +484,47 @@ mod tests { ]); test!(err_invalid_char: r"\ \xF \x0 \x111 \uG \u2222222" => [ - (Err(LexerError::new(LexerErrorKind::InvalidChar)), 0..1, r"\"), + (Err(LexerErrorKind::InvalidChar), 0..1, r"\"), (Ok(TokenKind::Whitespace), 1..2, " "), - (Err(LexerError::new(LexerErrorKind::InvalidChar)), 2..5, r"\xF"), + (Err(LexerErrorKind::InvalidChar), 2..5, r"\xF"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::new(LexerErrorKind::InvalidChar)), 6..9, r"\x0"), + (Err(LexerErrorKind::InvalidChar), 6..9, r"\x0"), (Ok(TokenKind::Whitespace), 9..10, " "), - (Err(LexerError::new(LexerErrorKind::InvalidChar)), 10..15, r"\x111"), + (Err(LexerErrorKind::InvalidChar), 10..15, r"\x111"), (Ok(TokenKind::Whitespace), 15..16, " "), - (Err(LexerError::new(LexerErrorKind::InvalidChar)), 16..19, r"\uG"), + (Err(LexerErrorKind::InvalidChar), 16..19, r"\uG"), (Ok(TokenKind::Whitespace), 19..20, " "), - (Err(LexerError::new(LexerErrorKind::InvalidChar)), 20..29, r"\u2222222"), + (Err(LexerErrorKind::InvalidChar), 20..29, r"\u2222222"), ]); test!(err_invalid_integer: "0b012 0o8 0xFG 1N 0x" => [ - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..5, "0b012"), + (Err(LexerErrorKind::InvalidNumber), 0..5, "0b012"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 6..9, "0o8"), + (Err(LexerErrorKind::InvalidNumber), 6..9, "0o8"), (Ok(TokenKind::Whitespace), 9..10, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 10..14, "0xFG"), + (Err(LexerErrorKind::InvalidNumber), 10..14, "0xFG"), (Ok(TokenKind::Whitespace), 14..15, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 15..17, "1N"), + (Err(LexerErrorKind::InvalidNumber), 15..17, "1N"), (Ok(TokenKind::Whitespace), 17..18, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 18..20, "0x"), + (Err(LexerErrorKind::InvalidNumber), 18..20, "0x"), ]); test!(err_invalid_decimal: "1.2.3 4.e6 7.8+ 9.0+e1" => [ - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..5, "1.2.3"), + (Err(LexerErrorKind::InvalidNumber), 0..5, "1.2.3"), (Ok(TokenKind::Whitespace), 5..6, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 6..10, "4.e6"), + (Err(LexerErrorKind::InvalidNumber), 6..10, "4.e6"), (Ok(TokenKind::Whitespace), 10..11, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 11..15, "7.8+"), + (Err(LexerErrorKind::InvalidNumber), 11..15, "7.8+"), (Ok(TokenKind::Whitespace), 15..16, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 16..22, "9.0+e1"), + (Err(LexerErrorKind::InvalidNumber), 16..22, "9.0+e1"), ]); test!(err_invalid_ratio: "1/ -2/3+ 4/-" => [ - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..2, "1/"), + (Err(LexerErrorKind::InvalidNumber), 0..2, "1/"), (Ok(TokenKind::Whitespace), 2..3, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 3..8, "-2/3+"), + (Err(LexerErrorKind::InvalidNumber), 3..8, "-2/3+"), (Ok(TokenKind::Whitespace), 8..9, " "), - (Err(LexerError::new(LexerErrorKind::InvalidNumber)), 9..12, "4/-"), + (Err(LexerErrorKind::InvalidNumber), 9..12, "4/-"), ]); test!(string: "\"föö bar1\nbaz\" \"\" \"懄い 😍\"" => [ @@ -524,7 +536,7 @@ mod tests { ]); test!(err_unclosed_string: "\"oops" => [ - (Err(LexerError::new(LexerErrorKind::UnclosedString)), 0..5, "\"oops"), + (Err(LexerErrorKind::UnclosedString), 0..5, "\"oops"), ]); test!(symbol: "+ rev fold0 nil? x str-cat 猫" => [ @@ -543,6 +555,13 @@ mod tests { (Ok(TokenKind::Symbol), 27..30, "猫"), ]); + // More macro magic (yay!). Allows for creating proptests in one line, fancy! + // + // Provide the name of the test, a regular expression to generate valid inputs + // with, and the expected `TokenKind` variant as the parameters to the macro. + // A test case will be generated automagically as a result. + // + // e.g. `ptest!(TEST_NAME: REGEX => VARIANT);` macro_rules! ptest { ( $name:ident: $input:literal => $kind:ident ) => { proptest::proptest! {