Add spans to lexer errors
This commit is contained in:
parent
7add446d14
commit
2d95a58ce7
@ -1,5 +1,7 @@
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
|
|
||||||
|
use crate::Span;
|
||||||
|
|
||||||
/// Kinds of errors which can occur during lexical analysis.
|
/// Kinds of errors which can occur during lexical analysis.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
pub enum LexerErrorKind {
|
pub enum LexerErrorKind {
|
||||||
@ -35,6 +37,8 @@ impl fmt::Display for LexerErrorKind {
|
|||||||
pub struct LexerError {
|
pub struct LexerError {
|
||||||
/// The kind of lexer error.
|
/// The kind of lexer error.
|
||||||
pub kind: LexerErrorKind,
|
pub kind: LexerErrorKind,
|
||||||
|
/// The span of the lexer error.
|
||||||
|
pub span: Span,
|
||||||
/// Additional context regarding the lexer error.
|
/// Additional context regarding the lexer error.
|
||||||
pub context: Option<String>,
|
pub context: Option<String>,
|
||||||
}
|
}
|
||||||
@ -42,9 +46,10 @@ pub struct LexerError {
|
|||||||
impl LexerError {
|
impl LexerError {
|
||||||
/// Construct a new instance of a lexer error.
|
/// Construct a new instance of a lexer error.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub const fn new(kind: LexerErrorKind) -> Self {
|
pub const fn new(kind: LexerErrorKind, span: Span) -> Self {
|
||||||
Self {
|
Self {
|
||||||
kind,
|
kind,
|
||||||
|
span,
|
||||||
context: None,
|
context: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -96,7 +96,7 @@ impl<'a> Lexer<'a> {
|
|||||||
// Invalid tokens:
|
// Invalid tokens:
|
||||||
_ => {
|
_ => {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidSymbol));
|
return Err(LexerError::new(LexerErrorKind::InvalidSymbol, self.span()));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -140,14 +140,14 @@ impl<'a> Lexer<'a> {
|
|||||||
let c = if self.peek().is_some_and(|c| !is_separator(c)) {
|
let c = if self.peek().is_some_and(|c| !is_separator(c)) {
|
||||||
self.advance().unwrap() // SAFETY: This will never panic
|
self.advance().unwrap() // SAFETY: This will never panic
|
||||||
} else {
|
} else {
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidChar));
|
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
||||||
};
|
};
|
||||||
|
|
||||||
match c {
|
match c {
|
||||||
'u' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_unicode_escape(),
|
'u' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_unicode_escape(),
|
||||||
'x' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_ascii_escape(),
|
'x' if self.peek().is_some_and(|c| !is_separator(c)) => self.complete_ascii_escape(),
|
||||||
_ if self.peek().is_some_and(|c| !is_separator(c)) => {
|
_ if self.peek().is_some_and(|c| !is_separator(c)) => {
|
||||||
Err(LexerError::new(LexerErrorKind::InvalidChar))
|
Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()))
|
||||||
}
|
}
|
||||||
_ => Ok(TokenKind::Char),
|
_ => Ok(TokenKind::Char),
|
||||||
}
|
}
|
||||||
@ -161,7 +161,7 @@ impl<'a> Lexer<'a> {
|
|||||||
self.advance();
|
self.advance();
|
||||||
} else {
|
} else {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidChar));
|
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Expect a single hexadecimal digit:
|
// Expect a single hexadecimal digit:
|
||||||
@ -169,14 +169,14 @@ impl<'a> Lexer<'a> {
|
|||||||
self.advance();
|
self.advance();
|
||||||
} else {
|
} else {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidChar));
|
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should be at the end of the literal now, i.e. next char should be a
|
// We should be at the end of the literal now, i.e. next char should be a
|
||||||
// separator:
|
// separator:
|
||||||
if self.peek().is_some_and(|c| !is_separator(c)) {
|
if self.peek().is_some_and(|c| !is_separator(c)) {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidChar));
|
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(TokenKind::Char)
|
Ok(TokenKind::Char)
|
||||||
@ -192,7 +192,7 @@ impl<'a> Lexer<'a> {
|
|||||||
Some(c) if c.is_ascii_hexdigit() => count += 1,
|
Some(c) if c.is_ascii_hexdigit() => count += 1,
|
||||||
_ => {
|
_ => {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidChar));
|
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
@ -201,7 +201,7 @@ impl<'a> Lexer<'a> {
|
|||||||
// not at the end of the literal, then the literal is invalid:
|
// not at the end of the literal, then the literal is invalid:
|
||||||
if count == 0 || self.peek().is_some_and(|c| !is_separator(c)) {
|
if count == 0 || self.peek().is_some_and(|c| !is_separator(c)) {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidChar));
|
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(TokenKind::Char)
|
Ok(TokenKind::Char)
|
||||||
@ -215,7 +215,7 @@ impl<'a> Lexer<'a> {
|
|||||||
Ok(TokenKind::Keyword)
|
Ok(TokenKind::Keyword)
|
||||||
} else {
|
} else {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
Err(LexerError::new(LexerErrorKind::InvalidKeyword))
|
Err(LexerError::new(LexerErrorKind::InvalidKeyword, self.span()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -236,7 +236,7 @@ impl<'a> Lexer<'a> {
|
|||||||
Some('/') => return self.complete_ratio(),
|
Some('/') => return self.complete_ratio(),
|
||||||
_ => {
|
_ => {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidNumber));
|
return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -259,14 +259,14 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
_ => {
|
_ => {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidNumber));
|
return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span()));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
if !digit_found {
|
if !digit_found {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidNumber));
|
return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(TokenKind::Integer)
|
Ok(TokenKind::Integer)
|
||||||
@ -287,7 +287,7 @@ impl<'a> Lexer<'a> {
|
|||||||
Some('+') | Some('-') if exp_found && !sign_found => sign_found = true,
|
Some('+') | Some('-') if exp_found && !sign_found => sign_found = true,
|
||||||
Some(_) => {
|
Some(_) => {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidNumber));
|
return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span()));
|
||||||
}
|
}
|
||||||
None => unreachable!(),
|
None => unreachable!(),
|
||||||
};
|
};
|
||||||
@ -309,7 +309,7 @@ impl<'a> Lexer<'a> {
|
|||||||
Some('+') | Some('-') if !digit_found && !sign_found => sign_found = true,
|
Some('+') | Some('-') if !digit_found && !sign_found => sign_found = true,
|
||||||
Some(_) => {
|
Some(_) => {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidNumber));
|
return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span()));
|
||||||
}
|
}
|
||||||
None => unreachable!(),
|
None => unreachable!(),
|
||||||
};
|
};
|
||||||
@ -317,7 +317,7 @@ impl<'a> Lexer<'a> {
|
|||||||
|
|
||||||
if !digit_found {
|
if !digit_found {
|
||||||
self.read_word(); // Recover
|
self.read_word(); // Recover
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidNumber));
|
return Err(LexerError::new(LexerErrorKind::InvalidNumber, self.span()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(TokenKind::Ratio)
|
Ok(TokenKind::Ratio)
|
||||||
@ -333,7 +333,7 @@ impl<'a> Lexer<'a> {
|
|||||||
self.advance(); // '"'
|
self.advance(); // '"'
|
||||||
}
|
}
|
||||||
Some(_) => {}
|
Some(_) => {}
|
||||||
None => return Err(LexerError::new(LexerErrorKind::UnclosedString)),
|
None => return Err(LexerError::new(LexerErrorKind::UnclosedString, self.span())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -375,17 +375,29 @@ mod tests {
|
|||||||
assert_eq!(lexer.slice(), &[]);
|
assert_eq!(lexer.slice(), &[]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Okay... this kind of sucks. But, makes writing tests pretty easy :)
|
||||||
|
//
|
||||||
|
// Provide the name of the test, an input string to lex, and a list of expected
|
||||||
|
// results as parameters to the macro. A test case will be generated
|
||||||
|
// automagically as a result.
|
||||||
macro_rules! test {
|
macro_rules! test {
|
||||||
( $name:ident: $input:literal => $expected:expr ) => {
|
( $name:ident: $input:literal => $expected:expr ) => {
|
||||||
#[test]
|
#[test]
|
||||||
fn $name() {
|
fn $name() {
|
||||||
let mut lexer = Lexer::new($input);
|
let mut lexer = Lexer::new($input);
|
||||||
for (token, span, slice) in $expected {
|
for (result, span, slice) in $expected {
|
||||||
let kind = lexer.next().map(|r| match r {
|
let kind = lexer.next().map(|r| match r {
|
||||||
Ok(t) => Ok(t.kind),
|
Ok(t) => Ok(t.kind),
|
||||||
Err(e) => Err(e),
|
Err(mut e) => {
|
||||||
|
e.context = None; // Don't care about this for testing
|
||||||
|
Err(e)
|
||||||
|
}
|
||||||
});
|
});
|
||||||
assert_eq!(kind, Some(token));
|
let result = match result {
|
||||||
|
Ok(t) => Ok(t),
|
||||||
|
Err(e) => Err(LexerError::new(e, lexer.span())),
|
||||||
|
};
|
||||||
|
assert_eq!(kind, Some(result));
|
||||||
assert_eq!(span, lexer.span().into());
|
assert_eq!(span, lexer.span().into());
|
||||||
assert_eq!(slice.as_bytes(), lexer.slice());
|
assert_eq!(slice.as_bytes(), lexer.slice());
|
||||||
}
|
}
|
||||||
@ -449,9 +461,9 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
test!(err_invalid_keyword: ": :;" => [
|
test!(err_invalid_keyword: ": :;" => [
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidKeyword)), 0..1, ":"),
|
(Err(LexerErrorKind::InvalidKeyword), 0..1, ":"),
|
||||||
(Ok(TokenKind::Whitespace), 1..2, " "),
|
(Ok(TokenKind::Whitespace), 1..2, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidKeyword)), 2..3, ":"),
|
(Err(LexerErrorKind::InvalidKeyword), 2..3, ":"),
|
||||||
(Ok(TokenKind::Comment), 3..4, ";"),
|
(Ok(TokenKind::Comment), 3..4, ";"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
@ -472,47 +484,47 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
test!(err_invalid_char: r"\ \xF \x0 \x111 \uG \u2222222" => [
|
test!(err_invalid_char: r"\ \xF \x0 \x111 \uG \u2222222" => [
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidChar)), 0..1, r"\"),
|
(Err(LexerErrorKind::InvalidChar), 0..1, r"\"),
|
||||||
(Ok(TokenKind::Whitespace), 1..2, " "),
|
(Ok(TokenKind::Whitespace), 1..2, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidChar)), 2..5, r"\xF"),
|
(Err(LexerErrorKind::InvalidChar), 2..5, r"\xF"),
|
||||||
(Ok(TokenKind::Whitespace), 5..6, " "),
|
(Ok(TokenKind::Whitespace), 5..6, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidChar)), 6..9, r"\x0"),
|
(Err(LexerErrorKind::InvalidChar), 6..9, r"\x0"),
|
||||||
(Ok(TokenKind::Whitespace), 9..10, " "),
|
(Ok(TokenKind::Whitespace), 9..10, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidChar)), 10..15, r"\x111"),
|
(Err(LexerErrorKind::InvalidChar), 10..15, r"\x111"),
|
||||||
(Ok(TokenKind::Whitespace), 15..16, " "),
|
(Ok(TokenKind::Whitespace), 15..16, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidChar)), 16..19, r"\uG"),
|
(Err(LexerErrorKind::InvalidChar), 16..19, r"\uG"),
|
||||||
(Ok(TokenKind::Whitespace), 19..20, " "),
|
(Ok(TokenKind::Whitespace), 19..20, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidChar)), 20..29, r"\u2222222"),
|
(Err(LexerErrorKind::InvalidChar), 20..29, r"\u2222222"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
test!(err_invalid_integer: "0b012 0o8 0xFG 1N 0x" => [
|
test!(err_invalid_integer: "0b012 0o8 0xFG 1N 0x" => [
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..5, "0b012"),
|
(Err(LexerErrorKind::InvalidNumber), 0..5, "0b012"),
|
||||||
(Ok(TokenKind::Whitespace), 5..6, " "),
|
(Ok(TokenKind::Whitespace), 5..6, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 6..9, "0o8"),
|
(Err(LexerErrorKind::InvalidNumber), 6..9, "0o8"),
|
||||||
(Ok(TokenKind::Whitespace), 9..10, " "),
|
(Ok(TokenKind::Whitespace), 9..10, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 10..14, "0xFG"),
|
(Err(LexerErrorKind::InvalidNumber), 10..14, "0xFG"),
|
||||||
(Ok(TokenKind::Whitespace), 14..15, " "),
|
(Ok(TokenKind::Whitespace), 14..15, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 15..17, "1N"),
|
(Err(LexerErrorKind::InvalidNumber), 15..17, "1N"),
|
||||||
(Ok(TokenKind::Whitespace), 17..18, " "),
|
(Ok(TokenKind::Whitespace), 17..18, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 18..20, "0x"),
|
(Err(LexerErrorKind::InvalidNumber), 18..20, "0x"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
test!(err_invalid_decimal: "1.2.3 4.e6 7.8+ 9.0+e1" => [
|
test!(err_invalid_decimal: "1.2.3 4.e6 7.8+ 9.0+e1" => [
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..5, "1.2.3"),
|
(Err(LexerErrorKind::InvalidNumber), 0..5, "1.2.3"),
|
||||||
(Ok(TokenKind::Whitespace), 5..6, " "),
|
(Ok(TokenKind::Whitespace), 5..6, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 6..10, "4.e6"),
|
(Err(LexerErrorKind::InvalidNumber), 6..10, "4.e6"),
|
||||||
(Ok(TokenKind::Whitespace), 10..11, " "),
|
(Ok(TokenKind::Whitespace), 10..11, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 11..15, "7.8+"),
|
(Err(LexerErrorKind::InvalidNumber), 11..15, "7.8+"),
|
||||||
(Ok(TokenKind::Whitespace), 15..16, " "),
|
(Ok(TokenKind::Whitespace), 15..16, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 16..22, "9.0+e1"),
|
(Err(LexerErrorKind::InvalidNumber), 16..22, "9.0+e1"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
test!(err_invalid_ratio: "1/ -2/3+ 4/-" => [
|
test!(err_invalid_ratio: "1/ -2/3+ 4/-" => [
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 0..2, "1/"),
|
(Err(LexerErrorKind::InvalidNumber), 0..2, "1/"),
|
||||||
(Ok(TokenKind::Whitespace), 2..3, " "),
|
(Ok(TokenKind::Whitespace), 2..3, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 3..8, "-2/3+"),
|
(Err(LexerErrorKind::InvalidNumber), 3..8, "-2/3+"),
|
||||||
(Ok(TokenKind::Whitespace), 8..9, " "),
|
(Ok(TokenKind::Whitespace), 8..9, " "),
|
||||||
(Err(LexerError::new(LexerErrorKind::InvalidNumber)), 9..12, "4/-"),
|
(Err(LexerErrorKind::InvalidNumber), 9..12, "4/-"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
test!(string: "\"föö bar1\nbaz\" \"\" \"凄い 😍\"" => [
|
test!(string: "\"föö bar1\nbaz\" \"\" \"凄い 😍\"" => [
|
||||||
@ -524,7 +536,7 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
|
|
||||||
test!(err_unclosed_string: "\"oops" => [
|
test!(err_unclosed_string: "\"oops" => [
|
||||||
(Err(LexerError::new(LexerErrorKind::UnclosedString)), 0..5, "\"oops"),
|
(Err(LexerErrorKind::UnclosedString), 0..5, "\"oops"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
test!(symbol: "+ rev fold0 nil? x str-cat 猫" => [
|
test!(symbol: "+ rev fold0 nil? x str-cat 猫" => [
|
||||||
@ -543,6 +555,13 @@ mod tests {
|
|||||||
(Ok(TokenKind::Symbol), 27..30, "猫"),
|
(Ok(TokenKind::Symbol), 27..30, "猫"),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
|
// More macro magic (yay!). Allows for creating proptests in one line, fancy!
|
||||||
|
//
|
||||||
|
// Provide the name of the test, a regular expression to generate valid inputs
|
||||||
|
// with, and the expected `TokenKind` variant as the parameters to the macro.
|
||||||
|
// A test case will be generated automagically as a result.
|
||||||
|
//
|
||||||
|
// e.g. `ptest!(TEST_NAME: REGEX => VARIANT);`
|
||||||
macro_rules! ptest {
|
macro_rules! ptest {
|
||||||
( $name:ident: $input:literal => $kind:ident ) => {
|
( $name:ident: $input:literal => $kind:ident ) => {
|
||||||
proptest::proptest! {
|
proptest::proptest! {
|
||||||
|
Loading…
Reference in New Issue
Block a user