Lexer is starting to look pretty okay
parent 11917bb183
commit 4cdbccbc8a

@@ -1,13 +1,13 @@
 use crate::span::Span;
 
-/// Errors that can occur during lexical analysis.
+/// Kinds of errors that may occur during lexical analysis.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum LexerErrorKind {
     /// An invalid escape sequence was encountered.
     InvalidEscape(char),
-    /// An invalid number was encountered.
+    /// An invalid numeric literal was encountered.
     InvalidNumber(String),
-    /// An invalid string was encountered.
+    /// An invalid string literal was encountered.
     InvalidString,
     /// An unclosed character literal was encountered.
     UnclosedChar,
@@ -15,10 +15,16 @@ pub enum LexerErrorKind {
     UnclosedString,
 }
 
-/// Lexer error, with a start and end location.
+/// An error which occurred during lexical analysis.
+///
+/// `LexerError`s contain the kind of error which occurred, as well as a [Span]
+/// specifying the [Source] and [Location] of the error.
+///
+/// [Source]: crate::span::Source
+/// [Location]: crate::span::Location
 #[derive(Debug, Clone, PartialEq, Hash)]
 pub struct LexerError {
-    /// The type of error encountered.
+    /// The kind of error encountered.
     pub kind: LexerErrorKind,
     /// The span in which the error occurred.
     pub span: Span,
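The new doc comments describe a `LexerError` as a `kind` plus a `Span`. As a usage sketch only, here is how a caller might turn one into a message; the `describe` helper is hypothetical, and only the `kind`/`span` fields and the variants visible in this diff are taken from the source:

fn describe(err: &LexerError) -> String {
    // Match on the error kind and point at the offending span. `Span` must
    // implement `Debug` (it is a field of a `#[derive(Debug)]` struct), so
    // `{:?}` is used here for brevity.
    match &err.kind {
        LexerErrorKind::InvalidEscape(c) => format!("invalid escape `\\{c}` at {:?}", err.span),
        LexerErrorKind::InvalidNumber(n) => format!("invalid number `{n}` at {:?}", err.span),
        LexerErrorKind::InvalidString => format!("invalid string literal at {:?}", err.span),
        LexerErrorKind::UnclosedChar => format!("unclosed character literal at {:?}", err.span),
        LexerErrorKind::UnclosedString => format!("unclosed string literal at {:?}", err.span),
    }
}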
@@ -94,6 +94,7 @@ impl<'lexer> Lexer<'lexer> {
 
     /// Advance the lexer by one character, and then return the specified
     /// `TokenKind`:
+    #[must_use]
    fn advance_and(&mut self, kind: TokenKind) -> TokenKind {
         self.advance();
 
@@ -129,6 +130,8 @@ impl<'lexer> Lexer<'lexer> {
 
     /// Read the next token from the input.
     pub fn read(&mut self) -> Result<Option<Token>, LexerError> {
+        // Eat whitespace until we encounter a meaningful character, or simply return if
+        // we have reached the end of input and no additional characters can be read:
         let c = loop {
             match self.current() {
                 Some(c) if c.is_ascii_whitespace() => {
@@ -141,26 +144,25 @@ impl<'lexer> Lexer<'lexer> {
 
         let mut span = self.span();
         let kind = match c {
-            '#' if matches!(self.peek(1), Some('|')) => {
+            '#' if self.peek(1) == Some('|') => {
                 self.advance(); // '#'
                 self.advance(); // '|'
 
                 let mut comment = String::new();
                 while let Some(c) = self.advance() {
-                    match c {
-                        '|' if matches!(self.peek(0), Some('#')) => {
-                            self.advance(); // '#'
-                            break;
-                        }
-                        c => {
-                            comment.push(c);
-                        }
+                    if c == '|' && matches!(self.peek(0), Some('#')) {
+                        self.advance(); // '#'
+                        break;
                     }
+
+                    comment.push(c);
                 }
 
                 TokenKind::BlockComment(comment.trim().into())
             }
             ';' => {
+                // Line comments may start with any number of semicolons, so consume however
+                // many are present at the beginning of the comment:
                 while self.current().is_some_and(|c| c == ';') {
                     self.advance();
                 }
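For a concrete picture of what the block-comment arm produces, a sketch of the expected behaviour written as a plain test rather than the crate's `test!` macro; `Lexer::new` is an assumed constructor, while `read` and the token kinds come from the hunk above:

#[test]
fn block_comment_is_trimmed() {
    // `read` returns Result<Option<Token>, LexerError>, so two unwraps reach the token.
    let mut lexer = Lexer::new("#| hello |#"); // assumed constructor
    let token = lexer.read().unwrap().unwrap();
    // Everything between `#|` and `|#` is collected, then trimmed.
    assert_eq!(token.kind, TokenKind::BlockComment("hello".into()));
}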
@@ -183,10 +185,10 @@ impl<'lexer> Lexer<'lexer> {
             '}' => self.advance_and(TokenKind::CloseBrace),
             '[' => self.advance_and(TokenKind::OpenBracket),
             ']' => self.advance_and(TokenKind::CloseBracket),
-            '#' if matches!(self.peek(1), Some('{')) => {
+            '#' if self.peek(1) == Some('{') => {
                 self.advance(); // '#'
                 self.advance(); // '{'
 
                 TokenKind::OpenHashBrace
             }
 
@@ -281,6 +283,7 @@ impl<'lexer> Lexer<'lexer> {
             }
             ':' => {
                 self.advance();
+
                 TokenKind::Keyword(Symbol(self.read_word()))
             }
             _ => {
@@ -354,16 +357,13 @@ mod tests {
         Ok(TokenKind::CloseBrace),
     ]);
 
-    test!(hashmap: "(foo #{:bar 0 :baz 1})", [
-        Ok(TokenKind::OpenParen),
-        Ok(TokenKind::Symbol(Symbol::from("foo"))),
+    test!(hashmap: "#{:bar 0 :baz 1}", [
         Ok(TokenKind::OpenHashBrace),
         Ok(TokenKind::Keyword(Symbol::from("bar"))),
         Ok(TokenKind::Number(0.0)),
         Ok(TokenKind::Keyword(Symbol::from("baz"))),
         Ok(TokenKind::Number(1.0)),
         Ok(TokenKind::CloseBrace),
-        Ok(TokenKind::CloseParen),
     ]);
 
     test!(vector: "[0 1 2]", [
@@ -383,7 +383,7 @@ mod tests {
         Err(LexerErrorKind::UnclosedChar),
     ]);
 
-    test!(lex: "(+ 14 25.5 333 (* 2 5))", [
+    test!(nested_lists: "(+ 14 25.5 333 (* 2 5))", [
         Ok(TokenKind::OpenParen),
         Ok(TokenKind::Symbol(Symbol::from("+"))),
         Ok(TokenKind::Number(14.0)),
@@ -421,6 +421,14 @@ mod tests {
         Ok(TokenKind::CloseParen),
     ]);
 
+    test!(error_unclosed_char_escape: r"'\", [
+        Err(LexerErrorKind::UnclosedChar),
+    ]);
+
+    test!(error_unclosed_char_empty: r"'", [
+        Err(LexerErrorKind::UnclosedChar),
+    ]);
+
     test!(error_parse_numbers: "2 55 3.144 0.0001 1.1.1", [
         Ok(TokenKind::Number(2.0)),
         Ok(TokenKind::Number(55.0)),

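The tests above drive `Lexer::read` one token at a time. A small driver that collects every token from a source string might look like the following sketch; only `read`'s `Result<Option<Token>, LexerError>` signature comes from the diff, and `Lexer::new` is an assumed constructor:

fn tokenize(source: &str) -> Result<Vec<Token>, LexerError> {
    let mut lexer = Lexer::new(source); // assumed constructor
    let mut tokens = Vec::new();
    // `read` yields Ok(Some(token)) until the input is exhausted, then Ok(None);
    // any lexical error short-circuits via `?`.
    while let Some(token) = lexer.read()? {
        tokens.push(token);
    }
    Ok(tokens)
}
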
@@ -12,20 +12,9 @@ impl Symbol {
     }
 }
 
+#[cfg(not(tarpaulin_include))]
 impl std::fmt::Display for Symbol {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         write!(f, "{}", self.0)
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn display() {
-        assert_eq!(Symbol::from("foo").to_string(), "foo");
-        assert_eq!(Symbol::from("+").to_string(), "+");
-        assert_eq!(Symbol::from("bar0").to_string(), "bar0");
-    }
-}

@@ -1,49 +1,55 @@
 use super::Symbol;
 use crate::span::Span;
 
-/// The type of a [Token].
+/// Possible kinds of a [Token].
 #[derive(Debug, Clone, PartialEq)]
 pub enum TokenKind {
-    /// Block comment, e.g. '#| ... |#'
+    /// Block comment, e.g. `#| ... |#`
     BlockComment(String),
-    /// Line comment, e.g. '; ...'
+    /// Line comment, e.g. `; ...`
     LineComment(String),
 
-    /// Opening parenthesis, e.g. '('
+    /// Opening parenthesis, e.g. `(`
     OpenParen,
-    /// Closing parenthesis, e.g. ')'
+    /// Closing parenthesis, e.g. `)`
     CloseParen,
-    /// Opening brace, e.g. '{'
+    /// Opening brace, e.g. `{`
     OpenBrace,
-    /// Closing brace, e.g. '}'
+    /// Closing brace, e.g. `}`
     CloseBrace,
-    /// Opening bracket, e.g. '['
+    /// Opening bracket, e.g. `[`
     OpenBracket,
-    /// Closing bracket, e.g. ']'
+    /// Closing bracket, e.g. `]`
     CloseBracket,
-    /// Opening hash-brace, e.g. '#{'
+    /// Opening hash-brace, e.g. `#{`
     OpenHashBrace,
 
-    /// Boolean, e.g. 'true', 'false'
+    /// Boolean, e.g. `true`, `false`
     Bool(bool),
-    /// Character, e.g. 'c', '\n'
+    /// Character, e.g. `'c'`, `'\n'`
     Char(char),
-    /// Number, e.g. '1', '2.0', '0.003'
+    /// Number, e.g. `1`, `2.0`, `0.003`
     Number(f64),
-    /// String, e.g. '"foo bar"'
+    /// String, e.g. `"foo bar"`
     String(String),
-    /// Keyword, e.g. ':baz'
+    /// Keyword, e.g. `:baz`
     Keyword(Symbol),
-    /// Symbol, e.g. 'qux', '+'
+    /// Symbol, e.g. `qux`, `+`
     Symbol(Symbol),
-    /// Nil, e.g. 'nil'
+    /// Nil, e.g. `nil`
     Nil,
 }
 
-/// A token with a start and end location.
+/// A token encountered during lexical analysis.
+///
+/// `Token`s contain the kind of token which was found, as well as a [Span]
+/// specifying the [Source] and [Location] of the token.
+///
+/// [Source]: crate::span::Source
+/// [Location]: crate::span::Location
 #[derive(Debug, Clone, PartialEq)]
 pub struct Token {
-    /// The type of token.
+    /// The kind of token.
     pub kind: TokenKind,
     /// The span in which the token occurs.
     pub span: Span,
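As a usage sketch for the `TokenKind` enum documented above (purely illustrative, not part of the change), a parser-side helper that groups the delimiter variants; the function name is hypothetical, the variant names are exactly those defined in `TokenKind`:

/// Returns true if `kind` opens a delimited form.
fn opens_form(kind: &TokenKind) -> bool {
    matches!(
        kind,
        TokenKind::OpenParen
            | TokenKind::OpenBrace
            | TokenKind::OpenBracket
            | TokenKind::OpenHashBrace
    )
}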