Miscellaneous improvements/cleanup/tweaks in the lexer module

parent 1b021f1a89
commit b7f28b32f0
@@ -46,8 +46,8 @@ impl std::fmt::Display for LexerError {
         use LexerErrorKind::*;
 
         match &self.kind {
-            InvalidEscape(c) => write!(f, "Unknown escape sequence '\\{c}' in string"),
-            InvalidNumber(n) => write!(f, "`{n}` is not a valid numeric literal"),
+            InvalidEscape(c) => write!(f, "Invalid escape sequence '\\{c}'"),
+            InvalidNumber(n) => write!(f, "Invalid numeric literal `{n}`"),
             InvalidString => write!(f, "Invalid string literal"),
             UnclosedChar => write!(f, "Unclosed character literal"),
             UnclosedString => write!(f, "Unclosed string literal"),
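As a quick illustration of the reworded messages (a sketch, not part of the diff; the free function and the module path are assumptions):

use crate::lexer::{LexerError, LexerErrorKind}; // assumed module path
use crate::span::Span;

// Displays as: Invalid escape sequence '\q'
fn invalid_escape_message(span: Span) -> String {
    LexerError::new(LexerErrorKind::InvalidEscape('q'), span).to_string()
}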
@@ -1,6 +1,6 @@
 use std::{str::Chars, sync::Arc};
 
-pub use self::{
+pub(crate) use self::{
     error::{LexerError, LexerErrorKind},
     symbol::Symbol,
     token::{Token, TokenKind},
@@ -30,7 +30,7 @@ pub(crate) struct Lexer<'lexer> {
 impl<'lexer> Lexer<'lexer> {
     /// Create a new lexer instance from a string.
     #[must_use]
-    pub fn new(input: &'lexer str) -> Self {
+    pub(crate) fn new(input: &'lexer str) -> Self {
         let source = Arc::new(Source::new(None, input.to_string()));
 
         Self {
@@ -41,20 +41,21 @@ impl<'lexer> Lexer<'lexer> {
     }
 
     /// Set the name of the [Source] being lexically analyzed.
-    pub fn set_name(&mut self, name: String) {
+    pub(crate) fn set_name(&mut self, name: String) {
         // TODO: Avoid unwrapping here (if possible?)
         Arc::get_mut(&mut self.source).unwrap().set_name(name);
     }
 
     /// The source being lexically analyzed.
+    #[cfg(test)]
     #[must_use]
-    pub fn source(&self) -> Arc<Source> {
+    pub(crate) fn source(&self) -> Arc<Source> {
         self.source.clone()
     }
 
     /// Get the current position of the lexer.
     #[must_use]
-    pub fn span(&self) -> Span {
+    pub(crate) fn span(&self) -> Span {
         Span::new(self.byte..self.byte, self.source.clone())
     }
 
@@ -103,7 +104,8 @@ impl<'lexer> Lexer<'lexer> {
     }
 
     /// Read the next token from the input.
-    pub fn read(&mut self) -> Result<Option<Token>, LexerError> {
+    #[must_use]
+    pub(crate) fn read(&mut self) -> Result<Option<Token>, LexerError> {
         // Eat whitespace until we encounter a meaningful character, or simply return if
         // we have reached the end of input and no additional characters can be read:
         let c = loop {
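For orientation (a sketch, not part of the diff): with `new` and `read` now `pub(crate)`, the lexer is driven from inside the crate roughly like this. The module path in the `use` line and the helper name `lex_all` are assumptions.

use crate::lexer::{Lexer, LexerError, Token}; // assumed module path

// Collect every token from an input string, stopping at the first lexer error.
fn lex_all(input: &str) -> Result<Vec<Token>, LexerError> {
    let mut lexer = Lexer::new(input);
    let mut tokens = Vec::new();
    // `read` yields Ok(None) once the end of input is reached.
    while let Some(token) = lexer.read()? {
        tokens.push(token);
    }
    Ok(tokens)
}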
@@ -178,6 +180,7 @@ impl<'lexer> Lexer<'lexer> {
         Ok(Some(Token::new(kind, span)))
     }
 
+    #[must_use]
     fn line_comment(&mut self) -> TokenKind {
         // Line comments may start with any number of semicolons, so consume however
         // many are present at the beginning of the comment:
@@ -198,6 +201,7 @@ impl<'lexer> Lexer<'lexer> {
         TokenKind::LineComment(comment.trim().into())
     }
 
+    #[must_use]
     fn block_comment(&mut self) -> TokenKind {
         // TODO: This currently allows for unclosed block comments; do we care?
         self.advance(); // '#'
@@ -216,6 +220,7 @@ impl<'lexer> Lexer<'lexer> {
         TokenKind::BlockComment(comment.trim().into())
     }
 
+    #[must_use]
     fn float_literal(&self, word: String, span: Span) -> Result<TokenKind, LexerError> {
         let float = word.parse().map_err(|_| {
             LexerError::new(LexerErrorKind::InvalidNumber(word), span.join(&self.span()))
@@ -224,6 +229,7 @@ impl<'lexer> Lexer<'lexer> {
         Ok(TokenKind::Float(float))
     }
 
+    #[must_use]
     fn integer_literal(
         &self,
         word: String,
@@ -244,6 +250,7 @@ impl<'lexer> Lexer<'lexer> {
         Ok(TokenKind::Integer(integer))
     }
 
+    #[must_use]
     fn numeric_literal(&mut self, span: Span) -> Result<TokenKind, LexerError> {
         let word = self.read_word();
 
@@ -256,6 +263,7 @@ impl<'lexer> Lexer<'lexer> {
         Ok(kind)
     }
 
+    #[must_use]
     fn char_literal(&mut self, span: Span) -> Result<TokenKind, LexerError> {
         self.advance(); // '\''
 
@@ -300,6 +308,7 @@ impl<'lexer> Lexer<'lexer> {
         Ok(TokenKind::Char(c))
     }
 
+    #[must_use]
     fn string_literal(&mut self, span: Span) -> Result<TokenKind, LexerError> {
         self.advance(); // '"'
 
@@ -436,9 +445,10 @@ mod tests {
         Ok(TokenKind::Integer(255)),
     ]);
 
-    test!(error_parse_number: "1.1.1 0.x", [
+    test!(error_parse_number: "1.1.1 0.x 7b", [
         Err(LexerErrorKind::InvalidNumber("1.1.1".into())),
         Err(LexerErrorKind::InvalidNumber("0.x".into())),
+        Err(LexerErrorKind::InvalidNumber("7b".into())),
     ]);
 
     test!(char_literal: r"'x' '\n' '\r' '\t' '\e' '\\' '\q' 'b", [
@@ -479,6 +489,10 @@ mod tests {
         Err(LexerErrorKind::UnclosedString),
     ]);
 
+    test!(error_escape_unclosed_string: "\"oops\\", [
+        Err(LexerErrorKind::UnclosedString),
+    ]);
+
     test!(error_invalid_string: "\"hiii\"222", [
         Err(LexerErrorKind::InvalidString),
     ]);
@@ -1,10 +1,11 @@
 /// A symbol used to identify a function or variable.
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct Symbol(pub String);
+#[repr(transparent)]
+pub(crate) struct Symbol(String);
 
 impl Symbol {
     /// Create a new `Symbol` from a string.
-    pub fn from<S>(s: S) -> Self
+    pub(crate) fn from<S>(s: S) -> Self
     where
         S: Into<String>,
     {
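A small side note (sketch, not part of the diff; the function name and module path are illustrative): with the tuple field private, crate-internal code builds symbols through `Symbol::from` instead of the field.

use crate::lexer::Symbol; // assumed module path

// `Symbol::from` accepts anything convertible into a String.
fn example_symbols() -> (Symbol, Symbol) {
    (Symbol::from("+"), Symbol::from(String::from("my-var")))
}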
@@ -3,7 +3,7 @@ use crate::span::Span;
 
 /// Possible kinds of a [Token].
 #[derive(Debug, Clone, PartialEq)]
-pub enum TokenKind {
+pub(crate) enum TokenKind {
     /// Block comment, e.g. `#| ... |#`
     BlockComment(String),
     /// Line comment, e.g. `; ...`
@@ -32,10 +32,10 @@ pub enum TokenKind {
     Float(f64),
     /// Integer, e.g. `0`, `-1`, `+200`
     Integer(i64),
-    /// String, e.g. `"foo bar"`
-    String(String),
     /// Keyword, e.g. `:baz`
     Keyword(Symbol),
+    /// String, e.g. `"foo bar"`
+    String(String),
     /// Symbol, e.g. `qux`, `+`
     Symbol(Symbol),
     /// Nil, e.g. `nil`
@@ -50,17 +50,17 @@ pub enum TokenKind {
 /// [Source]: crate::span::Source
 /// [Location]: crate::span::Location
 #[derive(Debug, Clone, PartialEq)]
-pub struct Token {
+pub(crate) struct Token {
     /// The kind of token.
-    pub kind: TokenKind,
+    pub(crate) kind: TokenKind,
     /// The span in which the token occurs.
-    pub span: Span,
+    pub(crate) span: Span,
 }
 
 impl Token {
     /// Construct a new instance of `Token`.
     #[must_use]
-    pub const fn new(kind: TokenKind, span: Span) -> Self {
+    pub(crate) const fn new(kind: TokenKind, span: Span) -> Self {
         Self { kind, span }
     }
 }
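One last sketch (not part of this commit; the helper name and module path are assumptions) of assembling a token through the crate-visible constructor:

use crate::lexer::{Lexer, Token, TokenKind}; // assumed module path

// Build an integer token positioned at the lexer's current cursor.
fn integer_token_at_cursor(lexer: &Lexer<'_>) -> Token {
    Token::new(TokenKind::Integer(255), lexer.span())
}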