Miscellaneous improvements/cleanup/tweaks in the lexer module
This commit is contained in:
parent
1b021f1a89
commit
b7f28b32f0
@ -46,8 +46,8 @@ impl std::fmt::Display for LexerError {
|
||||
use LexerErrorKind::*;
|
||||
|
||||
match &self.kind {
|
||||
InvalidEscape(c) => write!(f, "Unknown escape sequence '\\{c}' in string"),
|
||||
InvalidNumber(n) => write!(f, "`{n}` is not a valid numeric literal"),
|
||||
InvalidEscape(c) => write!(f, "Invalid escape sequence '\\{c}'"),
|
||||
InvalidNumber(n) => write!(f, "Invalid numeric literal `{n}`"),
|
||||
InvalidString => write!(f, "Invalid string literal"),
|
||||
UnclosedChar => write!(f, "Unclosed character literal"),
|
||||
UnclosedString => write!(f, "Unclosed string literal"),
|
||||
|
@ -1,6 +1,6 @@
|
||||
use std::{str::Chars, sync::Arc};
|
||||
|
||||
pub use self::{
|
||||
pub(crate) use self::{
|
||||
error::{LexerError, LexerErrorKind},
|
||||
symbol::Symbol,
|
||||
token::{Token, TokenKind},
|
||||
@ -30,7 +30,7 @@ pub(crate) struct Lexer<'lexer> {
|
||||
impl<'lexer> Lexer<'lexer> {
|
||||
/// Create a new lexer instance from a string.
|
||||
#[must_use]
|
||||
pub fn new(input: &'lexer str) -> Self {
|
||||
pub(crate) fn new(input: &'lexer str) -> Self {
|
||||
let source = Arc::new(Source::new(None, input.to_string()));
|
||||
|
||||
Self {
|
||||
@ -41,20 +41,21 @@ impl<'lexer> Lexer<'lexer> {
|
||||
}
|
||||
|
||||
/// Set the name of the [Source] being lexically analyzed.
|
||||
pub fn set_name(&mut self, name: String) {
|
||||
pub(crate) fn set_name(&mut self, name: String) {
|
||||
// TODO: Avoid unwrapping here (if possible?)
|
||||
Arc::get_mut(&mut self.source).unwrap().set_name(name);
|
||||
}
|
||||
|
||||
/// The source being lexically analyzed.
|
||||
#[cfg(test)]
|
||||
#[must_use]
|
||||
pub fn source(&self) -> Arc<Source> {
|
||||
pub(crate) fn source(&self) -> Arc<Source> {
|
||||
self.source.clone()
|
||||
}
|
||||
|
||||
/// Get the current position of the lexer.
|
||||
#[must_use]
|
||||
pub fn span(&self) -> Span {
|
||||
pub(crate) fn span(&self) -> Span {
|
||||
Span::new(self.byte..self.byte, self.source.clone())
|
||||
}
|
||||
|
||||
@ -103,7 +104,8 @@ impl<'lexer> Lexer<'lexer> {
|
||||
}
|
||||
|
||||
/// Read the next token from the input.
|
||||
pub fn read(&mut self) -> Result<Option<Token>, LexerError> {
|
||||
#[must_use]
|
||||
pub(crate) fn read(&mut self) -> Result<Option<Token>, LexerError> {
|
||||
// Eat whitespace until we encounter a meaningful character, or simply return if
|
||||
// we have reached the end of input and no additional characters can be read:
|
||||
let c = loop {
|
||||
@ -178,6 +180,7 @@ impl<'lexer> Lexer<'lexer> {
|
||||
Ok(Some(Token::new(kind, span)))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn line_comment(&mut self) -> TokenKind {
|
||||
// Line comments may start with any number of semicolons, so consume however
|
||||
// many are present at the beginning of the comment:
|
||||
@ -198,6 +201,7 @@ impl<'lexer> Lexer<'lexer> {
|
||||
TokenKind::LineComment(comment.trim().into())
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn block_comment(&mut self) -> TokenKind {
|
||||
// TODO: This currently allows for unclosed block comments; do we care?
|
||||
self.advance(); // '#'
|
||||
@ -216,6 +220,7 @@ impl<'lexer> Lexer<'lexer> {
|
||||
TokenKind::BlockComment(comment.trim().into())
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn float_literal(&self, word: String, span: Span) -> Result<TokenKind, LexerError> {
|
||||
let float = word.parse().map_err(|_| {
|
||||
LexerError::new(LexerErrorKind::InvalidNumber(word), span.join(&self.span()))
|
||||
@ -224,6 +229,7 @@ impl<'lexer> Lexer<'lexer> {
|
||||
Ok(TokenKind::Float(float))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn integer_literal(
|
||||
&self,
|
||||
word: String,
|
||||
@ -244,6 +250,7 @@ impl<'lexer> Lexer<'lexer> {
|
||||
Ok(TokenKind::Integer(integer))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn numeric_literal(&mut self, span: Span) -> Result<TokenKind, LexerError> {
|
||||
let word = self.read_word();
|
||||
|
||||
@ -256,6 +263,7 @@ impl<'lexer> Lexer<'lexer> {
|
||||
Ok(kind)
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn char_literal(&mut self, span: Span) -> Result<TokenKind, LexerError> {
|
||||
self.advance(); // '\''
|
||||
|
||||
@ -300,6 +308,7 @@ impl<'lexer> Lexer<'lexer> {
|
||||
Ok(TokenKind::Char(c))
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
fn string_literal(&mut self, span: Span) -> Result<TokenKind, LexerError> {
|
||||
self.advance(); // '"'
|
||||
|
||||
@ -436,9 +445,10 @@ mod tests {
|
||||
Ok(TokenKind::Integer(255)),
|
||||
]);
|
||||
|
||||
test!(error_parse_number: "1.1.1 0.x", [
|
||||
test!(error_parse_number: "1.1.1 0.x 7b", [
|
||||
Err(LexerErrorKind::InvalidNumber("1.1.1".into())),
|
||||
Err(LexerErrorKind::InvalidNumber("0.x".into())),
|
||||
Err(LexerErrorKind::InvalidNumber("7b".into())),
|
||||
]);
|
||||
|
||||
test!(char_literal: r"'x' '\n' '\r' '\t' '\e' '\\' '\q' 'b", [
|
||||
@ -479,6 +489,10 @@ mod tests {
|
||||
Err(LexerErrorKind::UnclosedString),
|
||||
]);
|
||||
|
||||
test!(error_escape_unclosed_string: "\"oops\\", [
|
||||
Err(LexerErrorKind::UnclosedString),
|
||||
]);
|
||||
|
||||
test!(error_invalid_string: "\"hiii\"222", [
|
||||
Err(LexerErrorKind::InvalidString),
|
||||
]);
|
||||
|
@ -1,10 +1,11 @@
|
||||
/// A symbol used to identify a function or variable.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Symbol(pub String);
|
||||
#[repr(transparent)]
|
||||
pub(crate) struct Symbol(String);
|
||||
|
||||
impl Symbol {
|
||||
/// Create a new `Symbol` from a string.
|
||||
pub fn from<S>(s: S) -> Self
|
||||
pub(crate) fn from<S>(s: S) -> Self
|
||||
where
|
||||
S: Into<String>,
|
||||
{
|
||||
|
@ -3,7 +3,7 @@ use crate::span::Span;
|
||||
|
||||
/// Possible kinds of a [Token].
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum TokenKind {
|
||||
pub(crate) enum TokenKind {
|
||||
/// Block comment, e.g. `#| ... |#`
|
||||
BlockComment(String),
|
||||
/// Line comment, e.g. `; ...`
|
||||
@ -32,10 +32,10 @@ pub enum TokenKind {
|
||||
Float(f64),
|
||||
/// Integer, e.g. `0`, `-1`, `+200`
|
||||
Integer(i64),
|
||||
/// String, e.g. `"foo bar"`
|
||||
String(String),
|
||||
/// Keyword, e.g. `:baz`
|
||||
Keyword(Symbol),
|
||||
/// String, e.g. `"foo bar"`
|
||||
String(String),
|
||||
/// Symbol, e.g. `qux`, `+`
|
||||
Symbol(Symbol),
|
||||
/// Nil, e.g. `nil`
|
||||
@ -50,17 +50,17 @@ pub enum TokenKind {
|
||||
/// [Source]: crate::span::Source
|
||||
/// [Location]: crate::span::Location
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Token {
|
||||
pub(crate) struct Token {
|
||||
/// The kind of token.
|
||||
pub kind: TokenKind,
|
||||
pub(crate) kind: TokenKind,
|
||||
/// The span in which the token occurs.
|
||||
pub span: Span,
|
||||
pub(crate) span: Span,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
/// Construct a new instance of `Token`.
|
||||
#[must_use]
|
||||
pub const fn new(kind: TokenKind, span: Span) -> Self {
|
||||
pub(crate) const fn new(kind: TokenKind, span: Span) -> Self {
|
||||
Self { kind, span }
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user