diff --git a/onihime/src/lexer/mod.rs b/onihime/src/lexer/mod.rs index b065461..a4b53a7 100644 --- a/onihime/src/lexer/mod.rs +++ b/onihime/src/lexer/mod.rs @@ -22,7 +22,7 @@ pub trait Separator { impl Separator for char { fn is_separator(&self) -> bool { - self.is_ascii_whitespace() || matches!(self, '(' | ')' | '[' | ']' | '{' | '}' | ',') + self.is_ascii_whitespace() || matches!(self, '(' | ')' | '[' | ']' | '{' | '}') } } @@ -49,11 +49,8 @@ impl<'lexer> Lexer<'lexer> { /// Set the name of the [Source] being lexically analyzed. pub fn set_name(&mut self, name: String) { - if let Some(source) = Arc::get_mut(&mut self.source) { - source.set_name(name); - } else { - unimplemented!(); // FIXME: What should we do in this case? - } + // FIXME: Avoid unwrapping here (if possible?) + Arc::get_mut(&mut self.source).unwrap().set_name(name); } /// The source being lexically analyzed. @@ -62,12 +59,6 @@ impl<'lexer> Lexer<'lexer> { self.source.clone() } - /// Get the current character. - #[must_use] - pub fn current(&self) -> Option { - self.input.as_str().chars().next() - } - /// Get the unparsed input. #[must_use] pub fn get_unparsed(&self) -> &str { @@ -81,7 +72,14 @@ impl<'lexer> Lexer<'lexer> { Span::new(self.byte..self.byte, self.source.clone()) } + /// Get the current character. + #[must_use] + fn current(&self) -> Option { + self.input.as_str().chars().next() + } + /// Get the nth character ahead of the current character without advancing. + #[inline] fn peek(&self, n: usize) -> Option { self.input.as_str().chars().nth(n) } @@ -98,7 +96,7 @@ impl<'lexer> Lexer<'lexer> { fn read_word(&mut self) -> String { let mut word = String::new(); while let Some(c) = self.current() { - if c.is_separator() { + if c.is_separator() || (c == '#' && self.peek(1) == Some('{')) { break; } @@ -125,7 +123,7 @@ impl<'lexer> Lexer<'lexer> { pub fn read(&mut self) -> Result, LexerError> { let c = loop { match self.current() { - Some(c) if c.is_ascii_whitespace() || c == ',' => { + Some(c) if c.is_ascii_whitespace() => { self.advance(); } Some(c) => break c, @@ -135,6 +133,41 @@ impl<'lexer> Lexer<'lexer> { let mut span = self.span(); let kind = match c { + '#' if matches!(self.peek(1), Some('|')) => { + self.advance(); // '#' + self.advance(); // '|#' + + let mut comment = String::new(); + while let Some(c) = self.advance() { + match c { + '|' if matches!(self.peek(0), Some('#')) => { + self.advance(); // '|' + self.advance(); // '#' + break; + } + c => { + comment.push(c); + } + } + } + + TokenKind::BlockComment(comment.trim().into()) + } + ';' => { + let mut comment = String::new(); + while let Some(c) = self.advance() { + match c { + ';' => continue, + '\n' => break, + c => { + comment.push(c); + } + } + } + + TokenKind::LineComment(comment.trim().into()) + } + '(' => { self.advance(); TokenKind::OpenParen @@ -159,48 +192,56 @@ impl<'lexer> Lexer<'lexer> { self.advance(); TokenKind::CloseBracket } + '#' if matches!(self.peek(1), Some('{')) => { + self.advance(); // '#' + self.advance(); // '{' + TokenKind::OpenHashBrace + } + + '\'' => { + self.advance(); // '\'' + + let c = match self.advance() { + Some('\\') => match self.advance() { + Some(c @ ('"' | '\\')) => c, + Some('n') => '\n', + Some('e') => '\x1b', + Some(c) => { + return Err(LexerError::new( + LexerErrorKind::InvalidEscape(c), + span.join(&self.span()), + )); + } + None => { + return Err(LexerError::new( + LexerErrorKind::UnclosedChar, + span.join(&self.span()), + )); + } + }, + Some(c) => c, + None => { + return Err(LexerError::new( + LexerErrorKind::UnclosedChar, + span.join(&self.span()), + )) + } + }; + + if self.advance() != Some('\'') { + self.read_word(); + return Err(LexerError::new( + LexerErrorKind::InvalidChar, + span.join(&self.span()), + )); + } + + TokenKind::Char(c) + } '0'..='9' => TokenKind::Number(self.parse_or(LexerErrorKind::InvalidNumber)?), '+' | '-' if matches!(self.peek(1), Some('0'..='9')) => { TokenKind::Number(self.parse_or(LexerErrorKind::InvalidNumber)?) } - ';' => { - let mut comment = String::new(); - while let Some(c) = self.advance() { - match c { - ';' => continue, - '\n' => break, - c => { - comment.push(c); - } - } - } - - TokenKind::LineComment(comment.trim().into()) - } - '#' if matches!(self.peek(1), Some('|')) => { - self.advance(); // '#' - self.advance(); // '|#' - - let mut comment = String::new(); - while let Some(c) = self.advance() { - match c { - '|' if matches!(self.peek(0), Some('#')) => { - self.advance(); // '|' - self.advance(); // '#' - break; - } - c => { - comment.push(c); - } - } - } - - TokenKind::BlockComment(comment.trim().into()) - } - ':' => { - self.advance(); - TokenKind::Keyword(Symbol(self.read_word())) - } '"' => { self.advance(); // '"' @@ -245,45 +286,9 @@ impl<'lexer> Lexer<'lexer> { TokenKind::String(string) } - '\'' => { - self.advance(); // '\'' - - let c = match self.advance() { - Some('\\') => match self.advance() { - Some(c @ ('"' | '\\')) => c, - Some('n') => '\n', - Some('e') => '\x1b', - Some(c) => { - return Err(LexerError::new( - LexerErrorKind::InvalidEscape(c), - span.join(&self.span()), - )); - } - None => { - return Err(LexerError::new( - LexerErrorKind::UnclosedChar, - span.join(&self.span()), - )); - } - }, - Some(c) => c, - None => { - return Err(LexerError::new( - LexerErrorKind::UnclosedChar, - span.join(&self.span()), - )) - } - }; - - if self.advance() != Some('\'') { - self.read_word(); - return Err(LexerError::new( - LexerErrorKind::InvalidChar, - span.join(&self.span()), - )); - } - - TokenKind::Char(c) + ':' => { + self.advance(); + TokenKind::Keyword(Symbol(self.read_word())) } _ => { let word = self.read_word(); @@ -334,6 +339,41 @@ mod tests { }; } + test!(block_comment: "#| foo\nbar |#", [ + Ok(TokenKind::BlockComment("foo\nbar".into())) + ]); + + test!(line_comment: "; foo\n;; bar baz qux", [ + Ok(TokenKind::LineComment("foo".into())), + Ok(TokenKind::LineComment("bar baz qux".into())), + ]); + + test!(hashset: "{{} true false}", [ + Ok(TokenKind::OpenBrace), + Ok(TokenKind::OpenBrace), + Ok(TokenKind::CloseBrace), + Ok(TokenKind::Bool(true)), + Ok(TokenKind::Bool(false)), + Ok(TokenKind::CloseBrace), + ]); + + test!(hashmap: "(foo #{:bar 0 :baz 1})", [ + Ok(TokenKind::OpenParen), + Ok(TokenKind::Symbol(Symbol::from("foo"))), + Ok(TokenKind::OpenHashBrace), + Ok(TokenKind::Keyword(Symbol::from("bar"))), + Ok(TokenKind::Number(0.0)), + Ok(TokenKind::Keyword(Symbol::from("baz"))), + Ok(TokenKind::Number(1.0)), + Ok(TokenKind::CloseBrace), + Ok(TokenKind::CloseParen), + ]); + + test!(char_literal: "'x' '\n'", [ + Ok(TokenKind::Char('x')), + Ok(TokenKind::Char('\n')), + ]); + test!(lex: "(+ 14 25.5 333 (* 2 5))", [ Ok(TokenKind::OpenParen), Ok(TokenKind::Symbol(Symbol::from("+"))), @@ -372,15 +412,6 @@ mod tests { Ok(TokenKind::CloseParen), ]); - test!(line_comment: "; foo\n;; bar baz qux", [ - Ok(TokenKind::LineComment("foo".into())), - Ok(TokenKind::LineComment("bar baz qux".into())), - ]); - - test!(block_comment: "#| foo\nbar |#", [ - Ok(TokenKind::BlockComment("foo\nbar".into())) - ]); - test!(error_parse_numbers: "2 55 3.144 0.0001 1.1.1", [ Ok(TokenKind::Number(2.0)), Ok(TokenKind::Number(55.0)), diff --git a/onihime/src/lexer/symbol.rs b/onihime/src/lexer/symbol.rs index 3f7c697..0eac9fa 100644 --- a/onihime/src/lexer/symbol.rs +++ b/onihime/src/lexer/symbol.rs @@ -17,3 +17,15 @@ impl std::fmt::Display for Symbol { write!(f, "{}", self.0) } } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn display() { + assert_eq!(Symbol::from("foo").to_string(), "foo"); + assert_eq!(Symbol::from("+").to_string(), "+"); + assert_eq!(Symbol::from("bar0").to_string(), "bar0"); + } +} diff --git a/onihime/src/lexer/token.rs b/onihime/src/lexer/token.rs index f3f9c6f..6ebc0ee 100644 --- a/onihime/src/lexer/token.rs +++ b/onihime/src/lexer/token.rs @@ -4,6 +4,11 @@ use crate::span::Span; /// The type of a [Token]. #[derive(Debug, Clone, PartialEq)] pub enum TokenKind { + /// Block comment, e.g. '#| ... |#' + BlockComment(String), + /// Line comment, e.g. '; ...' + LineComment(String), + /// Opening parenthesis, e.g. '(' OpenParen, /// Closing parenthesis, e.g. ')' @@ -16,11 +21,8 @@ pub enum TokenKind { OpenBracket, /// Closing bracket, e.g. ']' CloseBracket, - - /// Block comment, e.g. '#| ... |#' - BlockComment(String), - /// Line comment, e.g. '; ...' - LineComment(String), + /// Opening hash-brace, e.g. '#{' + OpenHashBrace, /// Boolean, e.g. 'true', 'false' Bool(bool),