diff --git a/onihime/src/parser/error.rs b/onihime/src/parser/error.rs index 068d3a1..320f7f4 100644 --- a/onihime/src/parser/error.rs +++ b/onihime/src/parser/error.rs @@ -1,19 +1,23 @@ use crate::{lexer::LexerError, span::Span}; -/// Errors that can occur during parsing. +/// Kinds of errors that can occur during parsing. #[derive(Debug, Clone, PartialEq)] pub enum ParserErrorKind { /// An error which ocurred during lexical analysis. Lexer(LexerError), - /// An unexpecting closing parenthesis/bracket was encountered. + /// Expected a value for key in map, but no value was found. + MissingValueInMap, + /// An unexpected closing parenthesis/brace/bracket was encountered. UnexpectedCloseBracket, - /// Opening parenthesis/bracket does not have a matching closing - /// parenthesis/bracket. - UnclosedBracket, - /// An unmatched parenthesis/bracket was encountered. - UnmatchedBracket, + /// Unexpectedly reached the end of the input. + UnexpectedEof, /// Unexpected parser state reached. - Unreachable, + UnexpectedState, + /// Opening parenthesis/brace/bracket does not have a matching closing + /// parenthesis/brace/bracket. + UnclosedBracket, + /// An unmatched parenthesis/brace/bracket was encountered. + UnmatchedBracket, } /// Parser error, with a start and end location. 
@@ -35,16 +39,19 @@ impl ParserError { impl std::error::Error for ParserError {} +#[cfg(not(tarpaulin_include))] impl std::fmt::Display for ParserError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use ParserErrorKind::*; match &self.kind { Lexer(err) => write!(f, "{err}"), + MissingValueInMap => write!(f, "Key in map is missing its value"), UnexpectedCloseBracket => write!(f, "Unexpected closing bracket"), - UnclosedBracket => write!(f, "Unclosed parenthesis"), + UnexpectedEof => write!(f, "Unexpectedly reached end of input"), + UnexpectedState => write!(f, "Unexpected parsing state reached"), + UnclosedBracket => write!(f, "Unclosed bracket"), UnmatchedBracket => write!(f, "Unmatched bracket"), - Unreachable => write!(f, "Unexpected parsing state reached"), } } } diff --git a/onihime/src/parser/mod.rs b/onihime/src/parser/mod.rs index 4de2106..c510c15 100644 --- a/onihime/src/parser/mod.rs +++ b/onihime/src/parser/mod.rs @@ -7,7 +7,8 @@ use crate::lexer::{Lexer, TokenKind}; mod error; mod node; -/// A parser for the AST. +/// Parser for converting Onihime source code into an Abstract Syntax Tree +/// (AST). #[derive(Debug)] pub struct Parser<'parser> { lexer: Lexer<'parser>, @@ -35,6 +36,7 @@ impl<'parser> Parser<'parser> { while let Some(token) = self.lexer.read()? 
{ match token.kind { TokenKind::BlockComment(_) | TokenKind::LineComment(_) => {} + TokenKind::OpenParen => { let child = Node::new(NodeKind::List(Vec::new()), token.span); parents.push(cur_node); @@ -60,6 +62,47 @@ impl<'parser> Parser<'parser> { parent.push_node(cur_node)?; cur_node = parent; } + + TokenKind::OpenBrace => { + let child = Node::new(NodeKind::Set(Vec::new()), token.span); + parents.push(cur_node); + cur_node = child; + } + TokenKind::OpenHashBrace => { + let child = Node::new(NodeKind::Map(Vec::new()), token.span); + parents.push(cur_node); + cur_node = child; + } + TokenKind::CloseBrace => { + let mut parent = parents.pop().ok_or_else(|| { + ParserError::new( + ParserErrorKind::UnexpectedCloseBracket, + token.span.clone(), + ) + })?; + + cur_node.span.extend(&token.span); + + if !matches!(cur_node.kind, NodeKind::Map(_) | NodeKind::Set(_)) { + return Err(ParserError::new( + ParserErrorKind::UnmatchedBracket, + token.span, + )); + } + + if let NodeKind::Map(ref vec) = cur_node.kind { + if vec.len() % 2 != 0 { + return Err(ParserError::new( + ParserErrorKind::MissingValueInMap, + token.span, + )); + } + } + + parent.push_node(cur_node)?; + cur_node = parent; + } + TokenKind::OpenBracket => { let child = Node::new(NodeKind::Vector(Vec::new()), token.span); parents.push(cur_node); @@ -86,7 +129,10 @@ impl<'parser> Parser<'parser> { cur_node = parent; } - _ => cur_node.push_node(Node::try_from(token)?)?, + _ => { + let node = Node::try_from(token)?; + cur_node.push_node(node)?; + } } } @@ -100,10 +146,7 @@ impl<'parser> Parser<'parser> { if let NodeKind::List(body) = cur_node.kind { Ok(body) } else { - Err(ParserError::new( - ParserErrorKind::Unreachable, - cur_node.span, - )) + unreachable!() // In theory, at least... 
} } } @@ -113,6 +156,7 @@ mod tests { use super::*; use crate::{ lexer::{LexerError, LexerErrorKind, Symbol}, + parser::node::Atom, span::Span, }; @@ -127,75 +171,7 @@ mod tests { }; } - test!(parse_list: "(+ 1 2)", src => Ok(vec![ - Node::new( - NodeKind::List(vec![ - Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(1..2, src.clone())), - Node::new(NodeKind::Number(1.0), Span::new(3..4, src.clone())), - Node::new(NodeKind::Number(2.0), Span::new(5..6, src.clone())), - ]), - Span::new(0..7, src) - ) - ])); - - test!(parse_nested_list: "(+ 2.5 64 (* 2 3))", src => Ok(vec![Node::new( - NodeKind::List(vec![ - Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(1..2, src.clone())), - Node::new(NodeKind::Number(2.5), Span::new(3..6, src.clone())), - Node::new(NodeKind::Number(64.0), Span::new(7..9, src.clone())), - Node::new( - NodeKind::List(vec![ - Node::new(NodeKind::Symbol(Symbol::from("*")), Span::new(11..12, src.clone())), - Node::new(NodeKind::Number(2.0), Span::new(13..14, src.clone())), - Node::new(NodeKind::Number(3.0), Span::new(15..16, src.clone())), - ]), - Span::new(10..17, src.clone()) - ), - ]), - Span::new(0..18, src) - )])); - - test!(parse_multiple_expressions: "(/ 6 3 (+ 1 2)) (* 2 5)\n(- 10 5)", src => Ok(vec![ - Node::new( - NodeKind::List(vec![ - Node::new(NodeKind::Symbol(Symbol::from("/")), Span::new(1..2, src.clone())), - Node::new(NodeKind::Number(6.0), Span::new(3..4, src.clone())), - Node::new(NodeKind::Number(3.0), Span::new(5..6, src.clone())), - Node::new( - NodeKind::List(vec![ - Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(8..9, src.clone())), - Node::new(NodeKind::Number(1.0), Span::new(10..11, src.clone())), - Node::new(NodeKind::Number(2.0), Span::new(12..13, src.clone())), - ]), - Span::new(7..14, src.clone()) - ), - ]), - Span::new(0..15, src.clone()) - ), - Node::new( - NodeKind::List(vec![ - Node::new(NodeKind::Symbol(Symbol::from("*")), Span::new(17..18, src.clone())), - Node::new(NodeKind::Number(2.0), 
Span::new(19..20, src.clone())), - Node::new(NodeKind::Number(5.0), Span::new(21..22, src.clone())), - ]), - Span::new(16..23, src.clone()) - ), - Node::new( - NodeKind::List(vec![ - Node::new(NodeKind::Symbol(Symbol::from("-")), Span::new(25..26, src.clone())), - Node::new(NodeKind::Number(10.0), Span::new(27..29, src.clone())), - Node::new(NodeKind::Number(5.0), Span::new(30..31, src.clone())), - ]), - Span::new(24..32, src) - ), - ])); - - test!(parse_float: "(2.500000)", src => Ok(vec![Node::new( - NodeKind::List(vec![Node::new(NodeKind::Number(2.5), Span::new(1..9, src.clone()))]), - Span::new(0..10, src) - )])); - - test!(parse_empty: "", _src => Ok(vec![])); + test!(empty: "", _src => Ok(vec![])); test!(error_invalid_number: "(+ 1.2.3)", src => Err(ParserError::new( ParserErrorKind::Lexer(LexerError::new( @@ -205,8 +181,151 @@ mod tests { Span::new(3..8, src) ))); - test!(error_unexpected_close_paren: ")", src => Err(ParserError::new( + test!(list: "(+ 1 2) ; sneaky comment :)", src => Ok(vec![ + Node::new( + NodeKind::List(vec![ + Node::new(Atom::Symbol(Symbol::from("+")).into(), Span::new(1..2, src.clone())), + Node::new(Atom::Integer(1).into(), Span::new(3..4, src.clone())), + Node::new(Atom::Integer(2).into(), Span::new(5..6, src.clone())), + ]), + Span::new(0..7, src) + ) + ])); + + test!(error_list_unmatched_bracket: "(]", src => Err(ParserError::new( + ParserErrorKind::UnmatchedBracket, + Span::new(1..2, src), + ))); + + test!(error_list_missing_close_paren: "(true", src => Err(ParserError::new( + ParserErrorKind::UnclosedBracket, + Span::new(0..1, src), + ))); + + test!(error_list_unexpected_close_paren: ")", src => Err(ParserError::new( ParserErrorKind::UnexpectedCloseBracket, Span::new(0..1, src) ))); + + test!(map: "#{:a 0.0 :b 1.0}", src => Ok(vec![ + Node::new( + NodeKind::Map(vec![ + Node::new(Atom::Keyword(Symbol::from("a")).into(), Span::new(2..4, src.clone())), + Node::new(Atom::Float(0.0).into(), Span::new(5..8, src.clone())), + 
Node::new(Atom::Keyword(Symbol::from("b")).into(), Span::new(9..11, src.clone())), + Node::new(Atom::Float(1.0).into(), Span::new(12..15, src.clone())), + ]), + Span::new(0..16, src) + ) + ])); + + test!(error_map_missing_value: "#{:x}", src => Err(ParserError::new( + ParserErrorKind::MissingValueInMap, + Span::new(4..5, src.clone()) + ))); + + test!(error_map_unmatched_bracket: "#{)", src => Err(ParserError::new( + ParserErrorKind::UnmatchedBracket, + Span::new(2..3, src), + ))); + + test!(error_map_missing_close_brace: "#{", src => Err(ParserError::new( + ParserErrorKind::UnclosedBracket, + Span::new(0..2, src), + ))); + + test!(set: "{{} nil}", src => Ok(vec![ + Node::new( + NodeKind::Set(vec![ + Node::new(NodeKind::Set(vec![]), Span::new(1..3, src.clone())), + Node::new(NodeKind::Atom(Atom::Nil), Span::new(4..7, src.clone())), + ]), + Span::new(0..8, src), + ) + ])); + + test!(error_set_unmatched_bracket: "{]", src => Err(ParserError::new( + ParserErrorKind::UnmatchedBracket, + Span::new(1..2, src), + ))); + + test!(error_set_missing_close_brace: "{", src => Err(ParserError::new( + ParserErrorKind::UnclosedBracket, + Span::new(0..1, src), + ))); + + test!(error_map_set_unexpected_close_brace: "}", src => Err(ParserError::new( + ParserErrorKind::UnexpectedCloseBracket, + Span::new(0..1, src) + ))); + + test!(vector: "['a' 'b' 'c']", src => Ok(vec![ + Node::new( + NodeKind::Vector(vec![ + Node::new(NodeKind::Atom(Atom::Char('a')), Span::new(1..4, src.clone())), + Node::new(NodeKind::Atom(Atom::Char('b')), Span::new(5..8, src.clone())), + Node::new(NodeKind::Atom(Atom::Char('c')), Span::new(9..12, src.clone())), + ]), + Span::new(0..13, src), + ) + ])); + + test!(error_vector_unmatched_bracket: "[}", src => Err(ParserError::new( + ParserErrorKind::UnmatchedBracket, + Span::new(1..2, src), + ))); + + test!(error_vector_missing_close_bracket: "{", src => Err(ParserError::new( + ParserErrorKind::UnclosedBracket, + Span::new(0..1, src), + ))); + + 
test!(error_vector_unexpected_close_bracket: "]", src => Err(ParserError::new( + ParserErrorKind::UnexpectedCloseBracket, + Span::new(0..1, src) + ))); + + test!(multiple_expressions: "(/ 6 3 (+ 1 2)) (* 2 5)\n(- 10 5)", src => Ok(vec![ + Node::new( + NodeKind::List(vec![ + Node::new(Atom::Symbol(Symbol::from("/")).into(), Span::new(1..2, src.clone())), + Node::new(Atom::Integer(6).into(), Span::new(3..4, src.clone())), + Node::new(Atom::Integer(3).into(), Span::new(5..6, src.clone())), + Node::new( + NodeKind::List(vec![ + Node::new(Atom::Symbol(Symbol::from("+")).into(), Span::new(8..9, src.clone())), + Node::new(Atom::Integer(1).into(), Span::new(10..11, src.clone())), + Node::new(Atom::Integer(2).into(), Span::new(12..13, src.clone())), + ]), + Span::new(7..14, src.clone()) + ), + ]), + Span::new(0..15, src.clone()) + ), + Node::new( + NodeKind::List(vec![ + Node::new(Atom::Symbol(Symbol::from("*")).into(), Span::new(17..18, src.clone())), + Node::new(Atom::Integer(2).into(), Span::new(19..20, src.clone())), + Node::new(Atom::Integer(5).into(), Span::new(21..22, src.clone())), + ]), + Span::new(16..23, src.clone()) + ), + Node::new( + NodeKind::List(vec![ + Node::new(Atom::Symbol(Symbol::from("-")).into(), Span::new(25..26, src.clone())), + Node::new(Atom::Integer(10).into(), Span::new(27..29, src.clone())), + Node::new(Atom::Integer(5).into(), Span::new(30..31, src.clone())), + ]), + Span::new(24..32, src) + ), + ])); + + test!(function_application: "(join \"foo\" \"bar\")", src => Ok(vec![Node::new( + NodeKind::List(vec![ + Node::new(Atom::Symbol(Symbol::from("join")).into(), Span::new(1..5, src.clone())), + Node::new(Atom::String("foo".into()).into(), Span::new(6..11, src.clone())), + Node::new(Atom::String("bar".into()).into(), Span::new(12..17, src.clone())), + ]), + Span::new(0..18, src) + )])); } diff --git a/onihime/src/parser/node.rs b/onihime/src/parser/node.rs index e04641b..62272b1 100644 --- a/onihime/src/parser/node.rs +++ 
b/onihime/src/parser/node.rs @@ -4,50 +4,116 @@ use crate::{ span::Span, }; -/// The type of a node in the AST. +/// An atomic value. #[derive(Debug, Clone, PartialEq)] -pub enum NodeKind { - /// Bool. +pub enum Atom { + /// Boolean, e.g. `true`, `false` Bool(bool), - /// Character. + /// Character, e.g. `'c'`, `'\n'` Char(char), - /// Keyword. + /// Floating-point number, e.g. `-1.0`, `2.0`, `+0.003` + Float(f64), + /// Integer, e.g. `0`, `-1`, `+200` + Integer(i64), + /// Keyword, e.g. `:baz` Keyword(Symbol), - /// Number. - Number(f64), - /// String. + /// String, e.g. `"foo bar"` String(String), - /// Symbol. + /// Symbol, e.g. `qux`, `+` Symbol(Symbol), - /// Nil. + /// Nil, e.g. `nil` Nil, - - /// List. - List(Vec<Node>), - /// Vector. - Vector(Vec<Node>), } -impl std::fmt::Display for NodeKind { +#[cfg(not(tarpaulin_include))] +impl std::fmt::Display for Atom { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use Atom::*; + match self { - NodeKind::Bool(_) => write!(f, "BOOL"), - NodeKind::Char(_) => write!(f, "CHAR"), - NodeKind::Keyword(_) => write!(f, "KEYWORD"), - NodeKind::Number(_) => write!(f, "NUMBER"), - NodeKind::String(_) => write!(f, "STRING"), - NodeKind::Symbol(_) => write!(f, "SYMBOL"), - NodeKind::Nil => write!(f, "NIL"), - NodeKind::List(_) => write!(f, "LIST"), - NodeKind::Vector(_) => write!(f, "VECTOR"), + Bool(_) => write!(f, "BOOL"), + Char(_) => write!(f, "CHAR"), + Float(_) => write!(f, "FLOAT"), + Integer(_) => write!(f, "INTEGER"), + Keyword(_) => write!(f, "KEYWORD"), + String(_) => write!(f, "STRING"), + Symbol(_) => write!(f, "SYMBOL"), + Nil => write!(f, "NIL"), } } } -/// A node in the AST with a start and end location. +/// An expression. +#[derive(Debug, Clone, PartialEq)] +pub enum NodeKind { + /// An atomic value. + Atom(Atom), + /// A list of nodes. + List(Vec<Node>), + /// A map of nodes. + Map(Vec<Node>), + /// A set of nodes. + Set(Vec<Node>), + /// A vector of nodes. 
+ Vector(Vec<Node>), +} + +impl From<Atom> for NodeKind { + fn from(atom: Atom) -> Self { + Self::Atom(atom) + } +} + +impl TryFrom<Token> for NodeKind { + type Error = ParserError; + + fn try_from(token: Token) -> Result<Self, Self::Error> { + let kind = match token.kind { + TokenKind::Bool(b) => Atom::Bool(b), + TokenKind::Char(c) => Atom::Char(c), + TokenKind::Float(n) => Atom::Float(n), + TokenKind::Integer(n) => Atom::Integer(n), + TokenKind::Keyword(k) => Atom::Keyword(k), + TokenKind::String(s) => Atom::String(s), + TokenKind::Symbol(s) => Atom::Symbol(s), + TokenKind::Nil => Atom::Nil, + _ => { + return Err(ParserError::new( + ParserErrorKind::UnexpectedState, + token.span, + )) + } + }; + + Ok(kind.into()) + } +} + +#[cfg(not(tarpaulin_include))] +impl std::fmt::Display for NodeKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + use NodeKind::*; + + match self { + Atom(atom) => write!(f, "{atom}"), + List(_) => write!(f, "LIST"), + Map(_) => write!(f, "MAP"), + Set(_) => write!(f, "SET"), + Vector(_) => write!(f, "VECTOR"), + } + } +} + +/// A node in the Abstract Syntax Tree (AST). +/// +/// `Node`s contain the kind of node which was found, as well as a [Span] +/// specifying the [Source] and [Location] of the node. +/// +/// [Source]: crate::span::Source +/// [Location]: crate::span::Location #[derive(Debug, Clone, PartialEq)] pub struct Node { - /// The type of node. + /// The kind of node. pub kind: NodeKind, /// The span in which the node occurs. pub span: Span, @@ -63,15 +129,24 @@ impl Node { /// Push a child node onto a list node. 
pub fn push_node(&mut self, child: Self) -> Result<(), ParserError> { match &mut self.kind { - NodeKind::List(c) | NodeKind::Vector(c) => { - c.push(child); + NodeKind::List(vec) + | NodeKind::Map(vec) + | NodeKind::Set(vec) + | NodeKind::Vector(vec) => { + vec.push(child); + } + _ => { + return Err(ParserError::new( + ParserErrorKind::UnexpectedState, + child.span, + )) } - _ => return Err(ParserError::new(ParserErrorKind::Unreachable, child.span)), } Ok(()) } + #[cfg(not(tarpaulin_include))] fn display(&self, indent: usize) -> String { let mut text = format!( "{}{}@{}..{}\n", @@ -82,48 +157,35 @@ impl Node { ); match &self.kind { - NodeKind::List(vec) | NodeKind::Vector(vec) => { + NodeKind::Atom(_) => {} + NodeKind::List(vec) + | NodeKind::Map(vec) + | NodeKind::Set(vec) + | NodeKind::Vector(vec) => { for node in vec { - text.push_str(&node.display(indent + 1)); + text.push_str(&format!("{}\n", node.display(indent + 1))); } } - _ => {} } text.trim_end().to_string() } } -impl std::fmt::Display for Node { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.display(0)) - } -} - impl TryFrom<Token> for Node { type Error = ParserError; fn try_from(token: Token) -> Result<Self, Self::Error> { - let kind = match token.kind { - TokenKind::Bool(b) => NodeKind::Bool(b), - TokenKind::Char(c) => NodeKind::Char(c), - TokenKind::Number(n) => NodeKind::Number(n), - TokenKind::String(s) => NodeKind::String(s), - TokenKind::Keyword(k) => NodeKind::Keyword(k), - TokenKind::Symbol(s) => NodeKind::Symbol(s), - TokenKind::Nil => NodeKind::Nil, - TokenKind::OpenParen - | TokenKind::CloseParen - | TokenKind::OpenBrace - | TokenKind::CloseBrace - | TokenKind::OpenBracket - | TokenKind::CloseBracket - | TokenKind::LineComment(_) - | TokenKind::BlockComment(_) => { - return Err(ParserError::new(ParserErrorKind::Unreachable, token.span)) - } - }; + let span = token.span.clone(); + let kind = NodeKind::try_from(token)?; - Ok(Self::new(kind, token.span)) + Ok(Self::new(kind, 
span)) + } +} + +#[cfg(not(tarpaulin_include))] +impl std::fmt::Display for Node { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.display(0)) } }