Add the initial implementation of the parser
This commit is contained in:
parent
cd76ceaa77
commit
de78b9840a
@ -3,4 +3,5 @@
|
||||
#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)]
|
||||
|
||||
mod lexer;
|
||||
mod parser;
|
||||
mod span;
|
||||
|
57
onihime/src/parser/error.rs
Normal file
57
onihime/src/parser/error.rs
Normal file
@ -0,0 +1,57 @@
|
||||
use crate::{lexer::LexerError, span::Span};
|
||||
|
||||
/// Errors that can occur during parsing.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum ParserErrorKind {
|
||||
/// An error which ocurred during lexical analysis.
|
||||
Lexer(LexerError),
|
||||
/// An unexpecting closing parenthesis/bracket was encountered.
|
||||
UnexpectedCloseBracket,
|
||||
/// Opening parenthesis/bracket does not have a matching closing
|
||||
/// parenthesis/bracket.
|
||||
UnclosedBracket,
|
||||
/// An unmatched parenthesis/bracket was encountered.
|
||||
UnmatchedBracket,
|
||||
/// Unexpected parser state reached.
|
||||
Unreachable,
|
||||
}
|
||||
|
||||
/// Parser error, with a start and end location.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct ParserError {
|
||||
/// The type of error encountered.
|
||||
pub kind: ParserErrorKind,
|
||||
/// The span in which the error occurred.
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl ParserError {
|
||||
/// Construct a new instance of `ParserErorr`.
|
||||
#[must_use]
|
||||
pub const fn new(kind: ParserErrorKind, span: Span) -> Self {
|
||||
Self { kind, span }
|
||||
}
|
||||
}
|
||||
|
||||
// Empty impl: every `std::error::Error` method keeps its default, so no
// underlying `source()` is reported for a `ParserError`.
impl std::error::Error for ParserError {}
|
||||
|
||||
impl std::fmt::Display for ParserError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
use ParserErrorKind::*;
|
||||
|
||||
match &self.kind {
|
||||
Lexer(err) => write!(f, "{err}"),
|
||||
UnexpectedCloseBracket => write!(f, "Unexpected closing bracket"),
|
||||
UnclosedBracket => write!(f, "Unclosed parenthesis"),
|
||||
UnmatchedBracket => write!(f, "Unmatched bracket"),
|
||||
Unreachable => write!(f, "Unexpected parsing state reached"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<LexerError> for ParserError {
|
||||
fn from(err: LexerError) -> Self {
|
||||
let span = err.span.clone();
|
||||
Self::new(ParserErrorKind::Lexer(err), span)
|
||||
}
|
||||
}
|
212
onihime/src/parser/mod.rs
Normal file
212
onihime/src/parser/mod.rs
Normal file
@ -0,0 +1,212 @@
|
||||
pub use self::{
|
||||
error::{ParserError, ParserErrorKind},
|
||||
node::{Node, NodeKind},
|
||||
};
|
||||
use crate::lexer::{Lexer, TokenKind};
|
||||
|
||||
mod error;
|
||||
mod node;
|
||||
|
||||
/// A parser for the AST.
|
||||
#[derive(Debug)]
|
||||
pub struct Parser<'parser> {
|
||||
lexer: Lexer<'parser>,
|
||||
}
|
||||
|
||||
impl<'parser> Parser<'parser> {
|
||||
/// Create a new parser instance from a string.
|
||||
#[must_use]
|
||||
pub fn new(input: &'parser str) -> Self {
|
||||
Self {
|
||||
lexer: Lexer::new(input),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the name of the lexer's source.
|
||||
pub fn set_name(&mut self, name: String) {
|
||||
self.lexer.set_name(name);
|
||||
}
|
||||
|
||||
/// Parse the input string into an AST.
|
||||
pub fn parse(&mut self) -> Result<Vec<Node>, ParserError> {
|
||||
let mut parents = Vec::new();
|
||||
let mut cur_node = Node::new(NodeKind::List(Vec::new()), self.lexer.span());
|
||||
|
||||
while let Some(token) = self.lexer.read()? {
|
||||
match token.kind {
|
||||
TokenKind::BlockComment(_) | TokenKind::LineComment(_) => {}
|
||||
TokenKind::OpenParen => {
|
||||
let child = Node::new(NodeKind::List(Vec::new()), token.span);
|
||||
parents.push(cur_node);
|
||||
cur_node = child;
|
||||
}
|
||||
TokenKind::CloseParen => {
|
||||
let mut parent = parents.pop().ok_or_else(|| {
|
||||
ParserError::new(
|
||||
ParserErrorKind::UnexpectedCloseBracket,
|
||||
token.span.clone(),
|
||||
)
|
||||
})?;
|
||||
|
||||
cur_node.span.extend(&token.span);
|
||||
|
||||
if !matches!(cur_node.kind, NodeKind::List(_)) {
|
||||
return Err(ParserError::new(
|
||||
ParserErrorKind::UnmatchedBracket,
|
||||
token.span,
|
||||
));
|
||||
}
|
||||
|
||||
parent.push_node(cur_node)?;
|
||||
cur_node = parent;
|
||||
}
|
||||
TokenKind::OpenBracket => {
|
||||
let child = Node::new(NodeKind::Vector(Vec::new()), token.span);
|
||||
parents.push(cur_node);
|
||||
cur_node = child;
|
||||
}
|
||||
TokenKind::CloseBracket => {
|
||||
let mut parent = parents.pop().ok_or_else(|| {
|
||||
ParserError::new(
|
||||
ParserErrorKind::UnexpectedCloseBracket,
|
||||
token.span.clone(),
|
||||
)
|
||||
})?;
|
||||
|
||||
cur_node.span.extend(&token.span);
|
||||
|
||||
if !matches!(cur_node.kind, NodeKind::Vector(_)) {
|
||||
return Err(ParserError::new(
|
||||
ParserErrorKind::UnmatchedBracket,
|
||||
token.span,
|
||||
));
|
||||
}
|
||||
|
||||
parent.push_node(cur_node)?;
|
||||
cur_node = parent;
|
||||
}
|
||||
|
||||
_ => cur_node.push_node(Node::try_from(token)?)?,
|
||||
}
|
||||
}
|
||||
|
||||
if !parents.is_empty() {
|
||||
return Err(ParserError::new(
|
||||
ParserErrorKind::UnclosedBracket,
|
||||
cur_node.span,
|
||||
));
|
||||
}
|
||||
|
||||
if let NodeKind::List(body) = cur_node.kind {
|
||||
Ok(body)
|
||||
} else {
|
||||
Err(ParserError::new(
|
||||
ParserErrorKind::Unreachable,
|
||||
cur_node.span,
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        lexer::{LexerError, LexerErrorKind, Symbol},
        span::Span,
    };

    // Generates one `#[test]` per invocation: parses `$input` and compares the
    // outcome with `$ast`. `$src` names the lexer's source handle so that the
    // expected spans can be constructed inside `$ast`.
    macro_rules! test {
        ( $name:ident: $input:literal, $src:ident => $ast:expr ) => {
            #[test]
            fn $name() {
                let mut subject = Parser::new($input);
                let $src = subject.lexer.source();
                let parsed = subject.parse();
                assert_eq!(parsed, $ast);
            }
        };
    }

    test!(parse_list: "(+ 1 2)", src => Ok(vec![
        Node::new(
            NodeKind::List(vec![
                Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(1..2, src.clone())),
                Node::new(NodeKind::Number(1.0), Span::new(3..4, src.clone())),
                Node::new(NodeKind::Number(2.0), Span::new(5..6, src.clone())),
            ]),
            Span::new(0..7, src)
        )
    ]));

    test!(parse_nested_list: "(+ 2.5 64 (* 2 3))", src => Ok(vec![Node::new(
        NodeKind::List(vec![
            Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(1..2, src.clone())),
            Node::new(NodeKind::Number(2.5), Span::new(3..6, src.clone())),
            Node::new(NodeKind::Number(64.0), Span::new(7..9, src.clone())),
            Node::new(
                NodeKind::List(vec![
                    Node::new(NodeKind::Symbol(Symbol::from("*")), Span::new(11..12, src.clone())),
                    Node::new(NodeKind::Number(2.0), Span::new(13..14, src.clone())),
                    Node::new(NodeKind::Number(3.0), Span::new(15..16, src.clone())),
                ]),
                Span::new(10..17, src.clone())
            ),
        ]),
        Span::new(0..18, src)
    )]));

    test!(parse_multiple_expressions: "(/ 6 3 (+ 1 2)) (* 2 5)\n(- 10 5)", src => Ok(vec![
        Node::new(
            NodeKind::List(vec![
                Node::new(NodeKind::Symbol(Symbol::from("/")), Span::new(1..2, src.clone())),
                Node::new(NodeKind::Number(6.0), Span::new(3..4, src.clone())),
                Node::new(NodeKind::Number(3.0), Span::new(5..6, src.clone())),
                Node::new(
                    NodeKind::List(vec![
                        Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(8..9, src.clone())),
                        Node::new(NodeKind::Number(1.0), Span::new(10..11, src.clone())),
                        Node::new(NodeKind::Number(2.0), Span::new(12..13, src.clone())),
                    ]),
                    Span::new(7..14, src.clone())
                ),
            ]),
            Span::new(0..15, src.clone())
        ),
        Node::new(
            NodeKind::List(vec![
                Node::new(NodeKind::Symbol(Symbol::from("*")), Span::new(17..18, src.clone())),
                Node::new(NodeKind::Number(2.0), Span::new(19..20, src.clone())),
                Node::new(NodeKind::Number(5.0), Span::new(21..22, src.clone())),
            ]),
            Span::new(16..23, src.clone())
        ),
        Node::new(
            NodeKind::List(vec![
                Node::new(NodeKind::Symbol(Symbol::from("-")), Span::new(25..26, src.clone())),
                Node::new(NodeKind::Number(10.0), Span::new(27..29, src.clone())),
                Node::new(NodeKind::Number(5.0), Span::new(30..31, src.clone())),
            ]),
            Span::new(24..32, src)
        ),
    ]));

    test!(parse_float: "(2.500000)", src => Ok(vec![Node::new(
        NodeKind::List(vec![Node::new(NodeKind::Number(2.5), Span::new(1..9, src.clone()))]),
        Span::new(0..10, src)
    )]));

    test!(parse_empty: "", _src => Ok(vec![]));

    test!(error_invalid_number: "(+ 1.2.3)", src => Err(ParserError::new(
        ParserErrorKind::Lexer(LexerError::new(
            LexerErrorKind::InvalidNumber("1.2.3".into()),
            Span::new(3..8, src.clone())
        )),
        Span::new(3..8, src)
    )));

    test!(error_unexpected_close_paren: ")", src => Err(ParserError::new(
        ParserErrorKind::UnexpectedCloseBracket,
        Span::new(0..1, src)
    )));
}
|
129
onihime/src/parser/node.rs
Normal file
129
onihime/src/parser/node.rs
Normal file
@ -0,0 +1,129 @@
|
||||
use super::error::{ParserError, ParserErrorKind};
|
||||
use crate::{
|
||||
lexer::{Symbol, Token, TokenKind},
|
||||
span::Span,
|
||||
};
|
||||
|
||||
/// The type of a node in the AST.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum NodeKind {
|
||||
/// Bool.
|
||||
Bool(bool),
|
||||
/// Character.
|
||||
Char(char),
|
||||
/// Keyword.
|
||||
Keyword(Symbol),
|
||||
/// Number.
|
||||
Number(f64),
|
||||
/// String.
|
||||
String(String),
|
||||
/// Symbol.
|
||||
Symbol(Symbol),
|
||||
/// Nil.
|
||||
Nil,
|
||||
|
||||
/// List.
|
||||
List(Vec<Node>),
|
||||
/// Vector.
|
||||
Vector(Vec<Node>),
|
||||
}
|
||||
|
||||
impl std::fmt::Display for NodeKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
NodeKind::Bool(_) => write!(f, "BOOL"),
|
||||
NodeKind::Char(_) => write!(f, "CHAR"),
|
||||
NodeKind::Keyword(_) => write!(f, "KEYWORD"),
|
||||
NodeKind::Number(_) => write!(f, "NUMBER"),
|
||||
NodeKind::String(_) => write!(f, "STRING"),
|
||||
NodeKind::Symbol(_) => write!(f, "SYMBOL"),
|
||||
NodeKind::Nil => write!(f, "NIL"),
|
||||
NodeKind::List(_) => write!(f, "LIST"),
|
||||
NodeKind::Vector(_) => write!(f, "VECTOR"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A node in the AST with a start and end location.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct Node {
|
||||
/// The type of node.
|
||||
pub kind: NodeKind,
|
||||
/// The span in which the node occurs.
|
||||
pub span: Span,
|
||||
}
|
||||
|
||||
impl Node {
|
||||
/// Construct a new instance of `Node`.
|
||||
#[must_use]
|
||||
pub const fn new(kind: NodeKind, span: Span) -> Self {
|
||||
Self { kind, span }
|
||||
}
|
||||
|
||||
/// Push a child node onto a list node.
|
||||
pub fn push_node(&mut self, child: Self) -> Result<(), ParserError> {
|
||||
match &mut self.kind {
|
||||
NodeKind::List(c) | NodeKind::Vector(c) => {
|
||||
c.push(child);
|
||||
}
|
||||
_ => return Err(ParserError::new(ParserErrorKind::Unreachable, child.span)),
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn display(&self, indent: usize) -> String {
|
||||
let mut text = format!(
|
||||
"{}{}@{}..{}\n",
|
||||
" ".repeat(indent),
|
||||
self.kind,
|
||||
self.span.bytes().start,
|
||||
self.span.bytes().end
|
||||
);
|
||||
|
||||
match &self.kind {
|
||||
NodeKind::List(vec) | NodeKind::Vector(vec) => {
|
||||
for node in vec {
|
||||
text.push_str(&node.display(indent + 1));
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
text.trim_end().to_string()
|
||||
}
|
||||
}
|
||||
|
||||
impl std::fmt::Display for Node {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
write!(f, "{}", self.display(0))
|
||||
}
|
||||
}
|
||||
|
||||
impl TryFrom<Token> for Node {
|
||||
type Error = ParserError;
|
||||
|
||||
fn try_from(token: Token) -> Result<Self, Self::Error> {
|
||||
let kind = match token.kind {
|
||||
TokenKind::Bool(b) => NodeKind::Bool(b),
|
||||
TokenKind::Char(c) => NodeKind::Char(c),
|
||||
TokenKind::Number(n) => NodeKind::Number(n),
|
||||
TokenKind::String(s) => NodeKind::String(s),
|
||||
TokenKind::Keyword(k) => NodeKind::Keyword(k),
|
||||
TokenKind::Symbol(s) => NodeKind::Symbol(s),
|
||||
TokenKind::Nil => NodeKind::Nil,
|
||||
TokenKind::OpenParen
|
||||
| TokenKind::CloseParen
|
||||
| TokenKind::OpenBrace
|
||||
| TokenKind::CloseBrace
|
||||
| TokenKind::OpenBracket
|
||||
| TokenKind::CloseBracket
|
||||
| TokenKind::LineComment(_)
|
||||
| TokenKind::BlockComment(_) => {
|
||||
return Err(ParserError::new(ParserErrorKind::Unreachable, token.span))
|
||||
}
|
||||
};
|
||||
|
||||
Ok(Self::new(kind, token.span))
|
||||
}
|
||||
}
|
@ -132,6 +132,10 @@ impl Span {
|
||||
pub fn same_source(&self, other: &Self) -> bool {
|
||||
// Pointer comparison: true only when both spans share the same `Arc`
// allocation, regardless of whether the pointed-to contents are equal.
Arc::ptr_eq(&self.source, &other.source)
|
||||
}
|
||||
|
||||
/// Borrow the `bytes` range stored in this span.
pub(crate) fn bytes(&self) -> &Range<usize> {
|
||||
&self.bytes
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Span {
|
||||
|
Loading…
Reference in New Issue
Block a user