Add the initial implementation of the parser

This commit is contained in:
Jesse Braham 2024-12-01 15:03:22 +01:00
parent cd76ceaa77
commit de78b9840a
5 changed files with 403 additions and 0 deletions

View File

@ -3,4 +3,5 @@
#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)] #![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)]
mod lexer; mod lexer;
mod parser;
mod span; mod span;

View File

@ -0,0 +1,57 @@
use crate::{lexer::LexerError, span::Span};
/// Errors that can occur during parsing.
#[derive(Debug, Clone, PartialEq)]
pub enum ParserErrorKind {
/// An error which occurred during lexical analysis.
Lexer(LexerError),
/// An unexpected closing parenthesis/bracket was encountered, i.e. one
/// with no open parenthesis/bracket left to close.
UnexpectedCloseBracket,
/// Opening parenthesis/bracket does not have a matching closing
/// parenthesis/bracket.
UnclosedBracket,
/// An unmatched parenthesis/bracket was encountered, i.e. the closing
/// token is of a different type than the opening one.
UnmatchedBracket,
/// Unexpected parser state reached.
Unreachable,
}
/// Parser error, with a start and end location.
///
/// Pairs the category of failure (`kind`) with the region of the source
/// (`span`) it is reported against.
#[derive(Debug, Clone, PartialEq)]
pub struct ParserError {
/// The type of error encountered.
pub kind: ParserErrorKind,
/// The span in which the error occurred.
pub span: Span,
}
impl ParserError {
/// Construct a new instance of `ParserErorr`.
#[must_use]
pub const fn new(kind: ParserErrorKind, span: Span) -> Self {
Self { kind, span }
}
}
// Empty impl: every `Error` method keeps its default; the required `Debug`
// and `Display` come from the derive and the `Display` impl for `ParserError`.
impl std::error::Error for ParserError {}
impl std::fmt::Display for ParserError {
    /// Write a short, human-readable description of the error.
    ///
    /// Lexer errors are rendered with their own `Display` output; every other
    /// kind maps to a fixed message. The span is not part of the message.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        use ParserErrorKind::*;

        match &self.kind {
            Lexer(err) => write!(f, "{err}"),
            UnexpectedCloseBracket => f.write_str("Unexpected closing bracket"),
            // `UnclosedBracket` is raised for both `(` and `[`, so the message
            // uses the same generic "bracket" wording as its siblings.
            UnclosedBracket => f.write_str("Unclosed bracket"),
            UnmatchedBracket => f.write_str("Unmatched bracket"),
            Unreachable => f.write_str("Unexpected parsing state reached"),
        }
    }
}
impl From<LexerError> for ParserError {
    /// Wrap a lexer error, reporting it against the lexer error's own span.
    fn from(err: LexerError) -> Self {
        // `err` is moved into the kind below, so its span is copied out first.
        let location = err.span.clone();
        let kind = ParserErrorKind::Lexer(err);
        Self::new(kind, location)
    }
}

212
onihime/src/parser/mod.rs Normal file
View File

@ -0,0 +1,212 @@
pub use self::{
error::{ParserError, ParserErrorKind},
node::{Node, NodeKind},
};
use crate::lexer::{Lexer, TokenKind};
mod error;
mod node;
/// A parser for the AST.
///
/// Wraps a [`Lexer`] and turns the tokens it reads into [`Node`]s.
#[derive(Debug)]
pub struct Parser<'parser> {
// The lexer that supplies tokens; it borrows the input for `'parser`.
lexer: Lexer<'parser>,
}
impl<'parser> Parser<'parser> {
    /// Build a parser that reads its tokens from `input`.
    #[must_use]
    pub fn new(input: &'parser str) -> Self {
        let lexer = Lexer::new(input);
        Self { lexer }
    }

    /// Forward `name` to the underlying lexer as the name of its source.
    pub fn set_name(&mut self, name: String) {
        self.lexer.set_name(name);
    }

    /// Parse the input into the sequence of its top-level AST nodes.
    ///
    /// Comments are skipped. `OpenParen`/`CloseParen` tokens delimit
    /// [`NodeKind::List`] nodes and `OpenBracket`/`CloseBracket` tokens
    /// delimit [`NodeKind::Vector`] nodes; every other token is converted
    /// with `Node::try_from` and appended to the collection being built.
    ///
    /// # Errors
    ///
    /// - any error from the lexer, converted into a [`ParserError`];
    /// - `UnexpectedCloseBracket` when a closing token has nothing to close;
    /// - `UnmatchedBracket` when a closing token does not match the kind of
    ///   collection currently open;
    /// - `UnclosedBracket` when the input ends with a collection still open;
    /// - whatever `Node::try_from` / `Node::push_node` report for tokens that
    ///   cannot become a node.
    pub fn parse(&mut self) -> Result<Vec<Node>, ParserError> {
        // Collections that are still open, outermost first. `current` is the
        // innermost one; it starts as an implicit list holding the top level.
        let mut enclosing = Vec::new();
        let mut current = Node::new(NodeKind::List(Vec::new()), self.lexer.span());

        while let Some(token) = self.lexer.read()? {
            match token.kind {
                TokenKind::LineComment(_) | TokenKind::BlockComment(_) => {}
                TokenKind::OpenParen | TokenKind::OpenBracket => {
                    let empty = if matches!(token.kind, TokenKind::OpenParen) {
                        NodeKind::List(Vec::new())
                    } else {
                        NodeKind::Vector(Vec::new())
                    };
                    // The new collection becomes current; the old one waits
                    // on the stack until the matching close token arrives.
                    let opened = Node::new(empty, token.span);
                    enclosing.push(std::mem::replace(&mut current, opened));
                }
                TokenKind::CloseParen | TokenKind::CloseBracket => {
                    // A missing parent is reported before a kind mismatch.
                    let mut parent = match enclosing.pop() {
                        Some(parent) => parent,
                        None => {
                            return Err(ParserError::new(
                                ParserErrorKind::UnexpectedCloseBracket,
                                token.span,
                            ));
                        }
                    };
                    current.span.extend(&token.span);

                    let closes_current = matches!(
                        (&token.kind, &current.kind),
                        (TokenKind::CloseParen, NodeKind::List(_))
                            | (TokenKind::CloseBracket, NodeKind::Vector(_))
                    );
                    if !closes_current {
                        return Err(ParserError::new(
                            ParserErrorKind::UnmatchedBracket,
                            token.span,
                        ));
                    }

                    parent.push_node(current)?;
                    current = parent;
                }
                _ => current.push_node(Node::try_from(token)?)?,
            }
        }

        // Anything left on the stack was opened but never closed.
        if !enclosing.is_empty() {
            return Err(ParserError::new(
                ParserErrorKind::UnclosedBracket,
                current.span,
            ));
        }

        match current.kind {
            NodeKind::List(body) => Ok(body),
            _ => Err(ParserError::new(
                ParserErrorKind::Unreachable,
                current.span,
            )),
        }
    }
}
// Unit tests for `Parser::parse`: each case parses a literal input and
// compares the complete result (nodes and spans, or the error) for equality.
#[cfg(test)]
mod tests {
use super::*;
use crate::{
lexer::{LexerError, LexerErrorKind, Symbol},
span::Span,
};
// Declares one `#[test]` function named `$name` that parses `$input` and
// asserts the result equals `$ast`. `$src` is bound to the value returned by
// `parser.lexer.source()` so the expected spans can be built from it.
macro_rules! test {
( $name:ident: $input:literal, $src:ident => $ast:expr ) => {
#[test]
fn $name() {
let mut parser = Parser::new($input);
let $src = parser.lexer.source();
assert_eq!(parser.parse(), $ast);
}
};
}
// A single flat list; the list's span runs from `(` through `)`.
test!(parse_list: "(+ 1 2)", src => Ok(vec![
Node::new(
NodeKind::List(vec![
Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(1..2, src.clone())),
Node::new(NodeKind::Number(1.0), Span::new(3..4, src.clone())),
Node::new(NodeKind::Number(2.0), Span::new(5..6, src.clone())),
]),
Span::new(0..7, src)
)
]));
// A list nested inside another list.
test!(parse_nested_list: "(+ 2.5 64 (* 2 3))", src => Ok(vec![Node::new(
NodeKind::List(vec![
Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(1..2, src.clone())),
Node::new(NodeKind::Number(2.5), Span::new(3..6, src.clone())),
Node::new(NodeKind::Number(64.0), Span::new(7..9, src.clone())),
Node::new(
NodeKind::List(vec![
Node::new(NodeKind::Symbol(Symbol::from("*")), Span::new(11..12, src.clone())),
Node::new(NodeKind::Number(2.0), Span::new(13..14, src.clone())),
Node::new(NodeKind::Number(3.0), Span::new(15..16, src.clone())),
]),
Span::new(10..17, src.clone())
),
]),
Span::new(0..18, src)
)]));
// Several top-level expressions, separated by a space and by a newline.
test!(parse_multiple_expressions: "(/ 6 3 (+ 1 2)) (* 2 5)\n(- 10 5)", src => Ok(vec![
Node::new(
NodeKind::List(vec![
Node::new(NodeKind::Symbol(Symbol::from("/")), Span::new(1..2, src.clone())),
Node::new(NodeKind::Number(6.0), Span::new(3..4, src.clone())),
Node::new(NodeKind::Number(3.0), Span::new(5..6, src.clone())),
Node::new(
NodeKind::List(vec![
Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(8..9, src.clone())),
Node::new(NodeKind::Number(1.0), Span::new(10..11, src.clone())),
Node::new(NodeKind::Number(2.0), Span::new(12..13, src.clone())),
]),
Span::new(7..14, src.clone())
),
]),
Span::new(0..15, src.clone())
),
Node::new(
NodeKind::List(vec![
Node::new(NodeKind::Symbol(Symbol::from("*")), Span::new(17..18, src.clone())),
Node::new(NodeKind::Number(2.0), Span::new(19..20, src.clone())),
Node::new(NodeKind::Number(5.0), Span::new(21..22, src.clone())),
]),
Span::new(16..23, src.clone())
),
Node::new(
NodeKind::List(vec![
Node::new(NodeKind::Symbol(Symbol::from("-")), Span::new(25..26, src.clone())),
Node::new(NodeKind::Number(10.0), Span::new(27..29, src.clone())),
Node::new(NodeKind::Number(5.0), Span::new(30..31, src.clone())),
]),
Span::new(24..32, src)
),
]));
// Trailing zeros do not change the parsed value; the span covers all digits.
test!(parse_float: "(2.500000)", src => Ok(vec![Node::new(
NodeKind::List(vec![Node::new(NodeKind::Number(2.5), Span::new(1..9, src.clone()))]),
Span::new(0..10, src)
)]));
// Empty input yields no nodes rather than an error.
test!(parse_empty: "", _src => Ok(vec![]));
// Lexer errors are wrapped in `ParserErrorKind::Lexer` and keep their span.
test!(error_invalid_number: "(+ 1.2.3)", src => Err(ParserError::new(
ParserErrorKind::Lexer(LexerError::new(
LexerErrorKind::InvalidNumber("1.2.3".into()),
Span::new(3..8, src.clone())
)),
Span::new(3..8, src)
)));
// A closing parenthesis with nothing open is reported at that token.
test!(error_unexpected_close_paren: ")", src => Err(ParserError::new(
ParserErrorKind::UnexpectedCloseBracket,
Span::new(0..1, src)
)));
}

129
onihime/src/parser/node.rs Normal file
View File

@ -0,0 +1,129 @@
use super::error::{ParserError, ParserErrorKind};
use crate::{
lexer::{Symbol, Token, TokenKind},
span::Span,
};
/// The type of a node in the AST.
///
/// Leaf variants carry the value taken from the token they were converted
/// from; `List` and `Vector` own their child nodes.
#[derive(Debug, Clone, PartialEq)]
pub enum NodeKind {
/// A boolean value.
Bool(bool),
/// A single character.
Char(char),
/// A keyword, stored as a [`Symbol`].
Keyword(Symbol),
/// A number, stored as an `f64`.
Number(f64),
/// A string.
String(String),
/// A symbol.
Symbol(Symbol),
/// Nil.
Nil,
/// A list of child nodes.
List(Vec<Node>),
/// A vector of child nodes.
Vector(Vec<Node>),
}
impl std::fmt::Display for NodeKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
NodeKind::Bool(_) => write!(f, "BOOL"),
NodeKind::Char(_) => write!(f, "CHAR"),
NodeKind::Keyword(_) => write!(f, "KEYWORD"),
NodeKind::Number(_) => write!(f, "NUMBER"),
NodeKind::String(_) => write!(f, "STRING"),
NodeKind::Symbol(_) => write!(f, "SYMBOL"),
NodeKind::Nil => write!(f, "NIL"),
NodeKind::List(_) => write!(f, "LIST"),
NodeKind::Vector(_) => write!(f, "VECTOR"),
}
}
}
/// A node in the AST with a start and end location.
#[derive(Debug, Clone, PartialEq)]
pub struct Node {
/// The type of node, including any value or child nodes it carries.
pub kind: NodeKind,
/// The span in which the node occurs.
pub span: Span,
}
impl Node {
    /// Construct a new instance of `Node`.
    #[must_use]
    pub const fn new(kind: NodeKind, span: Span) -> Self {
        Self { kind, span }
    }

    /// Push a child node onto a list or vector node.
    ///
    /// # Errors
    ///
    /// Returns `ParserErrorKind::Unreachable`, located at `child`'s span,
    /// when `self` is neither a `List` nor a `Vector`.
    pub fn push_node(&mut self, child: Self) -> Result<(), ParserError> {
        match &mut self.kind {
            NodeKind::List(children) | NodeKind::Vector(children) => {
                children.push(child);
                Ok(())
            }
            _ => Err(ParserError::new(ParserErrorKind::Unreachable, child.span)),
        }
    }

    /// Render this node and its descendants as an indented tree.
    ///
    /// Each node takes exactly one line of the form `KIND@start..end`,
    /// prefixed by `indent` spaces; children are rendered one level deeper.
    /// Lines are separated by `\n` and the result has no trailing newline.
    fn display(&self, indent: usize) -> String {
        let mut text = format!(
            "{}{}@{}..{}",
            " ".repeat(indent),
            self.kind,
            self.span.bytes().start,
            self.span.bytes().end
        );

        match &self.kind {
            NodeKind::List(children) | NodeKind::Vector(children) => {
                for child in children {
                    // The separator goes *before* each child so that siblings
                    // never end up sharing a line.
                    text.push('\n');
                    text.push_str(&child.display(indent + 1));
                }
            }
            _ => {}
        }

        text
    }
}
impl std::fmt::Display for Node {
    /// Write the tree rendering of this node, starting at indentation zero.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let rendered = self.display(0);
        f.write_str(&rendered)
    }
}
impl TryFrom<Token> for Node {
type Error = ParserError;
fn try_from(token: Token) -> Result<Self, Self::Error> {
let kind = match token.kind {
TokenKind::Bool(b) => NodeKind::Bool(b),
TokenKind::Char(c) => NodeKind::Char(c),
TokenKind::Number(n) => NodeKind::Number(n),
TokenKind::String(s) => NodeKind::String(s),
TokenKind::Keyword(k) => NodeKind::Keyword(k),
TokenKind::Symbol(s) => NodeKind::Symbol(s),
TokenKind::Nil => NodeKind::Nil,
TokenKind::OpenParen
| TokenKind::CloseParen
| TokenKind::OpenBrace
| TokenKind::CloseBrace
| TokenKind::OpenBracket
| TokenKind::CloseBracket
| TokenKind::LineComment(_)
| TokenKind::BlockComment(_) => {
return Err(ParserError::new(ParserErrorKind::Unreachable, token.span))
}
};
Ok(Self::new(kind, token.span))
}
}

View File

@ -132,6 +132,10 @@ impl Span {
pub fn same_source(&self, other: &Self) -> bool { pub fn same_source(&self, other: &Self) -> bool {
Arc::ptr_eq(&self.source, &other.source) Arc::ptr_eq(&self.source, &other.source)
} }
/// Borrow the range stored in this span's `bytes` field.
///
/// Crate-internal accessor; presumably the start/end byte offsets into the
/// source text — confirm against the `Span` definition.
pub(crate) fn bytes(&self) -> &Range<usize> {
&self.bytes
}
} }
impl PartialEq for Span { impl PartialEq for Span {