Add the initial implementation of the parser
This commit is contained in:
parent
cd76ceaa77
commit
de78b9840a
@ -3,4 +3,5 @@
|
|||||||
#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)]
|
#![deny(missing_debug_implementations, missing_docs, rust_2018_idioms)]
|
||||||
|
|
||||||
mod lexer;
|
mod lexer;
|
||||||
|
mod parser;
|
||||||
mod span;
|
mod span;
|
||||||
|
57
onihime/src/parser/error.rs
Normal file
57
onihime/src/parser/error.rs
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
use crate::{lexer::LexerError, span::Span};
|
||||||
|
|
||||||
|
/// Errors that can occur during parsing.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub enum ParserErrorKind {
|
||||||
|
/// An error which ocurred during lexical analysis.
|
||||||
|
Lexer(LexerError),
|
||||||
|
/// An unexpecting closing parenthesis/bracket was encountered.
|
||||||
|
UnexpectedCloseBracket,
|
||||||
|
/// Opening parenthesis/bracket does not have a matching closing
|
||||||
|
/// parenthesis/bracket.
|
||||||
|
UnclosedBracket,
|
||||||
|
/// An unmatched parenthesis/bracket was encountered.
|
||||||
|
UnmatchedBracket,
|
||||||
|
/// Unexpected parser state reached.
|
||||||
|
Unreachable,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parser error, with a start and end location.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct ParserError {
|
||||||
|
/// The type of error encountered.
|
||||||
|
pub kind: ParserErrorKind,
|
||||||
|
/// The span in which the error occurred.
|
||||||
|
pub span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ParserError {
|
||||||
|
/// Construct a new instance of `ParserErorr`.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn new(kind: ParserErrorKind, span: Span) -> Self {
|
||||||
|
Self { kind, span }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Empty impl: `ParserError` relies entirely on the trait's default methods.
impl std::error::Error for ParserError {}
|
||||||
|
|
||||||
|
impl std::fmt::Display for ParserError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
use ParserErrorKind::*;
|
||||||
|
|
||||||
|
match &self.kind {
|
||||||
|
Lexer(err) => write!(f, "{err}"),
|
||||||
|
UnexpectedCloseBracket => write!(f, "Unexpected closing bracket"),
|
||||||
|
UnclosedBracket => write!(f, "Unclosed parenthesis"),
|
||||||
|
UnmatchedBracket => write!(f, "Unmatched bracket"),
|
||||||
|
Unreachable => write!(f, "Unexpected parsing state reached"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<LexerError> for ParserError {
|
||||||
|
fn from(err: LexerError) -> Self {
|
||||||
|
let span = err.span.clone();
|
||||||
|
Self::new(ParserErrorKind::Lexer(err), span)
|
||||||
|
}
|
||||||
|
}
|
212
onihime/src/parser/mod.rs
Normal file
212
onihime/src/parser/mod.rs
Normal file
@ -0,0 +1,212 @@
|
|||||||
|
pub use self::{
|
||||||
|
error::{ParserError, ParserErrorKind},
|
||||||
|
node::{Node, NodeKind},
|
||||||
|
};
|
||||||
|
use crate::lexer::{Lexer, TokenKind};
|
||||||
|
|
||||||
|
mod error;
|
||||||
|
mod node;
|
||||||
|
|
||||||
|
/// A parser for the AST.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Parser<'parser> {
|
||||||
|
lexer: Lexer<'parser>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'parser> Parser<'parser> {
|
||||||
|
/// Create a new parser instance from a string.
|
||||||
|
#[must_use]
|
||||||
|
pub fn new(input: &'parser str) -> Self {
|
||||||
|
Self {
|
||||||
|
lexer: Lexer::new(input),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the name of the lexer's source.
|
||||||
|
pub fn set_name(&mut self, name: String) {
|
||||||
|
self.lexer.set_name(name);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse the input string into an AST.
|
||||||
|
pub fn parse(&mut self) -> Result<Vec<Node>, ParserError> {
|
||||||
|
let mut parents = Vec::new();
|
||||||
|
let mut cur_node = Node::new(NodeKind::List(Vec::new()), self.lexer.span());
|
||||||
|
|
||||||
|
while let Some(token) = self.lexer.read()? {
|
||||||
|
match token.kind {
|
||||||
|
TokenKind::BlockComment(_) | TokenKind::LineComment(_) => {}
|
||||||
|
TokenKind::OpenParen => {
|
||||||
|
let child = Node::new(NodeKind::List(Vec::new()), token.span);
|
||||||
|
parents.push(cur_node);
|
||||||
|
cur_node = child;
|
||||||
|
}
|
||||||
|
TokenKind::CloseParen => {
|
||||||
|
let mut parent = parents.pop().ok_or_else(|| {
|
||||||
|
ParserError::new(
|
||||||
|
ParserErrorKind::UnexpectedCloseBracket,
|
||||||
|
token.span.clone(),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
cur_node.span.extend(&token.span);
|
||||||
|
|
||||||
|
if !matches!(cur_node.kind, NodeKind::List(_)) {
|
||||||
|
return Err(ParserError::new(
|
||||||
|
ParserErrorKind::UnmatchedBracket,
|
||||||
|
token.span,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
parent.push_node(cur_node)?;
|
||||||
|
cur_node = parent;
|
||||||
|
}
|
||||||
|
TokenKind::OpenBracket => {
|
||||||
|
let child = Node::new(NodeKind::Vector(Vec::new()), token.span);
|
||||||
|
parents.push(cur_node);
|
||||||
|
cur_node = child;
|
||||||
|
}
|
||||||
|
TokenKind::CloseBracket => {
|
||||||
|
let mut parent = parents.pop().ok_or_else(|| {
|
||||||
|
ParserError::new(
|
||||||
|
ParserErrorKind::UnexpectedCloseBracket,
|
||||||
|
token.span.clone(),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
cur_node.span.extend(&token.span);
|
||||||
|
|
||||||
|
if !matches!(cur_node.kind, NodeKind::Vector(_)) {
|
||||||
|
return Err(ParserError::new(
|
||||||
|
ParserErrorKind::UnmatchedBracket,
|
||||||
|
token.span,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
parent.push_node(cur_node)?;
|
||||||
|
cur_node = parent;
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => cur_node.push_node(Node::try_from(token)?)?,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !parents.is_empty() {
|
||||||
|
return Err(ParserError::new(
|
||||||
|
ParserErrorKind::UnclosedBracket,
|
||||||
|
cur_node.span,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
if let NodeKind::List(body) = cur_node.kind {
|
||||||
|
Ok(body)
|
||||||
|
} else {
|
||||||
|
Err(ParserError::new(
|
||||||
|
ParserErrorKind::Unreachable,
|
||||||
|
cur_node.span,
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{
        lexer::{LexerError, LexerErrorKind, Symbol},
        span::Span,
    };

    // Generates one `#[test]` function: parses `$text` and compares the
    // result with `$expected`. `$source` names a local bound to the lexer's
    // source so the expected value can build spans from it.
    macro_rules! test {
        ( $test_name:ident: $text:literal, $source:ident => $expected:expr ) => {
            #[test]
            fn $test_name() {
                let mut parser = Parser::new($text);
                let $source = parser.lexer.source();
                assert_eq!(parser.parse(), $expected);
            }
        };
    }

    // A single flat list.
    test!(parse_list: "(+ 1 2)", src => Ok(vec![Node::new(
        NodeKind::List(vec![
            Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(1..2, src.clone())),
            Node::new(NodeKind::Number(1.0), Span::new(3..4, src.clone())),
            Node::new(NodeKind::Number(2.0), Span::new(5..6, src.clone())),
        ]),
        Span::new(0..7, src)
    )]));

    // A list containing another list.
    test!(parse_nested_list: "(+ 2.5 64 (* 2 3))", src => Ok(vec![Node::new(
        NodeKind::List(vec![
            Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(1..2, src.clone())),
            Node::new(NodeKind::Number(2.5), Span::new(3..6, src.clone())),
            Node::new(NodeKind::Number(64.0), Span::new(7..9, src.clone())),
            Node::new(
                NodeKind::List(vec![
                    Node::new(NodeKind::Symbol(Symbol::from("*")), Span::new(11..12, src.clone())),
                    Node::new(NodeKind::Number(2.0), Span::new(13..14, src.clone())),
                    Node::new(NodeKind::Number(3.0), Span::new(15..16, src.clone())),
                ]),
                Span::new(10..17, src.clone())
            ),
        ]),
        Span::new(0..18, src)
    )]));

    // Several top-level expressions, separated by a space and a newline.
    test!(parse_multiple_expressions: "(/ 6 3 (+ 1 2)) (* 2 5)\n(- 10 5)", src => Ok(vec![
        Node::new(
            NodeKind::List(vec![
                Node::new(NodeKind::Symbol(Symbol::from("/")), Span::new(1..2, src.clone())),
                Node::new(NodeKind::Number(6.0), Span::new(3..4, src.clone())),
                Node::new(NodeKind::Number(3.0), Span::new(5..6, src.clone())),
                Node::new(
                    NodeKind::List(vec![
                        Node::new(NodeKind::Symbol(Symbol::from("+")), Span::new(8..9, src.clone())),
                        Node::new(NodeKind::Number(1.0), Span::new(10..11, src.clone())),
                        Node::new(NodeKind::Number(2.0), Span::new(12..13, src.clone())),
                    ]),
                    Span::new(7..14, src.clone())
                ),
            ]),
            Span::new(0..15, src.clone())
        ),
        Node::new(
            NodeKind::List(vec![
                Node::new(NodeKind::Symbol(Symbol::from("*")), Span::new(17..18, src.clone())),
                Node::new(NodeKind::Number(2.0), Span::new(19..20, src.clone())),
                Node::new(NodeKind::Number(5.0), Span::new(21..22, src.clone())),
            ]),
            Span::new(16..23, src.clone())
        ),
        Node::new(
            NodeKind::List(vec![
                Node::new(NodeKind::Symbol(Symbol::from("-")), Span::new(25..26, src.clone())),
                Node::new(NodeKind::Number(10.0), Span::new(27..29, src.clone())),
                Node::new(NodeKind::Number(5.0), Span::new(30..31, src.clone())),
            ]),
            Span::new(24..32, src)
        ),
    ]));

    // Trailing zeros in a float literal.
    test!(parse_float: "(2.500000)", src => Ok(vec![Node::new(
        NodeKind::List(vec![
            Node::new(NodeKind::Number(2.5), Span::new(1..9, src.clone())),
        ]),
        Span::new(0..10, src)
    )]));

    // Empty input yields no nodes.
    test!(parse_empty: "", _src => Ok(vec![]));

    // A lexer failure surfaces as a `Lexer` parser error with the same span.
    test!(error_invalid_number: "(+ 1.2.3)", src => Err(ParserError::new(
        ParserErrorKind::Lexer(LexerError::new(
            LexerErrorKind::InvalidNumber("1.2.3".into()),
            Span::new(3..8, src.clone())
        )),
        Span::new(3..8, src)
    )));

    // A closer with nothing open.
    test!(error_unexpected_close_paren: ")", src => Err(ParserError::new(
        ParserErrorKind::UnexpectedCloseBracket,
        Span::new(0..1, src)
    )));
}
|
129
onihime/src/parser/node.rs
Normal file
129
onihime/src/parser/node.rs
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
use super::error::{ParserError, ParserErrorKind};
|
||||||
|
use crate::{
|
||||||
|
lexer::{Symbol, Token, TokenKind},
|
||||||
|
span::Span,
|
||||||
|
};
|
||||||
|
|
||||||
|
/// The type of a node in the AST.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub enum NodeKind {
|
||||||
|
/// Bool.
|
||||||
|
Bool(bool),
|
||||||
|
/// Character.
|
||||||
|
Char(char),
|
||||||
|
/// Keyword.
|
||||||
|
Keyword(Symbol),
|
||||||
|
/// Number.
|
||||||
|
Number(f64),
|
||||||
|
/// String.
|
||||||
|
String(String),
|
||||||
|
/// Symbol.
|
||||||
|
Symbol(Symbol),
|
||||||
|
/// Nil.
|
||||||
|
Nil,
|
||||||
|
|
||||||
|
/// List.
|
||||||
|
List(Vec<Node>),
|
||||||
|
/// Vector.
|
||||||
|
Vector(Vec<Node>),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for NodeKind {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
match self {
|
||||||
|
NodeKind::Bool(_) => write!(f, "BOOL"),
|
||||||
|
NodeKind::Char(_) => write!(f, "CHAR"),
|
||||||
|
NodeKind::Keyword(_) => write!(f, "KEYWORD"),
|
||||||
|
NodeKind::Number(_) => write!(f, "NUMBER"),
|
||||||
|
NodeKind::String(_) => write!(f, "STRING"),
|
||||||
|
NodeKind::Symbol(_) => write!(f, "SYMBOL"),
|
||||||
|
NodeKind::Nil => write!(f, "NIL"),
|
||||||
|
NodeKind::List(_) => write!(f, "LIST"),
|
||||||
|
NodeKind::Vector(_) => write!(f, "VECTOR"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A node in the AST with a start and end location.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub struct Node {
|
||||||
|
/// The type of node.
|
||||||
|
pub kind: NodeKind,
|
||||||
|
/// The span in which the node occurs.
|
||||||
|
pub span: Span,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Node {
|
||||||
|
/// Construct a new instance of `Node`.
|
||||||
|
#[must_use]
|
||||||
|
pub const fn new(kind: NodeKind, span: Span) -> Self {
|
||||||
|
Self { kind, span }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Push a child node onto a list node.
|
||||||
|
pub fn push_node(&mut self, child: Self) -> Result<(), ParserError> {
|
||||||
|
match &mut self.kind {
|
||||||
|
NodeKind::List(c) | NodeKind::Vector(c) => {
|
||||||
|
c.push(child);
|
||||||
|
}
|
||||||
|
_ => return Err(ParserError::new(ParserErrorKind::Unreachable, child.span)),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn display(&self, indent: usize) -> String {
|
||||||
|
let mut text = format!(
|
||||||
|
"{}{}@{}..{}\n",
|
||||||
|
" ".repeat(indent),
|
||||||
|
self.kind,
|
||||||
|
self.span.bytes().start,
|
||||||
|
self.span.bytes().end
|
||||||
|
);
|
||||||
|
|
||||||
|
match &self.kind {
|
||||||
|
NodeKind::List(vec) | NodeKind::Vector(vec) => {
|
||||||
|
for node in vec {
|
||||||
|
text.push_str(&node.display(indent + 1));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
|
||||||
|
text.trim_end().to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl std::fmt::Display for Node {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.display(0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TryFrom<Token> for Node {
|
||||||
|
type Error = ParserError;
|
||||||
|
|
||||||
|
fn try_from(token: Token) -> Result<Self, Self::Error> {
|
||||||
|
let kind = match token.kind {
|
||||||
|
TokenKind::Bool(b) => NodeKind::Bool(b),
|
||||||
|
TokenKind::Char(c) => NodeKind::Char(c),
|
||||||
|
TokenKind::Number(n) => NodeKind::Number(n),
|
||||||
|
TokenKind::String(s) => NodeKind::String(s),
|
||||||
|
TokenKind::Keyword(k) => NodeKind::Keyword(k),
|
||||||
|
TokenKind::Symbol(s) => NodeKind::Symbol(s),
|
||||||
|
TokenKind::Nil => NodeKind::Nil,
|
||||||
|
TokenKind::OpenParen
|
||||||
|
| TokenKind::CloseParen
|
||||||
|
| TokenKind::OpenBrace
|
||||||
|
| TokenKind::CloseBrace
|
||||||
|
| TokenKind::OpenBracket
|
||||||
|
| TokenKind::CloseBracket
|
||||||
|
| TokenKind::LineComment(_)
|
||||||
|
| TokenKind::BlockComment(_) => {
|
||||||
|
return Err(ParserError::new(ParserErrorKind::Unreachable, token.span))
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(Self::new(kind, token.span))
|
||||||
|
}
|
||||||
|
}
|
@ -132,6 +132,10 @@ impl Span {
|
|||||||
pub fn same_source(&self, other: &Self) -> bool {
|
pub fn same_source(&self, other: &Self) -> bool {
|
||||||
Arc::ptr_eq(&self.source, &other.source)
|
Arc::ptr_eq(&self.source, &other.source)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn bytes(&self) -> &Range<usize> {
|
||||||
|
&self.bytes
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartialEq for Span {
|
impl PartialEq for Span {
|
||||||
|
Loading…
Reference in New Issue
Block a user