Compare commits

...

No commits in common. "2d95a58ce76248f7efb44b7a719519724c30694f" and "e5fafd03ba2e37a84bb0ad308d5a25661acc146a" have entirely different histories.

23 changed files with 1510 additions and 657 deletions

3
.gitignore vendored
View File

@ -10,6 +10,7 @@ Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# Miscellaneous files
# Miscellaneous
.DS_Store
tarpaulin-report.html
*.profraw

View File

@ -1,10 +1,14 @@
[workspace]
resolver = "2"
members = ["onihime"]
members = ["onihime", "tools/*"]
[workspace.package]
authors = ["Jesse Braham <jesse@hatebit.org>"]
authors = ["Jesse Braham <jesse@beta7.io>"]
edition = "2021"
homepage = "https://onihime.org"
repository = "https://hatebit.org/jesse/onihime"
license = "BSD-3-Clause"
[profile.release]
strip = true
lto = "fat"

View File

@ -1,6 +1,6 @@
BSD 3-Clause License
Copyright (c) 2025, Jesse Braham
Copyright (c) 2024, Jesse Braham
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

52
justfile Executable file
View File

@ -0,0 +1,52 @@
#!/usr/bin/env -S just --justfile
log := "warn"
export JUST_LOG := log
_default:
@just --list --unsorted
# Build all packages
[group('build')]
build:
cargo build --release --workspace
# Build the specified package
[group('build')]
build-package PACKAGE:
cargo build --release --package={{PACKAGE}}
# Check test coverage of all packages in the workspace
[group('test')]
coverage:
cargo tarpaulin --workspace --out=Html --exclude=onihime-macros
# Test all packages
[group('test')]
test:
cargo test --workspace
# Test the specified package
[group('test')]
test-package PACKAGE:
cargo test --package={{PACKAGE}}
# Check the formatting of all packages
[group('lint')]
check-format:
cargo fmt --all -- --check
# Format all packages
[group('lint')]
format:
cargo fmt --all
# Run clippy checks for all packages
[group('lint')]
clippy:
cargo clippy --no-deps -- -D warnings -W clippy::all
# Check formatting and run clippy checks for all packages
[group('lint')]
lint: check-format clippy

View File

@ -7,8 +7,8 @@ homepage.workspace = true
repository.workspace = true
license.workspace = true
[dev-dependencies]
proptest = "1.6.0"
[dependencies]
unicode-segmentation = "1.12.0"
[lints.rust]
unexpected_cfgs = { level = "warn", check-cfg = ['cfg(tarpaulin_include)'] }

View File

@ -1 +0,0 @@
../LICENSE

View File

@ -1 +0,0 @@
# onihime

View File

@ -1,79 +1,56 @@
use std::fmt;
use crate::span::Span;
use crate::Span;
/// Kinds of errors which can occur during lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
/// Kinds of errors that may occur during lexical analysis.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum LexerErrorKind {
/// An invalid character literal was encountered.
InvalidChar,
/// An invalid keyword was encountered.
InvalidKeyword,
/// An invalid number literal was encountered.
InvalidNumber,
/// An invalid symbol was encountered.
InvalidSymbol,
/// An unclosed string literal was encountered.
/// An invalid escape sequence was encountered.
InvalidEscape(String),
/// An invalid numeric literal was encountered.
InvalidNumber(String),
/// An invalid string literal was encountered.
InvalidString,
/// An unclosed character literal was encountered.
UnclosedChar,
/// An unclosed string literal was encountered.
UnclosedString,
}
#[cfg(not(tarpaulin_include))]
impl fmt::Display for LexerErrorKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use LexerErrorKind::*;
match self {
InvalidChar => write!(f, "Invalid character literal"),
InvalidKeyword => write!(f, "Invalid keyword"),
InvalidNumber => write!(f, "Invalid number literal"),
InvalidSymbol => write!(f, "Invalid symbol"),
UnclosedString => write!(f, "Unclosed string literal"),
}
}
}
/// Errors which occur during lexical analysis.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
/// An error which occurred during lexical analysis.
///
/// `LexerError`s contain the kind of error which occurred, as well as a [Span]
/// specifying the [Source] and [Location] of the error.
///
/// [Source]: crate::span::Source
/// [Location]: crate::span::Location
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct LexerError {
/// The kind of lexer error.
/// The kind of error encountered.
pub kind: LexerErrorKind,
/// The span of the lexer error.
/// The span in which the error occurred.
pub span: Span,
/// Additional context regarding the lexer error.
pub context: Option<String>,
}
impl LexerError {
/// Construct a new instance of a lexer error.
/// Construct a new instance of `LexerError`.
#[must_use]
pub const fn new(kind: LexerErrorKind, span: Span) -> Self {
Self {
kind,
span,
context: None,
}
}
/// Provide additional context for a lexer error.
#[must_use]
pub fn with_context<C>(mut self, f: impl FnOnce() -> C) -> Self
where
C: fmt::Display,
{
self.context = Some(f().to_string());
self
Self { kind, span }
}
}
impl std::error::Error for LexerError {}
#[cfg(not(tarpaulin_include))]
impl fmt::Display for LexerError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
if let Some(ref context) = self.context {
write!(f, "{}: {}", self.kind, context)
} else {
write!(f, "{}", self.kind)
impl std::fmt::Display for LexerError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use LexerErrorKind::*;
match &self.kind {
InvalidEscape(c) => write!(f, "Invalid escape sequence '\\{c}'"),
InvalidNumber(n) => write!(f, "Invalid numeric literal `{n}`"),
InvalidString => write!(f, "Invalid string literal"),
UnclosedChar => write!(f, "Unclosed character literal"),
UnclosedString => write!(f, "Unclosed string literal"),
}
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
/// A symbol used to identify a function or variable.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[repr(transparent)]
pub(crate) struct Symbol(String);
impl Symbol {
/// Create a new `Symbol` from a string.
pub(crate) fn from<S>(s: S) -> Self
where
S: Into<String>,
{
Self(s.into())
}
}
#[cfg(not(tarpaulin_include))]
impl std::fmt::Display for Symbol {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.0)
}
}

View File

@ -1,12 +1,13 @@
use crate::Span;
use super::Symbol;
use crate::span::Span;
/// Kinds of tokens which are valid in Onihime source code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum TokenKind {
/// Possible kinds of a [Token].
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum TokenKind {
/// Block comment, e.g. `#| ... |#`
BlockComment(String),
/// Line comment, e.g. `; ...`
Comment,
/// Whitespace, e.g. ' ', '\t', '\n'
Whitespace,
LineComment(String),
/// Opening parenthesis, e.g. `(`
OpenParen,
@ -20,52 +21,46 @@ pub enum TokenKind {
OpenBracket,
/// Closing bracket, e.g. `]`
CloseBracket,
/// Opening hash-brace, e.g. `#{`
OpenHashBrace,
/// Boolean, e.g. `true`, `false`
Bool,
/// Character, e.g. `\a`, `\x1e`, `\u03BB`, `\newline`
Char,
/// Keyword, e.g. `:foo-bar`, `:baz`, `:qux0`
Keyword,
/// Floating-point number, e.g. `-1.0`, `2.0`, `3.0e-4`
Decimal,
/// Integer, e.g. `0`, `-1`, `0b1010`, `0o7`, `0xDECAFBAD`
Integer,
/// Ratio, e.g. `1/3`, `-5/7`
Ratio,
Bool(bool),
/// Character, e.g. `'c'`, `'\n'`
Char(String),
/// Floating-point number, e.g. `-1.0`, `2.0`, `+0.003`
Float(f64),
/// Integer, e.g. `0`, `-1`, `+200`
Integer(i64),
/// Keyword, e.g. `:baz`
Keyword(Symbol),
/// String, e.g. `"foo bar"`
String,
/// Symbol, e.g. `baz`, `*qux*`, `nil?`, `+`
Symbol,
String(String),
/// Symbol, e.g. `qux`, `+`
Symbol(Symbol),
/// Nil, e.g. `nil`
Nil,
}
impl TokenKind {
/// Returns `true` if the token type is an atom.
pub fn is_atom(&self) -> bool {
use TokenKind::*;
matches!(
self,
Bool | Char | Keyword | Decimal | Integer | Ratio | String | Symbol | Nil
)
}
}
/// A valid token found in Onihime source code.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Token {
/// Kind of token which was found.
pub kind: TokenKind,
/// The token's span.
pub span: Span,
/// A token encountered during lexical analysis.
///
/// `Token`s contain the kind of token which was found, as well as a [Span]
/// specifying the [Source] and [Location] of the token.
///
/// [Source]: crate::span::Source
/// [Location]: crate::span::Location
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Token {
/// The kind of token.
pub(crate) kind: TokenKind,
/// The span in which the token occurs.
pub(crate) span: Span,
}
impl Token {
/// Construct a new instance of a token.
/// Construct a new instance of `Token`.
#[must_use]
pub const fn new(kind: TokenKind, span: Span) -> Self {
pub(crate) const fn new(kind: TokenKind, span: Span) -> Self {
Self { kind, span }
}
}

View File

@ -1,4 +1,4 @@
//! Onihime programming language
//! Onihime programming language.
#![deny(
missing_debug_implementations,
@ -7,8 +7,6 @@
unsafe_code
)]
pub use self::span::Span;
pub mod lexer;
mod lexer;
mod parser;
mod span;

219
onihime/src/parser/ast.rs Normal file
View File

@ -0,0 +1,219 @@
use super::error::ParserError;
use crate::{
lexer::{Symbol, Token, TokenKind},
span::Span,
};
/// Abstract Syntax Tree (AST).
#[derive(Debug, Default, Clone, PartialEq)]
pub(crate) struct Ast {
root: Vec<Node>,
}
impl Ast {
/// Construct a new instance of `Ast`.
#[must_use]
pub(crate) fn new(root: Vec<Node>) -> Self {
Self { root }
}
}
impl From<Vec<Node>> for Ast {
fn from(root: Vec<Node>) -> Self {
Self { root }
}
}
#[cfg(not(tarpaulin_include))]
impl std::fmt::Display for Ast {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for node in &self.root {
writeln!(f, "{node}")?;
}
Ok(())
}
}
/// A node in the Abstract Syntax Tree (AST).
///
/// `Node`s contain the kind of node which was found, as well as a [Span]
/// specifying the [Source] and [Location] of the node.
///
/// [Source]: crate::span::Source
/// [Location]: crate::span::Location
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Node {
/// The kind of node.
pub kind: Expr,
/// The span in which the node occurs.
pub span: Span,
}
impl Node {
/// Construct a new instance of `Node`.
#[must_use]
pub(crate) fn new(kind: Expr, span: Span) -> Self {
Self { kind, span }
}
/// Push a child node onto a list node.
pub(crate) fn push_node(&mut self, child: Self) -> Result<(), ParserError> {
match &mut self.kind {
Expr::List(vec) | Expr::Map(vec) | Expr::Set(vec) | Expr::Vector(vec) => {
vec.push(child);
}
_ => unimplemented!(),
}
Ok(())
}
#[cfg(not(tarpaulin_include))]
#[must_use]
fn display(&self, indent: usize) -> String {
let mut text = format!(
"{}{}@{}..{}\n",
" ".repeat(indent),
self.kind,
self.span.bytes().start,
self.span.bytes().end
);
match &self.kind {
Expr::Atom(_) => {}
Expr::List(vec) | Expr::Map(vec) | Expr::Set(vec) | Expr::Vector(vec) => {
for node in vec {
text.push_str(&format!("{}\n", node.display(indent + 1)));
}
}
}
text.trim_end().to_string()
}
}
impl TryFrom<Token> for Node {
type Error = ParserError;
fn try_from(token: Token) -> Result<Self, Self::Error> {
let span = token.span.clone();
let kind = Expr::try_from(token)?;
Ok(Self::new(kind, span))
}
}
#[cfg(not(tarpaulin_include))]
impl std::fmt::Display for Node {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{}", self.display(0))
}
}
/// An atomic value.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum Atom {
/// Boolean, e.g. `true`, `false`
Bool(bool),
/// Character, e.g. `'c'`, `'\n'`
Char(String),
/// Floating-point number, e.g. `-1.0`, `2.0`, `+0.003`
Float(f64),
/// Integer, e.g. `0`, `-1`, `+200`
Integer(i64),
/// Keyword, e.g. `:baz`
Keyword(Symbol),
/// String, e.g. `"foo bar"`
String(String),
/// Symbol, e.g. `qux`, `+`
Symbol(Symbol),
/// Nil, e.g. `nil`
Nil,
}
#[cfg(not(tarpaulin_include))]
impl std::fmt::Display for Atom {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Atom::*;
match self {
Bool(_) => write!(f, "BOOL"),
Char(_) => write!(f, "CHAR"),
Float(_) => write!(f, "FLOAT"),
Integer(_) => write!(f, "INTEGER"),
Keyword(_) => write!(f, "KEYWORD"),
String(_) => write!(f, "STRING"),
Symbol(_) => write!(f, "SYMBOL"),
Nil => write!(f, "NIL"),
}
}
}
/// An expression.
#[derive(Debug, Clone, PartialEq)]
pub(crate) enum Expr {
/// An atomic value.
Atom(Atom),
/// A list of nodes.
List(Vec<Node>),
/// A map of nodes.
Map(Vec<Node>),
/// A set of nodes.
Set(Vec<Node>),
/// A vector of nodes.
Vector(Vec<Node>),
}
impl Expr {
/// Which closing delimiter is associated with the expression kind?
pub(crate) fn closing_delimiter(&self) -> Option<TokenKind> {
match self {
Expr::List(_) => Some(TokenKind::CloseParen),
Expr::Map(_) | Expr::Set(_) => Some(TokenKind::CloseBrace),
Expr::Vector(_) => Some(TokenKind::CloseBracket),
_ => None,
}
}
}
impl From<Atom> for Expr {
fn from(atom: Atom) -> Self {
Self::Atom(atom)
}
}
impl TryFrom<Token> for Expr {
type Error = ParserError;
fn try_from(token: Token) -> Result<Self, Self::Error> {
let kind = match token.kind {
TokenKind::Bool(b) => Atom::Bool(b),
TokenKind::Char(c) => Atom::Char(c),
TokenKind::Float(n) => Atom::Float(n),
TokenKind::Integer(n) => Atom::Integer(n),
TokenKind::Keyword(k) => Atom::Keyword(k),
TokenKind::String(s) => Atom::String(s),
TokenKind::Symbol(s) => Atom::Symbol(s),
TokenKind::Nil => Atom::Nil,
_ => unimplemented!(),
};
Ok(kind.into())
}
}
#[cfg(not(tarpaulin_include))]
impl std::fmt::Display for Expr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use Expr::*;
match self {
Atom(atom) => write!(f, "{atom}"),
List(_) => write!(f, "LIST"),
Map(_) => write!(f, "MAP"),
Set(_) => write!(f, "SET"),
Vector(_) => write!(f, "VECTOR"),
}
}
}

View File

@ -0,0 +1,63 @@
use crate::{lexer::LexerError, span::Span};
/// Kinds of errors that can occur during parsing.
#[derive(Debug, Clone, PartialEq)]
pub enum ParserErrorKind {
/// An error which occurred during lexical analysis.
Lexer(LexerError),
/// Key in map is missing its corresponding value.
MissingValueInMap,
/// Opening delimiter does not have a matching closing delimiter.
UnclosedSequence,
/// An unexpected closing delimiter was found.
UnexpectedClosingDelimiter,
/// Unexpectedly reached end of input.
UnexpectedEof,
/// An unmatched closing delimiter was found.
UnmatchedClosingDelimiter,
}
/// Parser error, with a start and end location.
#[derive(Debug, Clone, PartialEq)]
pub struct ParserError {
/// The type of error encountered.
pub kind: ParserErrorKind,
/// The span in which the error occurred.
pub span: Span,
}
impl ParserError {
/// Construct a new instance of `ParserError`.
#[must_use]
pub const fn new(kind: ParserErrorKind, span: Span) -> Self {
Self { kind, span }
}
}
impl From<LexerError> for ParserError {
fn from(err: LexerError) -> Self {
let span = err.span.clone();
Self::new(ParserErrorKind::Lexer(err), span)
}
}
impl std::error::Error for ParserError {}
#[cfg(not(tarpaulin_include))]
impl std::fmt::Display for ParserError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
use ParserErrorKind::*;
match &self.kind {
Lexer(err) => write!(f, "{err}"),
MissingValueInMap => write!(f, "Key in map is missing its corresponding value"),
UnclosedSequence => write!(
f,
"Opening delimiter does not have a matching closing delimiter"
),
UnexpectedClosingDelimiter => write!(f, "An unexpected closing delimiter was found"),
UnexpectedEof => write!(f, "Unexpectedly reached end of input"),
UnmatchedClosingDelimiter => write!(f, "An unmatched closing delimiter was found"),
}
}
}

327
onihime/src/parser/mod.rs Normal file
View File

@ -0,0 +1,327 @@
pub(crate) use self::{ast::Ast, error::ParserError};
use self::{
ast::{Expr, Node},
error::ParserErrorKind,
};
use crate::{
lexer::{Lexer, TokenKind},
span::Span,
};
mod ast;
mod error;
pub(crate) struct Parser<'parser> {
lexer: Lexer<'parser>,
parents: Vec<Node>,
current: Node,
}
impl<'parser> Parser<'parser> {
/// Create a new parser instance from a string.
#[must_use]
pub(crate) fn new(input: &'parser str) -> Self {
let lexer = Lexer::new(input);
let current = Node::new(Expr::List(Vec::new()), lexer.span());
Self {
lexer,
parents: Vec::new(),
current,
}
}
/// Set the name of the lexer's source.
pub(crate) fn set_name(&mut self, name: String) {
self.lexer.set_name(name);
}
/// Produce an Abstract Syntax Tree (AST) from the source input.
pub(crate) fn parse(mut self) -> Result<Ast, ParserError> {
// This parser is actually quite simple! Recursively parse expressions until we
// run out of tokens, or an error occurs:
while !self.lexer.eof() {
if let Some(node) = self.expr()? {
self.current.push_node(node)?;
}
}
// When we reach the end of input, there should be no remaining parent nodes; if
// there are, that means that there is a missing closing delimiter somewhere:
if !self.parents.is_empty() {
return Err(ParserError::new(
ParserErrorKind::UnclosedSequence,
self.current.span,
));
}
// Since we created an initial `Expr::List` node to hold the parsed contents
// (i.e. so that we had something to push nodes to), we can now rip out its guts
// and return the newly constructed AST:
if let Expr::List(root) = self.current.kind {
Ok(Ast::new(root))
} else {
unreachable!() // TODO: Is this really true? It should be... right?
}
}
fn expr(&mut self) -> Result<Option<Node>, ParserError> {
if let Some(token) = self.lexer.read()? {
match token.kind {
// Comments are simply ignored by the parser:
TokenKind::BlockComment(_) | TokenKind::LineComment(_) => Ok(None),
// Any valid opening delimiters begins a new sequence:
TokenKind::OpenParen => self.begin_sequence(Expr::List, token.span),
TokenKind::OpenHashBrace => self.begin_sequence(Expr::Map, token.span),
TokenKind::OpenBrace => self.begin_sequence(Expr::Set, token.span),
TokenKind::OpenBracket => self.begin_sequence(Expr::Vector, token.span),
// Any valid closing delimiters end the current sequence:
kind
@ (TokenKind::CloseParen | TokenKind::CloseBrace | TokenKind::CloseBracket) => {
self.end_sequence(kind, token.span)
}
// Atoms are pushed to the current sequence:
_ => {
let node = Node::try_from(token)?;
let span = node.span.clone();
self.current.push_node(node)?;
self.current.span.extend(&span);
Ok(None)
}
}
} else {
Err(ParserError::new(
ParserErrorKind::UnexpectedEof,
self.lexer.span(),
))
}
}
fn begin_sequence(
&mut self,
init: impl FnOnce(Vec<Node>) -> Expr,
span: Span,
) -> Result<Option<Node>, ParserError> {
self.current.span.extend(&span);
self.parents.push(self.current.clone());
self.current = Node::new(init(Vec::new()), span.clone());
Ok(None)
}
fn end_sequence(&mut self, kind: TokenKind, span: Span) -> Result<Option<Node>, ParserError> {
// We will ultimately return the current expression, so clone it and update its
// span first:
let mut current = self.current.clone();
current.span.extend(&span);
// Update the parser's current node to the previous parent, or return an error
// if no parents exist:
self.current = self.parents.pop().ok_or_else(|| {
ParserError::new(ParserErrorKind::UnexpectedClosingDelimiter, span.clone())
})?;
// Ensure that the appropriate closing delimiter was found for our current node:
if current.kind.closing_delimiter() != Some(kind) {
return Err(ParserError::new(
ParserErrorKind::UnmatchedClosingDelimiter,
span,
));
}
// For maps, ensure that each key has a corresponding value:
match current.kind {
Expr::Map(ref vec) if vec.len() % 2 != 0 => {
return Err(ParserError::new(ParserErrorKind::MissingValueInMap, span));
}
_ => {}
}
// Finally, return the current node so it can be added to the AST:
Ok(Some(current))
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::{
lexer::{LexerError, LexerErrorKind, Symbol},
parser::ast::Atom,
};
macro_rules! test {
( $name:ident: $input:literal, $src:ident => $ast:expr ) => {
#[test]
fn $name() {
let parser = Parser::new($input);
let $src = parser.lexer.source();
assert_eq!(parser.parse(), $ast);
}
};
}
test!(empty: "", _src => Ok(Ast::default()));
test!(error_invalid_number: "(+ 1.2.3)", src => Err(ParserError::new(
ParserErrorKind::Lexer(LexerError::new(
LexerErrorKind::InvalidNumber("1.2.3".into()),
Span::new(3..8, src.clone())
)),
Span::new(3..8, src)
)));
test!(list: "(+ 1 2) ; sneaky comment :)", src => Ok(Ast::from(vec![
Node::new(
Expr::List(vec![
Node::new(Atom::Symbol(Symbol::from("+")).into(), Span::new(1..2, src.clone())),
Node::new(Atom::Integer(1).into(), Span::new(3..4, src.clone())),
Node::new(Atom::Integer(2).into(), Span::new(5..6, src.clone())),
]),
Span::new(0..7, src)
)
])));
test!(error_list_unmatched_bracket: "(]", src => Err(ParserError::new(
ParserErrorKind::UnmatchedClosingDelimiter,
Span::new(1..2, src),
)));
test!(error_list_missing_close_paren: "(true", src => Err(ParserError::new(
ParserErrorKind::UnclosedSequence,
Span::new(0..5, src),
)));
test!(error_list_unexpected_close_paren: ")", src => Err(ParserError::new(
ParserErrorKind::UnexpectedClosingDelimiter,
Span::new(0..1, src)
)));
test!(map: "#{:a 0.0 :b 1.0}", src => Ok(Ast::from(vec![
Node::new(
Expr::Map(vec![
Node::new(Atom::Keyword(Symbol::from("a")).into(), Span::new(2..4, src.clone())),
Node::new(Atom::Float(0.0).into(), Span::new(5..8, src.clone())),
Node::new(Atom::Keyword(Symbol::from("b")).into(), Span::new(9..11, src.clone())),
Node::new(Atom::Float(1.0).into(), Span::new(12..15, src.clone())),
]),
Span::new(0..16, src)
)
])));
test!(error_map_missing_value: "#{:x}", src => Err(ParserError::new(
ParserErrorKind::MissingValueInMap,
Span::new(4..5, src.clone())
)));
test!(error_map_unmatched_bracket: "#{)", src => Err(ParserError::new(
ParserErrorKind::UnmatchedClosingDelimiter,
Span::new(2..3, src),
)));
test!(error_map_missing_close_brace: "#{", src => Err(ParserError::new(
ParserErrorKind::UnclosedSequence,
Span::new(0..2, src),
)));
test!(error_map_set_unexpected_close_brace: "}", src => Err(ParserError::new(
ParserErrorKind::UnexpectedClosingDelimiter,
Span::new(0..1, src)
)));
test!(set: "{{} nil}", src => Ok(Ast::from(vec![
Node::new(
Expr::Set(vec![
Node::new(Expr::Set(vec![]), Span::new(1..3, src.clone())),
Node::new(Expr::Atom(Atom::Nil), Span::new(4..7, src.clone())),
]),
Span::new(0..8, src),
)
])));
test!(error_set_unmatched_bracket: "{]", src => Err(ParserError::new(
ParserErrorKind::UnmatchedClosingDelimiter,
Span::new(1..2, src),
)));
test!(error_set_missing_close_brace: "{", src => Err(ParserError::new(
ParserErrorKind::UnclosedSequence,
Span::new(0..1, src),
)));
test!(vector: "['a' 'b' 'c']", src => Ok(Ast::from(vec![
Node::new(
Expr::Vector(vec![
Node::new(Expr::Atom(Atom::Char("a".into())), Span::new(1..4, src.clone())),
Node::new(Expr::Atom(Atom::Char("b".into())), Span::new(5..8, src.clone())),
Node::new(Expr::Atom(Atom::Char("c".into())), Span::new(9..12, src.clone())),
]),
Span::new(0..13, src),
)
])));
test!(error_vector_unmatched_bracket: "[}", src => Err(ParserError::new(
ParserErrorKind::UnmatchedClosingDelimiter,
Span::new(1..2, src),
)));
test!(error_vector_missing_close_bracket: "[", src => Err(ParserError::new(
ParserErrorKind::UnclosedSequence,
Span::new(0..1, src),
)));
test!(error_vector_unexpected_close_bracket: "]", src => Err(ParserError::new(
ParserErrorKind::UnexpectedClosingDelimiter,
Span::new(0..1, src)
)));
test!(multiple_expressions: "(/ 6 3 (+ 1 2)) (* 2 5)\n(- 10 5)", src => Ok(Ast::from(vec![
Node::new(
Expr::List(vec![
Node::new(Atom::Symbol(Symbol::from("/")).into(), Span::new(1..2, src.clone())),
Node::new(Atom::Integer(6).into(), Span::new(3..4, src.clone())),
Node::new(Atom::Integer(3).into(), Span::new(5..6, src.clone())),
Node::new(
Expr::List(vec![
Node::new(Atom::Symbol(Symbol::from("+")).into(), Span::new(8..9, src.clone())),
Node::new(Atom::Integer(1).into(), Span::new(10..11, src.clone())),
Node::new(Atom::Integer(2).into(), Span::new(12..13, src.clone())),
]),
Span::new(7..14, src.clone())
),
]),
Span::new(0..15, src.clone())
),
Node::new(
Expr::List(vec![
Node::new(Atom::Symbol(Symbol::from("*")).into(), Span::new(17..18, src.clone())),
Node::new(Atom::Integer(2).into(), Span::new(19..20, src.clone())),
Node::new(Atom::Integer(5).into(), Span::new(21..22, src.clone())),
]),
Span::new(16..23, src.clone())
),
Node::new(
Expr::List(vec![
Node::new(Atom::Symbol(Symbol::from("-")).into(), Span::new(25..26, src.clone())),
Node::new(Atom::Integer(10).into(), Span::new(27..29, src.clone())),
Node::new(Atom::Integer(5).into(), Span::new(30..31, src.clone())),
]),
Span::new(24..32, src)
),
])));
test!(function_application: "(join \"foo\" \"bar\")", src => Ok(Ast::from(vec![Node::new(
Expr::List(vec![
Node::new(Atom::Symbol(Symbol::from("join")).into(), Span::new(1..5, src.clone())),
Node::new(Atom::String("foo".into()).into(), Span::new(6..11, src.clone())),
Node::new(Atom::String("bar".into()).into(), Span::new(12..17, src.clone())),
]),
Span::new(0..18, src)
)])));
}

View File

@ -1,53 +1,156 @@
/// A (half-open) range bounded inclusively below and exclusively above
/// `(start..end)`.
///
/// The range `start..end` contains all values with `start <= x < end`. It is
/// empty if `start >= end`.
use std::{cmp::Ordering, iter, ops::Range, sync::Arc};
/// A location within some source text.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
/// The lower bound of the range (inclusive).
pub start: usize,
/// The upper bound of the range (exclusive).
pub end: usize,
pub struct Location {
line: usize,
column: usize,
}
impl Span {
/// Construct a new instance of a span.
impl Location {
/// Construct a new instance of `Location`.
#[must_use]
pub const fn new(start: usize, end: usize) -> Self {
Self { start, end }
pub(crate) const fn new(line: usize, column: usize) -> Self {
Self { line, column }
}
}
/// Returns `true` if `item` is contained in the span.
#[must_use]
pub fn contains(&self, item: usize) -> bool {
self.start <= item && item < self.end
}
/// Returns `true` if the span contains no items.
#[must_use]
pub fn is_empty(&self) -> bool {
self.start >= self.end
}
/// Extend the span's end bound to that of the provided span, if
/// `other.end > self.end`.
pub fn extend(&mut self, other: &Self) {
if other.end > self.end {
self.end = other.end;
impl PartialOrd for Location {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
match self.line.partial_cmp(&other.line) {
Some(Ordering::Equal) => self.column.partial_cmp(&other.column),
ord => ord,
}
}
}
impl From<std::ops::Range<usize>> for Span {
fn from(range: std::ops::Range<usize>) -> Self {
Self::new(range.start, range.end)
/// Some (optionally named) source text.
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct Source {
name: Option<String>,
contents: String,
lines: Vec<usize>,
}
impl Source {
/// Construct a new instance of `Source`.
#[must_use]
pub(crate) fn new(name: Option<String>, contents: String) -> Self {
let lines = contents
.match_indices('\n')
.map(|(i, _)| i)
.chain(iter::once(contents.len()))
.collect();
Self {
name,
contents,
lines,
}
}
/// Get the name of the source.
#[must_use]
pub(crate) fn name(&self) -> Option<&str> {
self.name.as_deref()
}
/// Set the name of the source.
pub(crate) fn set_name(&mut self, name: String) {
self.name = Some(name);
}
/// Get the [Location] of the specified byte in the source.
#[must_use]
pub(crate) fn location(&self, byte: usize) -> Location {
let line = self.lines.partition_point(|&x| x < byte);
let start = line.checked_sub(1).map_or(0, |n| self.lines[n] + 1);
let column = self.contents[start..byte].chars().count();
Location::new(line, column)
}
/// Get the full contents of the source.
#[must_use]
pub(crate) fn contents(&self) -> &str {
&self.contents
}
/// Get the specified line from the source.
#[must_use]
pub(crate) fn get_line(&self, line: usize) -> &str {
let end = self.lines[line];
let start = line.checked_sub(1).map_or(0, |n| self.lines[n] + 1);
&self.contents[start..end]
}
}
impl From<Span> for std::ops::Range<usize> {
fn from(span: Span) -> Self {
span.start..span.end
/// A contiguous sequence of bytes within some source.
#[derive(Debug, Default, Clone, Eq)]
pub struct Span {
bytes: Range<usize>,
source: Arc<Source>,
}
impl Span {
/// Construct a new instance of `Span`.
#[must_use]
pub(crate) fn new(bytes: Range<usize>, source: Arc<Source>) -> Self {
Self { bytes, source }
}
/// Join two spans, creating a new span.
#[must_use]
pub(crate) fn join(self, other: &Self) -> Self {
debug_assert!(self.same_source(other));
Self::new(self.bytes.start..other.bytes.end, self.source)
}
/// Extend one span to include another.
pub(crate) fn extend(&mut self, other: &Self) {
debug_assert!(self.same_source(other));
self.bytes.end = other.bytes.end;
}
/// The start location of a span within some source.
#[must_use]
pub(crate) fn location(&self) -> Location {
self.source.location(self.bytes.start)
}
/// The end location of a span within some source.
#[must_use]
pub(crate) fn end_location(&self) -> Location {
self.source.location(self.bytes.end)
}
/// Do two spans share the same source?
#[must_use]
pub(crate) fn same_source(&self, other: &Self) -> bool {
Arc::ptr_eq(&self.source, &other.source)
}
#[must_use]
pub(crate) fn bytes(&self) -> &Range<usize> {
&self.bytes
}
}
impl PartialEq for Span {
fn eq(&self, other: &Self) -> bool {
self.same_source(other) && self.bytes == other.bytes
}
}
impl std::hash::Hash for Span {
fn hash<H>(&self, state: &mut H)
where
H: std::hash::Hasher,
{
self.bytes.hash(state);
self.source.hash(state);
}
}
@ -56,35 +159,71 @@ mod tests {
use super::*;
#[test]
fn span_equality() {
let a = Span::new(0, 0);
let b = Span::new(0, 1);
fn location_partial_ord() {
assert!(Location::new(1, 1) < Location::new(1, 2));
assert!(Location::new(1, 10) < Location::new(2, 1));
assert!(Location::new(5, 5) == Location::new(5, 5));
assert!(Location::new(10, 1) > Location::new(9, 99));
}
#[test]
fn source_get_set_name() {
let mut src = Source::new(None, "".into());
assert!(src.name().is_none());
src.set_name("foo".into());
assert!(src.name() == Some("foo"));
}
#[test]
fn source_location() {
let source = Source::new(None, "foo\nbar\nbaz".into());
assert_eq!(source.location(0), Location::new(0, 0));
assert_eq!(source.location(5), Location::new(1, 1));
assert_eq!(source.location(10), Location::new(2, 2));
}
#[test]
fn source_contents() {
let contents = String::from("xxx");
let source = Source::new(None, contents.clone());
assert_eq!(source.contents(), &contents);
}
#[test]
fn source_get_line() {
let source = Source::new(None, "line 1\nline 2\nline 3\n".into());
assert_eq!(source.get_line(0), "line 1");
assert_eq!(source.get_line(1), "line 2");
assert_eq!(source.get_line(2), "line 3");
}
#[test]
fn span_partial_eq() {
let source = Arc::new(Source::new(None, String::new()));
let a = Span::new(0..0, source.clone());
assert_eq!(a, a);
let b = Span::new(1..10, source.clone());
assert_ne!(a, b);
let source2 = Arc::new(Source::new(None, String::from("foo")));
let c = Span::new(0..0, source2.clone());
assert_ne!(a, c);
}
#[test]
fn span_contains() {
let s = Span::new(1, 3);
fn span_start_end_location() {
let source = Arc::new(Source::new(None, "foo\nbar\nbaz".into()));
let span = Span::new(2..9, source);
assert!(s.contains(1));
assert!(s.contains(2));
let start = span.location();
assert_eq!(start.line, 0);
assert_eq!(start.column, 2);
assert!(!s.contains(0));
assert!(!s.contains(3));
}
#[test]
fn span_extend() {
let mut a = Span::new(0, 5);
let b = Span::new(1, 10);
let c = Span::new(5, 6);
assert_eq!(a.end, 5);
a.extend(&b);
assert_eq!(a.end, 10);
a.extend(&c);
assert_eq!(a.end, 10);
let end = span.end_location();
assert_eq!(end.line, 2);
assert_eq!(end.column, 1);
}
}

View File

@ -1,16 +0,0 @@
# Edition
edition = "2021"
# Comments
format_code_in_doc_comments = true
normalize_comments = true
wrap_comments = true
# Imports
group_imports = "StdExternalCrate"
imports_granularity = "Crate"
imports_layout = "HorizontalVertical"
# Miscellaneous
enum_discrim_align_threshold = 25
hex_literal_case = "Upper"

View File

@ -1,5 +0,0 @@
[formatting]
align_entries = true
allowed_blank_lines = 1
column_width = 100
reorder_arrays = true

16
tools/fuga/Cargo.toml Normal file
View File

@ -0,0 +1,16 @@
[package]
name = "fuga"
version = "0.0.0"
authors.workspace = true
edition.workspace = true
homepage.workspace = true
repository.workspace = true
license.workspace = true
[[bin]]
name = "fuga"
path = "src/bin/fuga.rs"
[dependencies]
clap = { version = "4.5.21", features = ["derive", "wrap_help"] }
log = { version = "0.4.22", features = ["std"] }

View File

@ -0,0 +1,30 @@
use clap::{
builder::{styling::Style, Styles},
Parser,
Subcommand,
};
use fuga::{color, command, AppResult};
const HEADER_STYLE: Style = Style::new().fg_color(Some(color::RED)).bold().underline();
const LITERAL_STYLE: Style = Style::new().fg_color(Some(color::PURPLE)).bold();
const STYLES: Styles = Styles::styled()
.usage(HEADER_STYLE)
.header(HEADER_STYLE)
.literal(LITERAL_STYLE);
#[derive(Debug, Parser)]
#[command(styles = STYLES, version)]
struct Cli {
#[command(subcommand)]
command: Command,
}
#[derive(Debug, Subcommand)]
enum Command {}
fn main() -> AppResult<()> {
fuga::logger::init()?;
match Cli::parse().command {}
}

21
tools/fuga/src/color.rs Normal file
View File

@ -0,0 +1,21 @@
pub use clap::builder::styling::Reset;
use clap::builder::styling::{Color, RgbColor};
pub const RED: Color = Color::Rgb(RgbColor(225, 55, 55)); // Red
pub const ORANGE: Color = Color::Rgb(RgbColor(215, 140, 100)); // Orange
pub const WHITE: Color = Color::Rgb(RgbColor(255, 255, 255)); // White
pub const BLUE: Color = Color::Rgb(RgbColor(60, 140, 185)); // Blue
pub const PURPLE: Color = Color::Rgb(RgbColor(180, 130, 215)); // Purple
pub trait EscapeSequence {
fn to_escape_sequence(&self) -> String;
}
impl EscapeSequence for Color {
fn to_escape_sequence(&self) -> String {
match self {
Color::Rgb(RgbColor(r, g, b)) => format!("\x1b[1;38;2;{r};{g};{b}m"),
_ => unimplemented!(),
}
}
}

6
tools/fuga/src/lib.rs Normal file
View File

@ -0,0 +1,6 @@
#![deny(rust_2018_idioms, unsafe_code)]
pub mod color;
pub mod logger;
pub type AppResult<T> = std::result::Result<T, Box<dyn std::error::Error>>;

47
tools/fuga/src/logger.rs Normal file
View File

@ -0,0 +1,47 @@
use std::str::FromStr as _;
use crate::color::{self, EscapeSequence as _};
struct FugaLogger {
level: log::LevelFilter,
}
impl log::Log for FugaLogger {
fn enabled(&self, metadata: &log::Metadata<'_>) -> bool {
metadata.level() <= self.level
}
fn log(&self, record: &log::Record<'_>) {
if self.enabled(record.metadata()) {
let style = match record.level() {
log::Level::Error => color::RED.to_escape_sequence(),
log::Level::Warn => color::ORANGE.to_escape_sequence(),
log::Level::Info => color::WHITE.to_escape_sequence(),
log::Level::Debug => color::BLUE.to_escape_sequence(),
log::Level::Trace => color::PURPLE.to_escape_sequence(),
};
eprintln!(
"{style}{: <5}{} {}",
record.level(),
color::Reset.render(),
record.args()
);
}
}
fn flush(&self) {}
}
pub fn init() -> Result<(), log::SetLoggerError> {
let level = if let Some(level) = std::option_env!("FUGA_LOG") {
log::LevelFilter::from_str(level).unwrap_or(log::LevelFilter::Off)
} else {
log::LevelFilter::Info
};
let logger = FugaLogger { level };
log::set_boxed_logger(Box::new(logger)).map(|()| log::set_max_level(level))?;
Ok(())
}