Include the slice in the token as well
This commit is contained in:
parent
ece6645e50
commit
e3a9dcd4fb
@ -42,23 +42,9 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a byte slice containing the contents of the current [Token].
|
|
||||||
#[inline]
|
|
||||||
#[must_use]
|
|
||||||
pub fn slice(&self) -> &'a [u8] {
|
|
||||||
&self.bytes[self.token_start..self.cursor]
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the span of the current [Token].
|
|
||||||
#[inline]
|
|
||||||
#[must_use]
|
|
||||||
pub fn span(&self) -> Span {
|
|
||||||
Span::new(self.token_start, self.cursor)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Return the next [Token] in the input stream.
|
/// Return the next [Token] in the input stream.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn next_token(&mut self) -> Result<Option<Token>, LexerError> {
|
pub fn next_token(&mut self) -> Result<Option<Token<'a>>, LexerError> {
|
||||||
self.token_start = self.cursor;
|
self.token_start = self.cursor;
|
||||||
|
|
||||||
let Some(c) = self.advance() else {
|
let Some(c) = self.advance() else {
|
||||||
@ -100,7 +86,19 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(Some(Token::new(kind, self.span())))
|
Ok(Some(Token::new(kind, self.span(), self.slice())))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
fn slice(&self) -> &'a [u8] {
|
||||||
|
&self.bytes[self.token_start..self.cursor]
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
#[must_use]
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
Span::new(self.token_start, self.cursor)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
@ -142,7 +140,7 @@ impl<'a> Lexer<'a> {
|
|||||||
fn read_char(&mut self) -> Result<TokenKind, LexerError> {
|
fn read_char(&mut self) -> Result<TokenKind, LexerError> {
|
||||||
// NOTE: We have already consumed the initial '\' when this function is invoked
|
// NOTE: We have already consumed the initial '\' when this function is invoked
|
||||||
|
|
||||||
if self.peek().is_none() || self.peek().is_some_and(|c| is_separator(c)) {
|
if self.peek().is_none() || self.peek().is_some_and(is_separator) {
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -159,13 +157,13 @@ impl<'a> Lexer<'a> {
|
|||||||
b"backspace" | b"formfeed" | b"newline" | b"return" | b"space" | b"tab"
|
b"backspace" | b"formfeed" | b"newline" | b"return" | b"space" | b"tab"
|
||||||
) {
|
) {
|
||||||
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
return Err(LexerError::new(LexerErrorKind::InvalidChar, self.span()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(TokenKind::Char)
|
Ok(TokenKind::Char)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
fn complete_ascii_escape(&mut self) -> Result<TokenKind, LexerError> {
|
fn complete_ascii_escape(&mut self) -> Result<TokenKind, LexerError> {
|
||||||
// NOTE: We have already consumed the initial '\x' when this function is invoked
|
// NOTE: We have already consumed the initial '\x' when this function is invoked
|
||||||
@ -353,8 +351,8 @@ impl<'a> Lexer<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Iterator for Lexer<'_> {
|
impl<'a> Iterator for Lexer<'a> {
|
||||||
type Item = Result<Token, LexerError>;
|
type Item = Result<Token<'a>, LexerError>;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
self.next_token().transpose()
|
self.next_token().transpose()
|
||||||
@ -584,7 +582,7 @@ mod tests {
|
|||||||
#[test]
|
#[test]
|
||||||
fn $name(x in $input) {
|
fn $name(x in $input) {
|
||||||
let mut lexer = Lexer::new(&x);
|
let mut lexer = Lexer::new(&x);
|
||||||
assert_eq!(lexer.next(), Some(Ok(Token::new(TokenKind::$kind, lexer.span()))));
|
assert_eq!(lexer.next(), Some(Ok(Token::new(TokenKind::$kind, lexer.span(), lexer.slice()))));
|
||||||
assert_eq!(lexer.slice(), x.as_bytes());
|
assert_eq!(lexer.slice(), x.as_bytes());
|
||||||
assert_eq!(lexer.span(), Span::new(0, x.len()));
|
assert_eq!(lexer.span(), Span::new(0, x.len()));
|
||||||
}
|
}
|
||||||
|
@ -55,17 +55,19 @@ impl TokenKind {
|
|||||||
|
|
||||||
/// A valid token found in Onihime source code.
|
/// A valid token found in Onihime source code.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
pub struct Token {
|
pub struct Token<'a> {
|
||||||
/// Kind of token which was found.
|
/// Kind of token which was found.
|
||||||
pub kind: TokenKind,
|
pub kind: TokenKind,
|
||||||
/// The token's span.
|
/// The token's span.
|
||||||
pub span: Span,
|
pub span: Span,
|
||||||
|
/// The token's slice.
|
||||||
|
pub slice: &'a [u8],
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Token {
|
impl<'a> Token<'a> {
|
||||||
/// Construct a new instance of a token.
|
/// Construct a new instance of a token.
|
||||||
#[must_use]
|
#[must_use]
|
||||||
pub const fn new(kind: TokenKind, span: Span) -> Self {
|
pub const fn new(kind: TokenKind, span: Span, slice: &'a [u8]) -> Self {
|
||||||
Self { kind, span }
|
Self { kind, span, slice }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user