forked from trinitrix/core
feat(trixy-lang_parser): Add a lexer with error handling for trixy code
This commit is contained in:
parent
cd2dbc516a
commit
3da75f6913
|
@ -0,0 +1,6 @@
|
|||
# build
|
||||
/target
|
||||
/result
|
||||
|
||||
# trixy-lang_parser is a library
|
||||
Cargo.lock
|
|
@ -0,0 +1,11 @@
|
|||
[package]
|
||||
name = "trixy-lang_parser"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
clap = { version = "4.4.11", features = ["derive"] }
|
||||
pretty_assertions = "1.4.0"
|
||||
thiserror = "1.0.50"
|
|
@ -0,0 +1,9 @@
|
|||
fn print(message: CommandTransferValue);
|
||||
|
||||
nasp trinitrix {
|
||||
fn hi(name: String) -> String;
|
||||
}
|
||||
|
||||
|
||||
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
|
||||
// vim: syntax=rust
|
|
@ -0,0 +1,11 @@
|
|||
fn print(message: CommandTransferValue);
|
||||
|
||||
nasp trinitrix {
|
||||
fn hi(name: String) -> String;
|
||||
}
|
||||
namespace commands { >-
|
||||
}
|
||||
|
||||
|
||||
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
|
||||
// vim: syntax=rust
|
|
@ -0,0 +1,36 @@
|
|||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct CommandSpec {
|
||||
pub(crate) declarations: Vec<Declaration>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct Declaration {
|
||||
pub(crate) namespace: Vec<Namespace>,
|
||||
pub(crate) genus: Genus,
|
||||
}
|
||||
|
||||
/// A single namespace, identified by its name.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Namespace {
    /// The identifier of the namespace.
    pub(crate) name: String,
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) enum Genus {
|
||||
Function {
|
||||
name: String,
|
||||
inputs: Vec<NamedType>,
|
||||
output: Type,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct NamedType {
|
||||
pub(crate) name: String,
|
||||
pub(crate) base: Type,
|
||||
}
|
||||
|
||||
/// The types that can appear in a declaration.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum Type {
    /// A string value.
    String,
    /// No value.
    // NOTE(review): presumably the output of functions declared without `-> ...` — confirm.
    Void,
}
|
|
@ -0,0 +1,9 @@
|
|||
use thiserror::Error;
|
||||
|
||||
use crate::lexing::error::SpannedLexingError;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum TrixyError {
|
||||
#[error(transparent)]
|
||||
Parsing(#[from] SpannedLexingError),
|
||||
}
|
|
@ -0,0 +1,100 @@
|
|||
use std::{error::Error, fmt::Display};
|
||||
use thiserror::Error;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum LexingError {
|
||||
#[error("No matches were found")]
|
||||
NoMatchesTaken,
|
||||
#[error("Expected an token, but reached end of file")]
|
||||
UnexpectedEOF,
|
||||
#[error("Char ('{0}') is not a know token!")]
|
||||
UnknownCharacter(char),
|
||||
#[error("The Arrow token must be of the form: ->")]
|
||||
ExpectedArrow,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SpannedLexingError {
|
||||
Error {
|
||||
source: LexingError,
|
||||
/// The starting char index of the error in the source file
|
||||
start: usize,
|
||||
/// The starting char index of the error in the context line
|
||||
contexted_start: usize,
|
||||
/// The line above the error
|
||||
line_above: String,
|
||||
/// The line below the error
|
||||
line_below: String,
|
||||
/// The line in which the error occurred
|
||||
line: String,
|
||||
/// The line number of the main error line
|
||||
line_number: usize,
|
||||
},
|
||||
}
|
||||
|
||||
impl Error for SpannedLexingError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
let Self::Error { source, .. } = self;
|
||||
Some(source)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SpannedLexingError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self::Error {
|
||||
source,
|
||||
line_above,
|
||||
line_below,
|
||||
line,
|
||||
line_number,
|
||||
contexted_start,
|
||||
..
|
||||
} = self;
|
||||
let error_line = {
|
||||
let mut output = String::new();
|
||||
output.push_str("\x1b[92;1m");
|
||||
for _ in 0..(*contexted_start) {
|
||||
output.push(' ');
|
||||
}
|
||||
line_number.to_string().chars().for_each(|_| {
|
||||
output.push(' ');
|
||||
});
|
||||
output.push('^');
|
||||
for _ in *contexted_start..(line.len() - 1) {
|
||||
output.push('-');
|
||||
}
|
||||
output.push(' ');
|
||||
let appandig_str = match source {
|
||||
LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
|
||||
LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
|
||||
LexingError::UnknownCharacter(char) => format!("This char: `{char}`; is not a valid token"),
|
||||
LexingError::ExpectedArrow => "The `-` token is interpretet as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
|
||||
};
|
||||
output.push_str(&appandig_str);
|
||||
output.push_str("\x1b[0m");
|
||||
output
|
||||
};
|
||||
|
||||
writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", source)?;
|
||||
if !line_above.is_empty() {
|
||||
writeln!(
|
||||
f,
|
||||
"\x1b[32;1m{} |\x1b[0m {}",
|
||||
line_number - 1,
|
||||
line_above
|
||||
)?;
|
||||
}
|
||||
writeln!(f, "\x1b[36;1m{} |\x1b[0m {}", line_number, line)?;
|
||||
writeln!(f, " {}", error_line)?;
|
||||
if !line_below.is_empty() {
|
||||
writeln!(
|
||||
f,
|
||||
"\x1b[32;1m{} |\x1b[0m {}",
|
||||
line_number + 1,
|
||||
line_below
|
||||
)
|
||||
} else {
|
||||
write!(f, "")
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,84 @@
|
|||
use self::{error::SpannedLexingError, tokenizer::Tokenizer};
|
||||
|
||||
pub mod error;
|
||||
mod tokenizer;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
pub struct TokenStream {
|
||||
original_file: String,
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
impl TokenStream {
|
||||
/// Turn a string of valid Trixy code into a list of tokens, including the
|
||||
/// location of that token's start and end point in the original source code.
|
||||
///
|
||||
/// Note the token indices represent the half-open interval `[start, end)`,
|
||||
/// equivalent to `start .. end` in Rust.
|
||||
pub fn lex(src: &str) -> Result<Self, SpannedLexingError> {
|
||||
let mut tokenizer = Tokenizer::new(src);
|
||||
let mut tokens = Vec::new();
|
||||
|
||||
while let Some(tok) = tokenizer.next_token()? {
|
||||
tokens.push(tok);
|
||||
}
|
||||
|
||||
Ok(Self {
|
||||
tokens,
|
||||
original_file: src.to_owned(),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// The location of a token, recorded as offsets from the beginning of the file.
///
/// The span is the half-open interval `[start, end)`. A token span like this,
/// for example:
/// ```text
/// TokenSpan {
///     start: 20,
///     end: 23,
/// }
/// ```
/// signals that the token starts at offset 20 and ends right before offset 23.
// NOTE(review): the tokenizer advances by UTF-8 byte lengths, so the offsets are
// bytes; they coincide with char indices only for ASCII sources.
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
pub struct TokenSpan {
    /// The start of the token span (inclusive)
    start: usize,
    /// The end of the token span (exclusive)
    end: usize,
}
|
||||
|
||||
/// A Token
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
pub struct Token {
|
||||
/// The token's original location in the source file
|
||||
span: TokenSpan,
|
||||
kind: TokenKind,
|
||||
}
|
||||
|
||||
/// Possibly kinds of tokens
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
pub enum TokenKind {
|
||||
Keyword(Keyword),
|
||||
Identifier(String),
|
||||
Colon,
|
||||
Semicolon,
|
||||
Comma,
|
||||
Arrow,
|
||||
BraceOpen,
|
||||
BraceClose,
|
||||
ParenthesisOpen,
|
||||
ParenthesisClose,
|
||||
}
|
||||
|
||||
/// The reserved words of the language
// The variants are spelled exactly like the keywords they stand for, hence the
// lint exception for the whole enum.
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
#[allow(non_camel_case_types)]
pub enum Keyword {
    /// `nasp`: starts a namespace declaration
    nasp,
    /// `fn`: starts a function declaration
    r#fn,
}
|
|
@ -0,0 +1,194 @@
|
|||
use crate::lexing::{Keyword, Token, TokenKind, TokenSpan};
|
||||
|
||||
use super::TokenStream;
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
/// Lexes a namespace containing one function and checks every token and span.
#[test]
fn test_lexing_trixy() {
    // The expected spans below are offsets into exactly this string, so all
    // whitespace is spelled out instead of relying on the indentation of a
    // multi-line literal.
    let input = "\nnasp commands {\n    fn expect(event: String) -> String;\n}\n";

    let token = |start, end, kind| Token {
        span: TokenSpan { start, end },
        kind,
    };
    let ident = |name: &str| TokenKind::Identifier(name.to_owned());

    let expected_token_stream = TokenStream {
        tokens: vec![
            token(1, 5, TokenKind::Keyword(Keyword::nasp)),
            token(6, 14, ident("commands")),
            token(15, 16, TokenKind::BraceOpen),
            token(21, 23, TokenKind::Keyword(Keyword::r#fn)),
            token(24, 30, ident("expect")),
            token(30, 31, TokenKind::ParenthesisOpen),
            token(31, 36, ident("event")),
            token(36, 37, TokenKind::Colon),
            token(38, 44, ident("String")),
            token(44, 45, TokenKind::ParenthesisClose),
            token(46, 48, TokenKind::Arrow),
            token(49, 55, ident("String")),
            token(55, 56, TokenKind::Semicolon),
            token(57, 58, TokenKind::BraceClose),
        ],
        original_file: input.to_owned(),
    };

    let token_stream = TokenStream::lex(input).unwrap();
    assert_eq!(token_stream, expected_token_stream)
}
|
||||
|
||||
/// A lone `-` (not followed by `>`) must be rejected with `ExpectedArrow`.
#[test]
fn test_failing_lexing() {
    let input = "\nnasp trinitrix {\n    nasp - commands {\n        fn hi(strings: String) -> String;\n    }\n}\n";

    let error = TokenStream::lex(input).expect_err("a `-` without `>` must not lex");

    // Run `cargo test -- --nocapture` to see the rendered error message.
    eprintln!("{}", error);

    assert!(matches!(
        error,
        super::error::SpannedLexingError::Error {
            source: super::error::LexingError::ExpectedArrow,
            ..
        }
    ));
}
|
||||
|
||||
/// Repeated keywords and directly adjacent braces are lexed as separate tokens.
#[test]
fn test_multiple_tokens() {
    // The expected spans are offsets into exactly this string, so the whitespace
    // is written out explicitly.
    let input = "\nnasp nasp {{\n}}\n";

    let token = |start, end, kind| Token {
        span: TokenSpan { start, end },
        kind,
    };

    let expected_token_stream = TokenStream {
        tokens: vec![
            token(1, 5, TokenKind::Keyword(Keyword::nasp)),
            token(6, 10, TokenKind::Keyword(Keyword::nasp)),
            token(11, 12, TokenKind::BraceOpen),
            token(12, 13, TokenKind::BraceOpen),
            token(14, 15, TokenKind::BraceClose),
            token(15, 16, TokenKind::BraceClose),
        ],
        original_file: input.to_owned(),
    };

    let token_stream = TokenStream::lex(input).unwrap();
    assert_eq!(token_stream, expected_token_stream)
}
|
||||
|
||||
/// Line comments and (non-nested) block comments are skipped by the lexer.
#[test]
fn test_comments() {
    // The expected spans are offsets into exactly this string: every line is
    // indented by eight spaces, written out explicitly so that re-indenting the
    // test cannot silently move the tokens.
    let input = concat!(
        "\n",
        "        // Some comment\n",
        "        nasp nasp {{\n",
        "\n",
        "        }}\n",
        "        // NOTE(@soispha): We do not support nested multi line comments <2023-12-16>\n",
        "        /* Some\n",
        "         * multi\n",
        "         * line\n",
        "         * comment\n",
        "         */\n",
    );

    let token = |start, end, kind| Token {
        span: TokenSpan { start, end },
        kind,
    };

    let expected_token_stream = TokenStream {
        tokens: vec![
            token(33, 37, TokenKind::Keyword(Keyword::nasp)),
            token(38, 42, TokenKind::Keyword(Keyword::nasp)),
            token(43, 44, TokenKind::BraceOpen),
            token(44, 45, TokenKind::BraceOpen),
            token(55, 56, TokenKind::BraceClose),
            token(56, 57, TokenKind::BraceClose),
        ],
        original_file: input.to_owned(),
    };

    // Panic with the rendered error, so a failure shows where lexing stopped.
    let token_stream = TokenStream::lex(input).unwrap_or_else(|e| panic!("{}", e));
    assert_eq!(token_stream, expected_token_stream)
}
|
|
@ -0,0 +1,235 @@
|
|||
// This code is heavily inspired by: https://michael-f-bryan.github.io/static-analyser-in-rust/book/lex.html
|
||||
|
||||
use crate::lexing::{Keyword, TokenSpan};
|
||||
|
||||
use super::{
|
||||
error::{LexingError, SpannedLexingError},
|
||||
Token, TokenKind,
|
||||
};
|
||||
|
||||
pub(super) struct Tokenizer<'a> {
|
||||
current_index: usize,
|
||||
remaining_text: &'a str,
|
||||
original_text: &'a str,
|
||||
}
|
||||
|
||||
impl<'a> Tokenizer<'a> {
|
||||
pub(super) fn new(input: &'a str) -> Self {
|
||||
Self {
|
||||
current_index: 0,
|
||||
remaining_text: input,
|
||||
original_text: input,
|
||||
}
|
||||
}
|
||||
pub(super) fn next_token(&mut self) -> Result<Option<Token>, SpannedLexingError> {
|
||||
self.skip_ignored_tokens();
|
||||
if self.remaining_text.is_empty() {
|
||||
return Ok(None);
|
||||
} else {
|
||||
let start = self.current_index;
|
||||
|
||||
let (token_kind, index) = self.get_next_tokenkind().map_err(|e| {
|
||||
let (line_above, line, line_below, contexted_start, line_number) = {
|
||||
let line_number = self
|
||||
.original_text
|
||||
.chars()
|
||||
.take(start)
|
||||
.filter(|a| a == &'\n')
|
||||
.count();
|
||||
let lines: Vec<_> = self.original_text.lines().collect();
|
||||
|
||||
let line = (*lines
|
||||
.get(line_number)
|
||||
.expect("This should work, as have *at least* one (index = 0) line"))
|
||||
.to_owned();
|
||||
|
||||
let contexted_start = {
|
||||
let matched_line: Vec<_> = self.original_text.match_indices(&line).collect();
|
||||
let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
|
||||
debug_assert_eq!(matched_line, &&line);
|
||||
start - index
|
||||
};
|
||||
|
||||
let line_above;
|
||||
if line_number == 0 {
|
||||
// We only have one line, so no line above
|
||||
line_above = "".to_owned();
|
||||
} else {
|
||||
line_above = (*lines
|
||||
.get(line_number - 1)
|
||||
.expect("We checked that this should work"))
|
||||
.to_owned();
|
||||
}
|
||||
|
||||
let line_below;
|
||||
if lines.len() - 1 > line_number {
|
||||
// We have a line after the current line
|
||||
line_below = (*lines
|
||||
.get(line_number + 1)
|
||||
.expect("We checked that this should work"))
|
||||
.to_owned();
|
||||
} else {
|
||||
line_below = "".to_owned();
|
||||
}
|
||||
|
||||
(line_above, line, line_below, contexted_start, line_number)
|
||||
};
|
||||
SpannedLexingError::Error {
|
||||
source: e,
|
||||
start,
|
||||
contexted_start,
|
||||
line_above,
|
||||
line_below,
|
||||
line_number,
|
||||
line,
|
||||
}
|
||||
})?;
|
||||
self.chomp(index); // end - start
|
||||
let end = self.current_index;
|
||||
Ok(Some(Token {
|
||||
span: TokenSpan { start, end },
|
||||
kind: token_kind,
|
||||
}))
|
||||
}
|
||||
}
|
||||
|
||||
fn get_next_tokenkind(&mut self) -> Result<(TokenKind, usize), LexingError> {
|
||||
let next = match self.remaining_text.chars().next() {
|
||||
Some(c) => c,
|
||||
None => return Err(LexingError::UnexpectedEOF),
|
||||
};
|
||||
|
||||
let (tok, length) = match next {
|
||||
'(' => (TokenKind::ParenthesisOpen, 1),
|
||||
')' => (TokenKind::ParenthesisClose, 1),
|
||||
'{' => (TokenKind::BraceOpen, 1),
|
||||
'}' => (TokenKind::BraceClose, 1),
|
||||
':' => (TokenKind::Colon, 1),
|
||||
';' => (TokenKind::Semicolon, 1),
|
||||
',' => (TokenKind::Comma, 1),
|
||||
'-' => tokenize_arrow(self.remaining_text)?,
|
||||
c @ '_' | c if c.is_alphanumeric() => tokenize_ident(self.remaining_text)?,
|
||||
other => return Err(LexingError::UnknownCharacter(other)),
|
||||
};
|
||||
|
||||
Ok((tok, length))
|
||||
}
|
||||
|
||||
/// Skip past any whitespace characters or comments.
|
||||
fn skip_ignored_tokens(&mut self) {
|
||||
loop {
|
||||
let ws = self.skip_whitespace();
|
||||
let comments = self.skip_comments();
|
||||
|
||||
if ws + comments == 0 {
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
fn skip_whitespace(&mut self) -> usize {
|
||||
let mut remaining = self.remaining_text;
|
||||
|
||||
// Filter out whitespace
|
||||
let _ws = {
|
||||
let ws = match take_while(remaining, |ch| ch.is_whitespace()) {
|
||||
Ok((_, bytes_skipped)) => bytes_skipped,
|
||||
_ => 0,
|
||||
};
|
||||
remaining = &remaining[ws..];
|
||||
ws
|
||||
};
|
||||
// let comments = skip_comments(remaining);
|
||||
// remaining = &remaining[comments..];
|
||||
|
||||
let skip = self.remaining_text.len() - remaining.len();
|
||||
self.chomp(skip);
|
||||
skip
|
||||
}
|
||||
|
||||
fn skip_comments(&mut self) -> usize {
|
||||
let remaining = self.remaining_text;
|
||||
let pairs = [("//", "\n"), ("/*", "*/")];
|
||||
|
||||
let mut skip = 0;
|
||||
for &(pattern, matcher) in &pairs {
|
||||
if remaining.starts_with(pattern) {
|
||||
let leftovers = skip_until(remaining, matcher);
|
||||
skip = remaining.len() - leftovers.len();
|
||||
break;
|
||||
}
|
||||
}
|
||||
self.chomp(skip);
|
||||
skip
|
||||
}
|
||||
|
||||
fn chomp(&mut self, chars_to_chomp: usize) {
|
||||
self.remaining_text = &self.remaining_text[chars_to_chomp..];
|
||||
self.current_index += chars_to_chomp;
|
||||
}
|
||||
}
|
||||
|
||||
fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
|
||||
let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?;
|
||||
|
||||
// Filter out keywords
|
||||
let tokenkind = match got {
|
||||
"nasp" => TokenKind::Keyword(Keyword::nasp),
|
||||
"fn" => TokenKind::Keyword(Keyword::r#fn),
|
||||
other => TokenKind::Identifier(other.to_string()),
|
||||
};
|
||||
|
||||
Ok((tokenkind, chars_read))
|
||||
}
|
||||
|
||||
fn tokenize_arrow(text: &str) -> Result<(TokenKind, usize), LexingError> {
|
||||
let mut chars = text.chars();
|
||||
if let Some(char) = chars.next() {
|
||||
if char == '-' {
|
||||
if let Some(char) = chars.next() {
|
||||
if char == '>' {
|
||||
return Ok((TokenKind::Arrow, 2));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// This is a implicit else as the other if clauses return
|
||||
Err(LexingError::ExpectedArrow)
|
||||
}
|
||||
|
||||
/// Consumes bytes while a predicate evaluates to true.
|
||||
fn take_while<F>(data: &str, mut pred: F) -> Result<(&str, usize), LexingError>
|
||||
where
|
||||
F: FnMut(char) -> bool,
|
||||
{
|
||||
let mut current_index = 0;
|
||||
|
||||
for ch in data.chars() {
|
||||
let should_continue = pred(ch);
|
||||
|
||||
if !should_continue {
|
||||
break;
|
||||
}
|
||||
|
||||
current_index += ch.len_utf8();
|
||||
}
|
||||
|
||||
if current_index == 0 {
|
||||
Err(LexingError::NoMatchesTaken)
|
||||
} else {
|
||||
Ok((&data[..current_index], current_index))
|
||||
}
|
||||
}
|
||||
|
||||
/// Skips `src` up to and including the first occurrence of `pattern` and returns
/// what follows it.
///
/// If `pattern` does not occur at all, everything is skipped and the empty tail
/// of `src` is returned. This makes a line comment without a trailing newline or
/// an unterminated block comment run to the end of the input instead of
/// panicking.
fn skip_until<'a>(src: &'a str, pattern: &str) -> &'a str {
    match src.find(pattern) {
        Some(position) => &src[position + pattern.len()..],
        None => &src[src.len()..],
    }
}
|
|
@ -0,0 +1,58 @@
|
|||
use error::TrixyError;
|
||||
|
||||
use crate::lexing::TokenStream;
|
||||
|
||||
use self::command_spec::CommandSpec;
|
||||
|
||||
mod command_spec;
|
||||
pub mod error;
|
||||
pub mod lexing;
|
||||
|
||||
pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
|
||||
let input_tokens = TokenStream::lex(input)?;
|
||||
|
||||
todo!()
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod test {
    use crate::{
        command_spec::{CommandSpec, Declaration, Genus, NamedType, Namespace, Type},
        parse_trixy_lang,
    };

    /// A function declared inside a namespace is reported with that namespace and
    /// with all of its arguments.
    #[test]
    fn test_function_with_namespace() {
        let source = "\nnasp commands {\nfn say_something(name_to_greet: String, what_to_say: String) -> String;\n}\n";
        let parsed = parse_trixy_lang(source).unwrap();

        let string_argument = |name: &str| NamedType {
            name: name.to_owned(),
            base: Type::String,
        };
        let wanted = CommandSpec {
            declarations: vec![Declaration {
                namespace: vec![Namespace {
                    name: "commands".to_owned(),
                }],
                genus: Genus::Function {
                    name: "say_something".to_owned(),
                    inputs: vec![
                        string_argument("name_to_greet"),
                        string_argument("what_to_say"),
                    ],
                    output: Type::String,
                },
            }],
        };

        assert_eq!(parsed, wanted);
    }
}
|
|
@ -0,0 +1,45 @@
|
|||
use std::{fs, process::exit};
|
||||
|
||||
use trixy_lang_parser::lexing::TokenStream;
|
||||
|
||||
use std::path::PathBuf;
|
||||
|
||||
use clap::{Parser, Subcommand};
|
||||
|
||||
/// A helper command for the trixy-lang_parser crate
|
||||
#[derive(Parser, Debug)]
|
||||
#[clap(author, version, about, long_about = None)]
|
||||
pub struct Args {
|
||||
#[command(subcommand)]
|
||||
/// The subcommand to execute
|
||||
pub subcommand: Command,
|
||||
}
|
||||
#[derive(Subcommand, Debug)]
|
||||
pub enum Command {
|
||||
#[clap(value_parser)]
|
||||
/// Only try to tokenize the file
|
||||
Tokenize {
|
||||
#[clap(value_parser)]
|
||||
/// The file containing the trixy code to tokenize
|
||||
file: PathBuf,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
let args = Args::parse();
|
||||
match args.subcommand {
|
||||
Command::Tokenize { file } => {
|
||||
let input = fs::read_to_string(file).unwrap();
|
||||
|
||||
let input_tokens = match TokenStream::lex(&input) {
|
||||
Ok(err) => err,
|
||||
Err(ok) => {
|
||||
println!("{}", ok);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
println!("{:#?}", input_tokens);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue