feat(trixy-lang_parser): Add first parser pass

This commit is contained in:
Benedikt Peetz 2023-12-18 18:04:21 +01:00
parent 74efd3eda6
commit 3a65c33b15
Signed by: bpeetz
GPG Key ID: A5E94010C3A642AD
24 changed files with 1127 additions and 226 deletions

View File

@ -2,5 +2,5 @@
/target
/result
# lua_macros is a library
# This crate is a library
Cargo.lock

View File

@ -0,0 +1,6 @@
# trixy-lang_parser
This crate contains a parser (and lexer) for the Trixy language.
The corresponding grammar is in the grammar file [here](./docs/grammar.ebnf) encoded in [Extended Backus-Naur Form](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form).
## Docs
Run `./generate_docs` to turn the grammar file into railroad diagrams.

View File

@ -0,0 +1,17 @@
# (*
# Trixy is fully whitespace independent, this means that you can
# interleave whitespace in the definitions.
# The same applies to comments:
# - Line comments (`// \n`) and
# - Block comments (`/* */`).
# *)
CommandSpec = { Function | Namespace } ;
Function = "fn" Identifier "(" {Identifier ":" Type} ")" [ "->" Type ] ";" ;
Namespace = "nasp" Identifier "{" {Function | Namespace} "}" ;
Type = "String" | "Integer" ; # (* This corresponds to the CommandTransferValue *)
Identifier = CHARACTER { NUMBER | CHARACTER } ;
# (*
# vim: ft=ebnf
# *)

Binary file not shown.

View File

@ -0,0 +1,9 @@
fn print(message: CommandTransferValue);
nasp trinitrix { {}
fn hi honner(name: String) -> String; ;
}
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust

View File

@ -0,0 +1,126 @@
/// Prints to the output, with a newline.
// HACK(@soispha): The stdlib Lua `print()` function has stdout as output hardcoded,
// redirecting stdout seems too much like a hack thus we are just redefining the print function
// to output to a controlled output. <2023-09-09>
fn print(input: CommandTransferValue);
nasp trinitrix {
/// Language specific functions, which mirror the `trinitrix.api` namespace.
/// That is, if you have to choose between a `std` and an `api` function, choose the `std`
/// one, as it will most likely be more high-level and easier to use (as it isn't abstracted
/// over multiple languages). Feel free to drop down to the lower-level api if you prefer
/// it; it should be as stable and user-oriented as the `std` functions.
nasp std {}
/// Debug only functions, these are effectively useless
nasp debug {
/// Greets the user
fn greet(input: String) -> String;
/// Returns a table of greeted users
fn greet_multiple() -> Table;
}
/// General API to change stuff in Trinitrix
nasp api {
/// Closes the application
fn exit();
/// Send a message to the current room
/// The sent message is interpreted literally.
fn room_message_send(msg: String);
/// Open the help pages at the first occurrence of
/// the input string if it is Some, otherwise open
/// the help pages at the start
fn help(input: Option<String>);
// Register a function to be used with the Trinitrix API
// (This function is actually implemented in the std namespace)
/* fn register_function(function: RawFunction); */
/// Functions that change the UI, or UI state
nasp ui {
/// Shows the command line
fn command_line_show();
/// Hides the command line
fn command_line_hide();
/// Go to the next plane
fn cycle_planes();
/// Go to the previous plane
fn cycle_planes_rev();
/// Sets the current app mode to Normal / navigation mode
fn set_mode_normal();
/// Sets the current app mode to Insert / editing mode
fn set_mode_insert();
}
/// Manipulate keymappings. The mode is specified as a String built up of all modes
/// the keymapping should be active in. The mapping works as follows:
/// n => normal Mode
/// c => command Mode
/// i => insert Mode
///
/// The key works in a similar manner, specifying the required keypresses to trigger the
/// callback. For example "aba" would require the user to press "a" then "b" then "a" again
/// to trigger the mapping. Special characters are encoded as follows:
/// "<C-a>ba" => "Ctrl+a" then "b" then "a"
/// "<S-a>" => "A" or "Shift+a"
/// "A" => "A"
/// "<M-a> " => "Alt+a" (<A-a>) or "Meta+a"(<M-a>) (most terminals can't really differentiate between these characters)
/// "a<C-b><C-a>" => "a" then "Ctrl+b" then "Ctrl+a" (also works for Shift, Alt and Super)
/// "<CSM-b>" => "Ctrl+Shift+Alt+b" (the ordering doesn't matter)
/// "a " => "a" then a literal space (" ")
/// "å🙂" => "å" then "🙂" (full Unicode support!)
/// "<ESC>" => escape key
/// "<F3>" => F3 key
/// "<BACKSPACE>" => backspace key (and so forth)
/// "<DASH>" => a literal "-"
/// "<ANGULAR_BRACKET_OPEN>" or "<ABO>" => a literal "<"
/// "<ANGULAR_BRACKET_CLOSE>" or "<ABC>" => a literal ">"
///
/// The callback MUST be registered first by calling
/// `trinitrix.api.register_function()`; the returned value can then be used to
/// set the keymap.
nasp keymaps {
/// Add a new keymapping
fn add(mode: String, key: String, callback: Function);
/// Remove a keymapping
///
/// Does nothing if the keymapping doesn't exist
fn remove((/* mode: */ String, /* key: */ String));
/// List declared keymappings
fn get(mode: String);
}
/// Functions only used internally within Trinitrix
nasp raw {
/// Send an error to the default error output
fn raise_error(input: String);
/// Send output to the default output
/// This is mainly used to display the final
/// output of evaluated lua commands.
fn display_output(input: String);
/// Input a character without checking for possible keymaps
/// If the current state does not expect input, this character is ignored
/// The encoding is the same as in the `trinitrix.api.keymaps` commands
fn send_input_unprocessed(input: String);
/// This namespace is used to store some command specific data (like functions, as
/// ensuring memory locations stay allocated in garbage collected language is hard)
///
/// Treat it as an implementation detail
nasp __private {}
}
}
}
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust

View File

@ -3,7 +3,9 @@ fn print(message: CommandTransferValue);
nasp trinitrix {
fn hi(name: String) -> String;
}
namespace commands { >-
nasp trinitrix {
fn ho(name: String) -> String;
}

View File

@ -0,0 +1,9 @@
#!/usr/bin/env sh
# Render the EBNF grammar into a PDF of railroad diagrams.
#
# Abort on the first failing command: without this a failed `ebnf2pdf` run
# would still execute `mv` and could move a stale `out.pdf` over the docs.
set -e

ebnf2pdf "./docs/grammar.ebnf"
mv out.pdf ./docs/grammar.pdf

# vim: ft=sh

View File

@ -0,0 +1,58 @@
//! This module contains the already type checked types.
//!
//!
use crate::lexing::{Keyword, TokenKind};
/// The primitive types known to the type checked command specification.
pub enum PrimitiveTypes {
/// The `String` type
String,
/// Nothing
Void,
}
/// Unwraps the name carried by a [`TokenKind::Identifier`] into an [`Identifier`].
///
/// # Panics
///
/// Panics if `value` is any other kind of token. Callers are expected to have
/// checked the token kind beforehand, so reaching the panic indicates a bug.
impl From<TokenKind> for Identifier {
    fn from(value: TokenKind) -> Self {
        match value {
            TokenKind::Identifier(ident) => Identifier(ident),
            // Every remaining variant is listed explicitly (instead of `_`), so that
            // adding a new `TokenKind` variant forces this conversion to be revisited.
            TokenKind::Keyword(_)
            | TokenKind::Colon
            | TokenKind::Semicolon
            | TokenKind::Comma
            | TokenKind::Arrow
            | TokenKind::BraceOpen
            | TokenKind::BraceClose
            | TokenKind::ParenOpen
            | TokenKind::Dummy
            | TokenKind::ParenClose => {
                panic!("Tried to convert a non Identifier TokenKind to an Identifier. This is a bug")
            }
        }
    }
}
/// An identifier, wrapping the name taken from a `TokenKind::Identifier` token.
///
/// Identifiers include:
/// - Variable names
/// - Function names
/// - Namespace names
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Identifier(String);
/// Extracts the [`Keyword`] carried by a `TokenKind::Keyword`.
///
/// # Panics
///
/// Panics when handed any other kind of token; this indicates a bug in the caller.
impl From<TokenKind> for Keyword {
    fn from(value: TokenKind) -> Self {
        // The match stays exhaustive on purpose: a newly added `TokenKind`
        // variant has to be sorted into one of the two arms explicitly.
        let maybe_keyword = match value {
            TokenKind::Keyword(keyword) => Some(keyword),
            TokenKind::Identifier(_)
            | TokenKind::Colon
            | TokenKind::Semicolon
            | TokenKind::Comma
            | TokenKind::Arrow
            | TokenKind::BraceOpen
            | TokenKind::BraceClose
            | TokenKind::ParenOpen
            | TokenKind::Dummy
            | TokenKind::ParenClose => None,
        };

        maybe_keyword.unwrap_or_else(|| {
            panic!("Tried to convert a non Keyword TokenKind to a Keyword. This is a bug")
        })
    }
}

View File

@ -1,36 +1,2 @@
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct CommandSpec {
pub(crate) declarations: Vec<Declaration>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Declaration {
pub(crate) namespace: Vec<Namespace>,
pub(crate) genus: Genus,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct Namespace {
pub(crate) name: String,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum Genus {
Function {
name: String,
inputs: Vec<NamedType>,
output: Type,
},
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct NamedType {
pub(crate) name: String,
pub(crate) base: Type,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) enum Type {
String,
Void,
}
pub mod checked;
pub mod unchecked;

View File

@ -0,0 +1,48 @@
//! This module contains the not type checked types.
//! These are generated on the first pass of the parser, to be later converted into the checked
//! ones.
use crate::lexing::Token;
/// The output of the first (unchecked) parser pass.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct CommandSpec {
/// All declarations collected from the input
pub declarations: Vec<Declaration>,
}
/// A single declared item together with the namespaces it was declared in.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Declaration {
/// The name tokens of the enclosing namespaces (empty for a top-level declaration)
pub namespace: Vec<Token>, // Will later be turned into Namespace
/// What kind of item was declared
pub genus: Genus,
}
impl Declaration {
    /// Build a declaration for `function`, declared inside the given `namespace` path.
    pub fn new_function(function: Function, namespace: Vec<Token>) -> Self {
        let genus = Genus::Function(function);
        Self { namespace, genus }
    }
}
/// A namespace, identified by its (not yet checked) name token.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Namespace {
/// The token holding the namespace's name
pub name: Token, // Will later become an Identifier
}
/// The kind of item a `Declaration` holds.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Genus {
/// Not actually a genus, but used in parsing to accommodate multiple errors
Dummy,
/// A function
Function(Function),
}
/// A function declaration, as found by the first parser pass.
/// All parts are still raw tokens; nothing has been type checked yet.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Function {
/// The token holding the function's name
pub identifier: Token, // Will later become an Identifier
/// The declared arguments of the function
pub inputs: Vec<FunctionInput>,
/// The token of the return type, `None` if no return type was declared
pub output: Option<Token>, // Will later become a Type
}
/// One `name: type` argument of a function declaration.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct FunctionInput {
/// The token holding the argument's name
pub name: Token, // Will later become an Identifier
/// The token holding the argument's type
pub r#type: Token, // Will later become a Type
}

View File

@ -1,9 +1,191 @@
use core::fmt;
use thiserror::Error;
use crate::lexing::error::SpannedLexingError;
use crate::lexing::{error::SpannedLexingError, TokenSpan};
/// The error type returned by `parse_trixy_lang`.
#[derive(Error, Debug)]
pub enum TrixyError {
/// An error raised while lexing the input.
// NOTE(review): despite the variant name, the payload is a `SpannedLexingError` — confirm
// whether this variant should be renamed once parsing errors are wired in.
#[error(transparent)]
Parsing(#[from] SpannedLexingError),
}
/// The context of an Error.
///
/// It stores everything needed to render an error together with the source
/// lines surrounding it.
#[derive(Debug)]
pub struct ErrorContext {
    /// The span of the error in the source file
    pub span: TokenSpan,
    /// The span of the error, relative to the start of the context line
    pub contexted_span: TokenSpan,
    /// The line above the error (empty, if there is none)
    pub line_above: String,
    /// The line below the error (empty, if there is none)
    pub line_below: String,
    /// The line in which the error occurred
    pub line: String,
    /// The line number of the main error line (1-based)
    pub line_number: usize,
}

impl ErrorContext {
    /// Build the context for an error covering `span` in `original_file`.
    ///
    /// `span` is interpreted in chars from the beginning of the file. A span that
    /// starts beyond the last line (for example an unexpected EOF after the final
    /// newline) results in an empty error line instead of a panic.
    pub fn from_span(span: TokenSpan, original_file: &str) -> Self {
        // Walk the chars in front of the error exactly once, tracking both the
        // (1-based) line number and the char index at which the error line starts.
        // Deriving the line start from the position (rather than by searching for the
        // line's text) keeps the result correct when the same line occurs repeatedly.
        let mut line_number: usize = 1;
        let mut line_start: usize = 0;
        for (index, character) in original_file.chars().take(span.start).enumerate() {
            if character == '\n' {
                line_number += 1;
                line_start = index + 1;
            }
        }

        let lines: Vec<&str> = original_file.lines().collect();
        // Look up a line by its 1-based number; anything outside of the file is
        // reported as an empty line.
        let line_at = |number: usize| -> String {
            number
                .checked_sub(1)
                .and_then(|index| lines.get(index))
                .map(|line| (*line).to_owned())
                .unwrap_or_default()
        };

        let contexted_span = TokenSpan {
            start: span.start.saturating_sub(line_start),
            end: span.end.saturating_sub(line_start),
        };

        Self {
            span,
            contexted_span,
            line_above: line_at(line_number - 1),
            line_below: line_at(line_number + 1),
            line: line_at(line_number),
            line_number,
        }
    }

    /// Build the context for an error located at the single position `start`.
    ///
    /// This is a shorthand for [`ErrorContext::from_span`] with an empty span.
    pub fn from_index(start: usize, original_file: &str) -> Self {
        let span = TokenSpan { start, end: start };
        Self::from_span(span, original_file)
    }

    /// Render the marker line, which is printed below the error line.
    ///
    /// The line consists of padding up to the start of the error, one `^` per
    /// char of the error span (but always at least one, so that empty spans stay
    /// visible) and `source_error` as help text.
    pub fn get_error_line(&self, source_error: &str) -> String {
        let ErrorContext {
            contexted_span,
            line_number,
            ..
        } = self;

        // The line number is printed in front of the source line, so the markers
        // have to be shifted by its width (`32` -> two spaces of padding).
        let line_number_width = line_number.to_string().chars().count();
        let marker_count = contexted_span
            .end
            .saturating_sub(contexted_span.start)
            .max(1);

        let mut output = String::new();
        output.push_str("\x1b[92;1m");
        output.push_str(&" ".repeat(line_number_width + contexted_span.start));
        output.push_str(&"^".repeat(marker_count));
        // separate the markers from the help text
        output.push(' ');
        output.push_str("help: ");
        output.push_str(source_error);
        output.push_str("\x1b[0m");
        output
    }
}
/// Implemented by errors which can explain themselves in more detail.
/// The returned text is printed as `help: ...` next to the error markers.
pub trait AdditionalHelp {
/// Return the help text for this error
fn additional_help(&self) -> String;
}
/// Shared pretty printer for errors which carry an [`ErrorContext`].
///
/// Implementors only provide the getters; `error_fmt` then renders the error
/// message, the surrounding source lines and the marker line with the help text.
pub trait ErrorContextDisplay: fmt::Display {
    /// The underlying error, which is wrapped together with its context
    type Error;

    /// Write the complete, colored error report to `f`.
    ///
    /// The lines above and below the error are only printed if they are not empty.
    fn error_fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
    where
        <Self as ErrorContextDisplay>::Error: std::fmt::Display + AdditionalHelp,
    {
        let source = self.source();
        let number = self.line_number();
        let marker_line = self.context().get_error_line(&source.additional_help());

        // headline
        writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", source)?;

        let above = self.line_above();
        if !above.is_empty() {
            writeln!(f, "\x1b[32;1m{} |\x1b[0m {}", number - 1, above)?;
        }

        // the offending line followed by its markers
        writeln!(f, "\x1b[36;1m{} |\x1b[0m {}", number, self.line())?;
        writeln!(f, " {}", marker_line)?;

        let below = self.line_below();
        if below.is_empty() {
            return Ok(());
        }
        writeln!(f, "\x1b[32;1m{} |\x1b[0m {}", number + 1, below)
    }

    // getters
    fn context(&self) -> &ErrorContext;
    fn source(&self) -> &Self::Error;
    fn line_number(&self) -> usize;
    fn line_above(&self) -> &str;
    fn line_below(&self) -> &str;
    fn line(&self) -> &str;
}

View File

@ -1,6 +1,8 @@
use std::{error::Error, fmt::Display};
use thiserror::Error;
use crate::error::{AdditionalHelp, ErrorContext, ErrorContextDisplay};
#[derive(Error, Debug)]
pub enum LexingError {
#[error("No matches were found")]
@ -13,88 +15,61 @@ pub enum LexingError {
ExpectedArrow,
}
#[derive(Debug)]
pub enum SpannedLexingError {
Error {
source: LexingError,
/// The starting char index of the error in the source file
start: usize,
/// The starting char index of the error in the context line
contexted_start: usize,
/// The line above the error
line_above: String,
/// The line below the error
line_below: String,
/// The line in which the error occurred
line: String,
/// The line number of the main error line
line_number: usize,
/// Provides the help text which is shown below a lexing error.
impl AdditionalHelp for LexingError {
    /// Return a short, human readable explanation for each kind of lexing error.
    fn additional_help(&self) -> String {
        match self {
            LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
            LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
            LexingError::ExpectedArrow => "The `-` token is interpreted as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
            LexingError::UnknownCharacter(char) => {
                format!("This char: `{char}`; is not a valid token")
            }
        }
    }
}
/// A `LexingError` together with the source context it occurred in.
#[derive(Debug)]
pub struct SpannedLexingError {
/// The actual error
pub source: LexingError,
/// Where in the source file the error occurred
pub context: ErrorContext,
}
impl Error for SpannedLexingError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
let Self::Error { source, .. } = self;
Some(source)
Some(&self.source)
}
}
impl ErrorContextDisplay for SpannedLexingError {
type Error = LexingError;
fn context(&self) -> &crate::error::ErrorContext {
&self.context
}
fn line_number(&self) -> usize {
self.context.line_number
}
fn line_above(&self) -> &str {
&self.context.line_above
}
fn line_below(&self) -> &str {
&self.context.line_below
}
fn line(&self) -> &str {
&self.context.line
}
fn source(&self) -> &<SpannedLexingError as ErrorContextDisplay>::Error {
&self.source
}
}
impl Display for SpannedLexingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let Self::Error {
source,
line_above,
line_below,
line,
line_number,
contexted_start,
..
} = self;
let error_line = {
let mut output = String::new();
output.push_str("\x1b[92;1m");
for _ in 0..(*contexted_start) {
output.push(' ');
}
line_number.to_string().chars().for_each(|_| {
output.push(' ');
});
output.push('^');
for _ in *contexted_start..(line.len() - 1) {
output.push('-');
}
output.push(' ');
let appandig_str = match source {
LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
LexingError::UnknownCharacter(char) => format!("This char: `{char}`; is not a valid token"),
LexingError::ExpectedArrow => "The `-` token is interpretet as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
};
output.push_str(&appandig_str);
output.push_str("\x1b[0m");
output
};
writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", source)?;
if !line_above.is_empty() {
writeln!(
f,
"\x1b[32;1m{} |\x1b[0m {}",
line_number - 1,
line_above
)?;
}
writeln!(f, "\x1b[36;1m{} |\x1b[0m {}", line_number, line)?;
writeln!(f, " {}", error_line)?;
if !line_below.is_empty() {
writeln!(
f,
"\x1b[32;1m{} |\x1b[0m {}",
line_number + 1,
line_below
)
} else {
write!(f, "")
}
self.error_fmt(f)
}
}

View File

@ -1,3 +1,5 @@
use std::fmt::Display;
use self::{error::SpannedLexingError, tokenizer::Tokenizer};
pub mod error;
@ -8,7 +10,7 @@ mod test;
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
pub struct TokenStream {
original_file: String,
pub original_file: String,
tokens: Vec<Token>,
}
@ -31,34 +33,83 @@ impl TokenStream {
original_file: src.to_owned(),
})
}
/// Get a token by index
///
/// Returns `None`, if `index` is out of bounds.
pub fn get(&self, index: usize) -> Option<&Token> {
self.tokens.get(index)
}
/// Get a reference to the uppermost token, without modifying the token list
///
/// The uppermost token is the last element of the underlying token vector.
///
/// # Panics
///
/// Panics if the stream does not contain any tokens.
pub fn peek(&self) -> &Token {
    self.tokens.last().expect("This should not be empty")
}
/// Remove and return the uppermost token
///
/// The uppermost token is the last element of the underlying token vector.
///
/// # Panics
///
/// Panics if the stream does not contain any tokens.
pub fn pop(&mut self) -> Token {
    self.tokens.pop().expect("This should not be empty")
}
/// Reverses the underlying token vector
/// This facilitates using the pop and peek methods to parse the tokens from the beginning,
/// not the end
pub fn reverse(&mut self) {
self.tokens.reverse()
}
/// Check if the TokenStream is empty, i.e. contains no tokens.
pub fn is_empty(&self) -> bool {
self.tokens.is_empty()
}
}
/// A token span is recorded in chars starting from the beginning of the file:
/// A token span like this, for example:
/// ```no_run
/// ```dont_run
///# use trixy_lang_parser::lexing::TokenSpan;
/// TokenSpan {
/// start: 20,
/// end: 23,
/// }
/// ```
/// signals, that the token starts at the 20th char in the source file and ends on the 23rd.
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
pub struct TokenSpan {
start: usize,
/// The start of the token span
end: usize,
pub start: usize,
/// The end of the token span
pub end: usize,
}
/// A Token
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone)]
pub struct Token {
/// The token's original location in the source file
span: TokenSpan,
kind: TokenKind,
pub span: TokenSpan,
pub kind: TokenKind,
}
impl Token {
/// Return the TokenKind of a token
pub fn kind(&self) -> &TokenKind {
&self.kind
}
/// Return the TokenSpan of a token
pub fn span(&self) -> &TokenSpan {
&self.span
}
/// Get a dummy token, this is intended for error handling
pub fn get_dummy() -> Token {
Self {
span: TokenSpan { start: 0, end: 0 },
kind: TokenKind::Dummy,
}
}
}
/// Possible kinds of tokens
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum TokenKind {
Keyword(Keyword),
Identifier(String),
@ -68,12 +119,49 @@ pub enum TokenKind {
Arrow,
BraceOpen,
BraceClose,
ParenthesisOpen,
ParenthesisClose,
ParenOpen,
ParenClose,
/// This is not a real TokenKind, but only used for error handling
Dummy,
}
impl TokenKind {
    /// Compare two token kinds, while ignoring the name carried by identifiers.
    ///
    /// Two identifiers are always of the same kind; every other combination is
    /// compared for full equality.
    pub fn same_kind(&self, other: &TokenKind) -> bool {
        match (self, other) {
            (TokenKind::Identifier(_), TokenKind::Identifier(_)) => true,
            _ => self == other,
        }
    }
}
/// Prints the token kind as an upper case label, with the payload (if any) in
/// parentheses. An identifier with an empty name is printed without a payload.
impl Display for TokenKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Kinds with a payload are written out directly, all others are reduced
        // to a static label first.
        let label = match self {
            TokenKind::Keyword(word) => return write!(f, "KEYWORD({})", word),
            TokenKind::Identifier(ident) if ident.is_empty() => "IDENTIFIER",
            TokenKind::Identifier(ident) => return write!(f, "IDENTIFIER({})", ident),
            TokenKind::Colon => "COLON",
            TokenKind::Semicolon => "SEMICOLON",
            TokenKind::Comma => "COMMA",
            TokenKind::Arrow => "ARROW",
            TokenKind::BraceOpen => "BRACEOPEN",
            TokenKind::BraceClose => "BRACECLOSE",
            TokenKind::ParenOpen => "PARENOPEN",
            TokenKind::ParenClose => "PARENCLOSE",
            TokenKind::Dummy => "DUMMY",
        };
        f.write_str(label)
    }
}
/// Keywords used in the language
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
pub enum Keyword {
/// Start a namespace declaration
#[allow(non_camel_case_types)]
@ -82,3 +170,85 @@ pub enum Keyword {
#[allow(non_camel_case_types)]
r#fn,
}
/// Prints the keyword exactly as it is written in the source code.
impl Display for Keyword {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let text = match self {
            Keyword::nasp => "nasp",
            Keyword::r#fn => "fn",
        };
        f.write_str(text)
    }
}
/// Shorthand macro for writing a `TokenKind`.
///
/// The macro accepts the name of a `TokenKind` variant (`token![Arrow]`), the
/// punctuation of a token (`token![->]`) or a keyword (`token![nasp]`).
///
/// # Examples
///
/// ```
/// use trixy_lang_parser::token;
/// # fn main() {
/// token![nasp];
/// token![;];
/// token![Arrow];
/// # }
/// ```
#[macro_export]
macro_rules! token {
[Semicolon] => { $crate::lexing::TokenKind::Semicolon };
[;] => { $crate::lexing::TokenKind::Semicolon };
[Colon] => { $crate::lexing::TokenKind::Colon };
[:] => { $crate::lexing::TokenKind::Colon };
[Comma] => { $crate::lexing::TokenKind::Comma };
[,] => { $crate::lexing::TokenKind::Comma };
[Arrow] => { $crate::lexing::TokenKind::Arrow };
[->] => { $crate::lexing::TokenKind::Arrow };
[BraceOpen] => { $crate::lexing::TokenKind::BraceOpen };
// [{] => { $crate::lexing::TokenKind::BraceOpen };
[BraceClose] => { $crate::lexing::TokenKind::BraceClose };
// [}] => { $crate::lexing::TokenKind::BraceClose };
[ParenOpen] => { $crate::lexing::TokenKind::ParenOpen };
// [(] => { $crate::lexing::TokenKind::ParenthesisOpen };
[ParenClose] => { $crate::lexing::TokenKind::ParenClose };
// [)] => { $crate::lexing::TokenKind::ParenthesisClose };
[nasp] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::nasp) };
[fn] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#fn) };
// The identifier arms produce an identifier with an empty name, so they are only
// useful for checking whether something is an identifier;
// see the `same_kind` method on TokenKind
[Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
[Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
}
#[cfg(test)]
mod tests {
    use super::TokenKind;
    use crate::token;

    /// Generates a `#[test]` function called `$name`, which asserts that
    /// `token![$from]` expands to the `TokenKind` given as `$to`.
    macro_rules! token_macro_test {
        ($name:ident, $from:tt, => $to:expr) => {
            #[test]
            fn $name() {
                let got: TokenKind = token![$from];
                let should_be = $to;
                assert_eq!(got, should_be);
            }
        };
    }

    token_macro_test!(tok_expands_to_arrow, ->, => TokenKind::Arrow);
    token_macro_test!(tok_expands_to_semicolon, Semicolon, => TokenKind::Semicolon);
    token_macro_test!(tok_expands_to_nasp, nasp, => TokenKind::Keyword(crate::lexing::Keyword::nasp));
    token_macro_test!(tok_expands_to_fn, fn, => TokenKind::Keyword(crate::lexing::Keyword::r#fn));
}

View File

@ -36,7 +36,7 @@ nasp commands {
},
Token {
span: TokenSpan { start: 30, end: 31 },
kind: TokenKind::ParenthesisOpen,
kind: TokenKind::ParenOpen,
},
Token {
span: TokenSpan { start: 31, end: 36 },
@ -52,7 +52,7 @@ nasp commands {
},
Token {
span: TokenSpan { start: 44, end: 45 },
kind: TokenKind::ParenthesisClose,
kind: TokenKind::ParenClose,
},
Token {
span: TokenSpan { start: 46, end: 48 },

View File

@ -1,6 +1,9 @@
// This code is heavily inspired by: https://michael-f-bryan.github.io/static-analyser-in-rust/book/lex.html
use crate::lexing::{Keyword, TokenSpan};
use crate::{
error::ErrorContext,
lexing::{Keyword, TokenSpan},
};
use super::{
error::{LexingError, SpannedLexingError},
@ -29,61 +32,11 @@ impl<'a> Tokenizer<'a> {
let start = self.current_index;
let (token_kind, index) = self.get_next_tokenkind().map_err(|e| {
let (line_above, line, line_below, contexted_start, line_number) = {
let line_number = self
.original_text
.chars()
.take(start)
.filter(|a| a == &'\n')
.count();
let lines: Vec<_> = self.original_text.lines().collect();
let context = ErrorContext::from_index(start, self.original_text);
let line = (*lines
.get(line_number)
.expect("This should work, as have *at least* one (index = 0) line"))
.to_owned();
let contexted_start = {
let matched_line: Vec<_> = self.original_text.match_indices(&line).collect();
let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
debug_assert_eq!(matched_line, &&line);
start - index
};
let line_above;
if line_number == 0 {
// We only have one line, so no line above
line_above = "".to_owned();
} else {
line_above = (*lines
.get(line_number - 1)
.expect("We checked that this should work"))
.to_owned();
}
let line_below;
if lines.len() - 1 > line_number {
// We have a line after the current line
line_below = (*lines
.get(line_number + 1)
.expect("We checked that this should work"))
.to_owned();
} else {
line_below = "".to_owned();
}
(line_above, line, line_below, contexted_start, line_number)
};
SpannedLexingError::Error {
source: e,
start,
contexted_start,
line_above,
line_below,
line_number,
line,
}
SpannedLexingError { source: e, context }
})?;
self.chomp(index); // end - start
let end = self.current_index;
Ok(Some(Token {
@ -100,8 +53,8 @@ impl<'a> Tokenizer<'a> {
};
let (tok, length) = match next {
'(' => (TokenKind::ParenthesisOpen, 1),
')' => (TokenKind::ParenthesisClose, 1),
'(' => (TokenKind::ParenOpen, 1),
')' => (TokenKind::ParenClose, 1),
'{' => (TokenKind::BraceOpen, 1),
'}' => (TokenKind::BraceClose, 1),
':' => (TokenKind::Colon, 1),

View File

@ -2,11 +2,12 @@ use error::TrixyError;
use crate::lexing::TokenStream;
use self::command_spec::CommandSpec;
use self::command_spec::unchecked::CommandSpec;
mod command_spec;
pub mod error;
pub mod lexing;
pub mod parsing;
pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
let input_tokens = TokenStream::lex(input)?;
@ -14,45 +15,45 @@ pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
todo!()
}
#[cfg(test)]
mod test {
use crate::{
command_spec::{CommandSpec, Declaration, Genus, NamedType, Namespace, Type},
parse_trixy_lang,
};
#[test]
fn test_function_with_namespace() {
let expected = parse_trixy_lang(
"
nasp commands {
fn say_something(name_to_greet: String, what_to_say: String) -> String;
}
",
)
.unwrap();
let correct: CommandSpec = {
let declarations = vec![Declaration {
namespace: vec![Namespace {
name: "commands".to_owned(),
}],
genus: Genus::Function {
name: "say_something".to_owned(),
inputs: vec![
NamedType {
name: "name_to_greet".to_owned(),
base: Type::String,
},
NamedType {
name: "what_to_say".to_owned(),
base: Type::String,
},
],
output: Type::String,
},
}];
CommandSpec { declarations }
};
assert_eq!(expected, correct);
}
}
// #[cfg(test)]
// mod test {
// use crate::{
// command_spec::unchecked::{CommandSpec, Declaration, Genus, Namespace},
// parse_trixy_lang,
// };
//
// #[test]
// fn test_function_with_namespace() {
// let expected = parse_trixy_lang(
// "
// nasp commands {
// fn say_something(name_to_greet: String, what_to_say: String) -> String;
// }
// ",
// )
// .unwrap();
// let correct: CommandSpec = {
// let declarations = vec![Declaration {
// namespace: vec![Namespace {
// name: "commands".to_owned(),
// }],
// genus: Genus::Function {
// name: "say_something".to_owned(),
// inputs: vec![
// NamedType {
// name: "name_to_greet".to_owned(),
// base: Type::String,
// },
// NamedType {
// name: "what_to_say".to_owned(),
// base: Type::String,
// },
// ],
// output: Type::String,
// },
// }];
// CommandSpec { declarations }
// };
// assert_eq!(expected, correct);
// }
// }

View File

@ -23,6 +23,11 @@ pub enum Command {
/// The file containing the trixy code to tokenize
file: PathBuf,
},
Parse {
#[clap(value_parser)]
/// The file containing the trixy code to parse
file: PathBuf,
},
}
pub fn main() {
@ -34,12 +39,34 @@ pub fn main() {
let input_tokens = match TokenStream::lex(&input) {
Ok(err) => err,
Err(ok) => {
println!("{}", ok);
eprintln!("{}", ok);
exit(1);
}
};
println!("{:#?}", input_tokens);
}
Command::Parse { file } => {
let input = fs::read_to_string(file).unwrap();
let input_tokens = match TokenStream::lex(&input) {
Ok(ok) => ok,
Err(err) => {
eprintln!("Error while tokenizing:");
eprintln!("{}", err);
exit(1);
}
};
let parsed = match input_tokens.parse_unchecked() {
Ok(ok) => ok,
Err(err) => {
eprintln!("Error while doing the first (unchecked) parsing run:");
eprintln!("{}", err);
exit(1)
}
};
println!("{:#?}", parsed);
}
}
}

View File

@ -0,0 +1,93 @@
use std::{error::Error, fmt::Display};
use thiserror::Error;
use crate::{
error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
lexing::{TokenKind, TokenSpan},
};
/// The errors which can occur during the first (unchecked) parser pass.
/// Every variant stores the span of the offending token.
#[derive(Error, Debug)]
pub enum ParsingError {
/// A specific kind of token was expected, but a different one was found
#[error("Expected '{expected}' but received '{actual}'")]
ExpectedDifferentToken {
expected: TokenKind,
actual: TokenKind,
span: TokenSpan,
},
/// A keyword was expected at the start of a declaration, but something else was found
#[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
ExpectedKeyword { actual: TokenKind, span: TokenSpan },
}
impl ParsingError {
    /// Return the span of the token which caused this error.
    pub fn get_span(&self) -> TokenSpan {
        // Every variant stores its span in a field called `span`
        match self {
            ParsingError::ExpectedDifferentToken { span, .. }
            | ParsingError::ExpectedKeyword { span, .. } => *span,
        }
    }
}
/// Provides the help text which is shown below a parsing error.
impl AdditionalHelp for ParsingError {
    /// Describe what the parser expected and what it found instead.
    fn additional_help(&self) -> String {
        match self {
            ParsingError::ExpectedKeyword { actual: found, .. } => {
                format!(
                    "I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!",
                    found
                )
            }
            ParsingError::ExpectedDifferentToken {
                expected: wanted,
                actual: found,
                ..
            } => {
                format!(
                    "I expected a '{}' here, but you put a '{}' there!",
                    wanted, found
                )
            }
        }
    }
}
/// A `ParsingError` together with the source context it occurred in.
#[derive(Debug)]
pub struct SpannedParsingError {
/// The actual error
pub source: ParsingError,
/// Where in the source file the error occurred
pub context: ErrorContext,
}
/// Exposes the wrapped `ParsingError` as the underlying cause of this error.
impl Error for SpannedParsingError {
fn source(&self) -> Option<&(dyn Error + 'static)> {
Some(&self.source)
}
}
/// Displays the error by delegating to `ErrorContextDisplay::error_fmt`.
impl Display for SpannedParsingError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
self.error_fmt(f)
}
}
impl ErrorContextDisplay for SpannedParsingError {
type Error = ParsingError;
fn context(&self) -> &crate::error::ErrorContext {
&self.context
}
fn line_number(&self) -> usize {
self.context.line_number
}
fn line_above(&self) -> &str {
&self.context.line_above
}
fn line_below(&self) -> &str {
&self.context.line_below
}
fn line(&self) -> &str {
&self.context.line
}
fn source(&self) -> &<SpannedParsingError as ErrorContextDisplay>::Error {
&self.source
}
}

View File

@ -0,0 +1,4 @@
mod error;
mod unchecked;
#[cfg(test)]
mod test;

View File

@ -0,0 +1,88 @@
use crate::{
command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput, Genus},
lexing::{Token, TokenKind, TokenSpan, TokenStream},
};
use super::error::ParsingError;
use pretty_assertions::assert_eq;
/// Malformed input must be rejected by the unchecked parser with an
/// `ExpectedKeyword` error.
#[test]
fn test_failing() {
    let input = "
fn print(message: CommandTransferValue);
nasp trinitrix { {}
fn hi honner(name: String) -> String; ;
}
";
    let tokens = TokenStream::lex(input).unwrap();
    let error = tokens.parse_unchecked().unwrap_err();

    assert!(
        matches!(error.source, ParsingError::ExpectedKeyword { .. }),
        "Wrong error"
    );
}
/// Parses a top-level function and a function nested in a namespace, then compares
/// the result (including every token span) against a hand written `CommandSpec`.
#[test]
fn test_full() {
let input = "fn print(message: CommandTransferValue);
nasp trinitrix {
fn hi(name: String) -> String;
}
";
let parsed = TokenStream::lex(input).unwrap().parse_unchecked().unwrap();
let expected = CommandSpec {
declarations: vec![
// the top-level `print` function: no namespace, no return type
Declaration {
namespace: vec![],
genus: Genus::Function(Function {
identifier: Token {
span: TokenSpan { start: 3, end: 8 },
kind: TokenKind::Identifier("print".to_owned()),
},
inputs: vec![FunctionInput {
name: Token {
span: TokenSpan { start: 9, end: 16 },
kind: TokenKind::Identifier("message".to_owned()),
},
r#type: Token {
span: TokenSpan { start: 18, end: 38 },
kind: TokenKind::Identifier("CommandTransferValue".to_owned()),
},
}],
output: None,
}),
},
// the `hi` function inside of the `trinitrix` namespace
Declaration {
namespace: vec![Token {
span: TokenSpan { start: 47, end: 56 },
kind: TokenKind::Identifier("trinitrix".to_owned()),
}],
genus: Genus::Function(Function {
identifier: Token {
span: TokenSpan { start: 66, end: 68 },
kind: TokenKind::Identifier("hi".to_owned()),
},
inputs: vec![FunctionInput {
name: Token {
span: TokenSpan { start: 69, end: 73 },
kind: TokenKind::Identifier("name".to_owned()),
},
r#type: Token {
span: TokenSpan { start: 75, end: 81 },
kind: TokenKind::Identifier("String".to_owned()),
},
}],
output: Some(Token {
span: TokenSpan { start: 86, end: 92 },
kind: TokenKind::Identifier("String".to_owned()),
}),
}),
},
],
};
assert_eq!(parsed, expected);
}

View File

@ -0,0 +1,167 @@
use crate::{
command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput},
error::ErrorContext,
lexing::{Token, TokenKind, TokenStream},
token,
};
use super::error::{ParsingError, SpannedParsingError};
impl TokenStream {
    /// Runs the unchecked parser pass over this token stream.
    ///
    /// # Errors
    /// Returns a [`SpannedParsingError`] if parsing fails.
    pub fn parse_unchecked(self) -> Result<CommandSpec, SpannedParsingError> {
        Parser::new(self).parse()
    }
}
/// Parser state for the unchecked pass: the remaining tokens plus the stack
/// of namespaces currently being parsed.
pub(super) struct Parser {
    /// The tokens still to be consumed; `Parser::new` calls `reverse()` on it once.
    token_stream: TokenStream,
    /// Names of the enclosing `nasp` blocks, outermost first.
    current_namespaces: Vec<Token>, // This should in the second pass turn into Identifiers
}
impl Parser {
    /// Creates a parser over `token_stream` with an empty namespace stack.
    ///
    /// The stream is reversed once up front; presumably so that `peek`/`pop`
    /// hand out tokens in source order (confirm against `TokenStream`).
    fn new(mut token_stream: TokenStream) -> Self {
        token_stream.reverse();
        Self {
            token_stream,
            current_namespaces: vec![],
        }
    }

    /// Parses declarations until the token stream is empty.
    ///
    /// # Errors
    /// Stops at the first [`ParsingError`] and returns it wrapped in a
    /// [`SpannedParsingError`], with the context built from the error's span
    /// and the original file contents.
    fn parse(&mut self) -> Result<CommandSpec, SpannedParsingError> {
        let mut declarations = vec![];
        while !self.token_stream.is_empty() {
            let next = self.parse_next().map_err(|err| {
                let span = err.get_span();
                SpannedParsingError {
                    source: err,
                    context: ErrorContext::from_span(span, &self.token_stream.original_file),
                }
            })?;
            declarations.extend(next);
        }
        Ok(CommandSpec { declarations })
    }

    /// Parses the next item, which must start with `nasp` or `fn`.
    ///
    /// A namespace yields all declarations it contains; a function yields a
    /// single declaration tagged with the current namespace stack.
    ///
    /// # Errors
    /// Returns `ParsingError::ExpectedKeyword` if the next token is neither
    /// keyword, or whatever error the nested parse produced.
    fn parse_next(&mut self) -> Result<Vec<Declaration>, ParsingError> {
        match self.peek().kind() {
            token![nasp] => self.parse_namespace(),
            token![fn] => Ok(vec![Declaration::new_function(
                self.parse_function()?,
                self.current_namespaces.clone(),
            )]),
            _ => Err(ParsingError::ExpectedKeyword {
                span: *self.peek().span(),
                actual: self.peek().kind().clone(),
            }),
        }
    }

    /// Parses `nasp <Ident> { ... }` and returns the declarations inside it.
    ///
    /// The namespace name is pushed onto `current_namespaces` while the body
    /// is parsed and popped again after the closing brace.
    fn parse_namespace(&mut self) -> Result<Vec<Declaration>, ParsingError> {
        self.expect(token![nasp])?;
        let namespace_name = self.expect(token![Ident])?;
        self.current_namespaces.push(namespace_name);
        self.expect(token![BraceOpen])?;

        let mut declarations = vec![];
        while !self.expect_peek(token![BraceClose]) {
            declarations.extend(self.parse_next()?);
        }

        self.expect(token![BraceClose])?;
        self.current_namespaces.pop();
        Ok(declarations)
    }

    /// Parses `fn <Ident> ( {<Ident> : <Ident>} ) [-> <Ident>] ;`.
    ///
    /// Inputs are read for as long as the next token is an identifier; no
    /// separator token is consumed between them.
    fn parse_function(&mut self) -> Result<Function, ParsingError> {
        self.expect(token![fn])?;
        let name = self.expect(token![Ident])?;

        self.expect(token![ParenOpen])?;
        let mut inputs = vec![];
        while self.expect_peek(token![Ident]) {
            let input_name = self.expect(token![Ident])?;
            self.expect(token![Colon])?;
            let input_type = self.expect(token![Ident])?;
            inputs.push(FunctionInput {
                name: input_name,
                r#type: input_type,
            });
        }
        self.expect(token![ParenClose])?;

        // The return type is optional and only present after a `->`.
        let output_type = if self.expect_peek(token![->]) {
            self.expect(token![->])?;
            Some(self.expect(token![Ident])?)
        } else {
            None
        };
        self.expect(token![;])?;

        Ok(Function {
            identifier: name,
            inputs,
            output: output_type,
        })
    }

    /// Consumes the next token if it is of the same kind as `token`.
    ///
    /// On a mismatch nothing is consumed and
    /// `ParsingError::ExpectedDifferentToken` is returned, carrying the
    /// expected kind, the actual kind and the actual token's span.
    ///
    /// For example:
    ///
    /// ```ignore
    /// use trixy_lang_parser::{lexing::TokenStream, parsing::unchecked::Parser, token};
    ///
    /// # fn main() {
    /// let token_stream = TokenStream::lex("nasp {}").unwrap();
    /// let mut parser = Parser::new(token_stream);
    /// assert!(parser.expect(token![nasp]).is_ok());
    /// assert!(parser.expect(token![BraceOpen]).is_ok());
    /// // A mismatch is an error and leaves the token in place ...
    /// assert!(parser.expect(token![nasp]).is_err());
    /// // ... so the correct expectation still succeeds afterwards.
    /// assert!(parser.expect(token![BraceClose]).is_ok());
    /// # }
    /// ```
    ///
    pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
        let actual_token = self.peek();
        if actual_token.kind().same_kind(&token) {
            Ok(self.pop())
        } else {
            Err(ParsingError::ExpectedDifferentToken {
                expected: token,
                actual: actual_token.kind().clone(),
                span: *actual_token.span(),
            })
        }
    }

    /// Check if the next token is of the specified TokenKind.
    /// Does not alter the token_stream
    fn expect_peek(&self, token: TokenKind) -> bool {
        self.peek().kind().same_kind(&token)
    }

    /// Looks at the next token without removing it
    fn peek(&self) -> &Token {
        self.token_stream.peek()
    }

    /// Removes the next token
    fn pop(&mut self) -> Token {
        self.token_stream.pop()
    }
}