diff --git a/trixy/trixy-lang_parser/.gitignore b/trixy/trixy-lang_parser/.gitignore
index 72fc7e3..20c0ba9 100644
--- a/trixy/trixy-lang_parser/.gitignore
+++ b/trixy/trixy-lang_parser/.gitignore
@@ -2,5 +2,5 @@
 /target
 /result
 
-# lua_macros is a library
+# This crate is a library
 Cargo.lock
diff --git a/trixy/trixy-lang_parser/README.md b/trixy/trixy-lang_parser/README.md
new file mode 100644
index 0000000..0a6f6bb
--- /dev/null
+++ b/trixy/trixy-lang_parser/README.md
@@ -0,0 +1,6 @@
+# trixy-lang_parser
+This crate contains a parser (and lexer) for the Trixy language.
+The corresponding grammar lives in [docs/grammar.ebnf](./docs/grammar.ebnf), encoded in [Extended Backus-Naur Form](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form).
+
+## Docs
+Run `./generate_docs` to turn the grammar file into railroad diagrams.
diff --git a/trixy/trixy-lang_parser/docs/grammar.ebnf b/trixy/trixy-lang_parser/docs/grammar.ebnf
new file mode 100644
index 0000000..749149c
--- /dev/null
+++ b/trixy/trixy-lang_parser/docs/grammar.ebnf
@@ -0,0 +1,17 @@
+# (*
+# Trixy is fully whitespace independent; this means that you can
+# interleave whitespace in the definitions.
+# The same applies to comments:
+# - Line comments (`// \n`) and
+# - Block comments (`/* */`).
+# *)
+
+CommandSpec = { Function | Namespace } ;
+Function = "fn" Identifier "(" {Identifier ":" Type} ")" [ "->" Type ] ";" ;
+Namespace = "nasp" Identifier "{" {Function | Namespace} "}" ;
+Type = "String" | "Integer" ; # (* This corresponds to the CommandTransferValue *)
+Identifier = CHARACTER { NUMBER | CHARACTER } ;
+
+# (*
+# vim: ft=ebnf
+# *)
diff --git a/trixy/trixy-lang_parser/docs/grammar.pdf b/trixy/trixy-lang_parser/docs/grammar.pdf
new file mode 100644
index 0000000..54cad09
Binary files /dev/null and b/trixy/trixy-lang_parser/docs/grammar.pdf differ
diff --git a/trixy/trixy-lang_parser/example/empty.tri b/trixy/trixy-lang_parser/example/empty.tri
new file mode 100644
index 0000000..e69de29
diff --git a/trixy/trixy-lang_parser/example/failing.tri b/trixy/trixy-lang_parser/example/failing.tri
new file mode 100644
index 0000000..7227248
--- /dev/null
+++ b/trixy/trixy-lang_parser/example/failing.tri
@@ -0,0 +1,9 @@
+fn print(message: CommandTransferValue);
+
+nasp trinitrix { {}
+    fn hi honner(name: String) -> String; ;
+}
+
+
+// That's a flat-out lie, but it results in a rather nice syntax highlight compared to nothing:
+// vim: syntax=rust
diff --git a/trixy/trixy-lang_parser/example/full.tri b/trixy/trixy-lang_parser/example/full.tri
new file mode 100644
index 0000000..d0ca0b1
--- /dev/null
+++ b/trixy/trixy-lang_parser/example/full.tri
@@ -0,0 +1,126 @@
+/// Prints to the output, with a newline.
+// HACK(@soispha): The stdlib Lua `print()` function has its output hardcoded to stdout;
+// redirecting stdout seems too much like a hack, so we are just redefining the print function
+// to write to a controlled output. <2023-09-09>
+fn print(input: CommandTransferValue);
+
+nasp trinitrix {
+    /// Language-specific functions, which mirror the `trinitrix.api` namespace.
+    /// That is, if you have to choose between a `std` and an `api` function, choose the `std`
+    /// one, as it will most likely be more high-level and easier to use (as it isn't abstracted
+    /// over multiple languages). Feel free to drop down to the lower-level api if you prefer
+    /// that; it should be just as stable and user-oriented as the `std` functions.
+    nasp std {}
+
+    /// Debug-only functions; these are effectively useless
+    nasp debug {
+        /// Greets the user
+        fn greet(input: String) -> String;
+
+        /// Returns a table of greeted users
+        fn greet_multiple() -> Table;
+    }
+
+    /// General API to change stuff in Trinitrix
+    nasp api {
+        /// Closes the application
+        fn exit();
+
+        /// Send a message to the current room
+        /// The sent message is interpreted literally.
+        fn room_message_send(msg: String);
+
+        /// Open the help pages at the first occurrence of
+        /// the input string if it is Some, otherwise open
+        /// the help pages at the start
+        fn help(input: Option);
+
+        // Register a function to be used with the Trinitrix API
+        // (This function is actually implemented in the std namespace)
+        /* fn register_function(function: RawFunction); */
+
+        /// Functions that change the UI, or UI state
+        nasp ui {
+            /// Shows the command line
+            fn command_line_show();
+
+            /// Hides the command line
+            fn command_line_hide();
+
+            /// Go to the next plane
+            fn cycle_planes();
+            /// Go to the previous plane
+            fn cycle_planes_rev();
+
+            /// Sets the current app mode to Normal / navigation mode
+            fn set_mode_normal();
+            /// Sets the current app mode to Insert / editing mode
+            fn set_mode_insert();
+        }
+
+        /// Manipulate keymappings; the mode is specified as a String built up of all modes
+        /// the keymapping should be active in. The mapping works as follows:
+        /// n => normal Mode
+        /// c => command Mode
+        /// i => insert Mode
+        ///
+        /// The key works in a similar manner, specifying the required keypresses to trigger the
+        /// callback. For example "aba" would require the user to press "a" then "b" then "a" again
+        /// to trigger the mapping. Special characters are encoded as follows:
+        /// "<C-a>ba" => "Ctrl+a" then "b" then "a"
+        /// "<S-a>" => "A" or "Shift+a"
+        /// "A" => "A"
+        /// "<A-a>" => "Alt+a" (<A-a>) or "Meta+a" (<M-a>) (most terminals can't really differentiate between these characters)
+        /// "a<C-b><C-a>" => "a" then "Ctrl+b" then "Ctrl+a" (also works for Shift, Alt and Super)
+        /// "<C-S-A-b>" => "Ctrl+Shift+Alt+b" (the ordering doesn't matter)
+        /// "a " => "a" then a literal space (" ")
+        /// "å🙂" => "å" then "🙂" (full Unicode support!)
+        /// "<Esc>" => escape key
+        /// "<F3>" => F3 key
+        /// "<BS>" => backspace key (and so forth)
+        /// "" => a literal "-"
+        /// "" or "" => a literal "<"
+        /// "" or "" => a literal ">"
+        ///
+        /// The callback MUST be registered first by calling
+        /// `trinitrix.api.register_function()`; the returned value can then be used to
+        /// set the keymap.
+        nasp keymaps {
+            /// Add a new keymapping
+            fn add(mode: String, key: String, callback: Function);
+
+            /// Remove a keymapping
+            ///
+            /// Does nothing if the keymapping doesn't exist
+            fn remove((/* mode: */ String, /* key: */ String));
+
+            /// List declared keymappings
+            fn get(mode: String);
+        }
+
+        /// Functions only used internally within Trinitrix
+        nasp raw {
+            /// Send an error to the default error output
+            fn raise_error(input: String);
+
+            /// Send output to the default output
+            /// This is mainly used to display the final
+            /// output of evaluated Lua commands.
+            fn display_output(input: String);
+
+            /// Input a character without checking for possible keymaps
+            /// If the current state does not expect input, this character is ignored
+            /// The encoding is the same as in the `trinitrix.api.keymaps` commands
+            fn send_input_unprocessed(input: String);
+
+            /// This namespace is used to store some command-specific data (like functions, as
+            /// ensuring memory locations stay allocated in a garbage-collected language is hard)
+            ///
+            /// Treat it as an implementation detail
+            nasp __private {}
+        }
+    }
+}
+
+// That's a flat-out lie, but it results in a rather nice syntax highlight compared to nothing:
+// vim: syntax=rust
diff --git a/trixy/trixy-lang_parser/example/example_simple.tri b/trixy/trixy-lang_parser/example/multiple.tri
similarity index 80%
rename from trixy/trixy-lang_parser/example/example_simple.tri
rename to trixy/trixy-lang_parser/example/multiple.tri
index 8cdb691..a0d01ad 100644
--- a/trixy/trixy-lang_parser/example/example_simple.tri
+++ b/trixy/trixy-lang_parser/example/multiple.tri
@@ -3,7 +3,9 @@ fn print(message: CommandTransferValue);
 
 nasp trinitrix {
     fn hi(name: String) -> String;
 }
-namespace commands {
+
+nasp trinitrix {
+    fn ho(name: String) -> String;
 }
 
diff --git a/trixy/trixy-lang_parser/example/example.tri b/trixy/trixy-lang_parser/example/simple.tri
similarity index 100%
rename from trixy/trixy-lang_parser/example/example.tri
rename to trixy/trixy-lang_parser/example/simple.tri
diff --git a/trixy/trixy-lang_parser/generate_docs b/trixy/trixy-lang_parser/generate_docs
new file mode 100755
index 0000000..e48d336
--- /dev/null
+++ b/trixy/trixy-lang_parser/generate_docs
@@ -0,0 +1,9 @@
+#!/usr/bin/env sh
+
+
+
+ebnf2pdf "./docs/grammar.ebnf"
+mv out.pdf ./docs/grammar.pdf
+
+
+# vim: ft=sh
diff --git a/trixy/trixy-lang_parser/src/command_spec/checked.rs b/trixy/trixy-lang_parser/src/command_spec/checked.rs
new file mode 100644
index 0000000..c47bf73
--- /dev/null
+++ b/trixy/trixy-lang_parser/src/command_spec/checked.rs
@@ -0,0 +1,58 @@
+//! This module contains the already type-checked types.
+//!
+//!
+
+use crate::lexing::{Keyword, TokenKind};
+
+pub enum PrimitiveTypes {
+    String,
+    /// Nothing
+    Void,
+}
+
+impl From<TokenKind> for Identifier {
+    fn from(value: TokenKind) -> Self {
+        match value {
+            TokenKind::Identifier(ident) => Identifier(ident),
+            TokenKind::Keyword(_)
+            | TokenKind::Colon
+            | TokenKind::Semicolon
+            | TokenKind::Comma
+            | TokenKind::Arrow
+            | TokenKind::BraceOpen
+            | TokenKind::BraceClose
+            | TokenKind::ParenOpen
+            | TokenKind::Dummy
+            | TokenKind::ParenClose => {
+                panic!("Tried to convert a non-Identifier TokenKind to an Identifier. This is a bug")
+            }
+        }
+    }
+}
+
+/// An Identifier
+/// These include:
+/// - Variable names
+/// - Function names
+/// - Namespace names
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Identifier(String);
+
+impl From<TokenKind> for Keyword {
+    fn from(value: TokenKind) -> Self {
+        match value {
+            TokenKind::Keyword(keyword) => keyword,
+            TokenKind::Identifier(_)
+            | TokenKind::Colon
+            | TokenKind::Semicolon
+            | TokenKind::Comma
+            | TokenKind::Arrow
+            | TokenKind::BraceOpen
+            | TokenKind::BraceClose
+            | TokenKind::ParenOpen
+            | TokenKind::Dummy
+            | TokenKind::ParenClose => {
+                panic!("Tried to convert a non-Keyword TokenKind to a Keyword. This is a bug")
+            }
+        }
+    }
+}
diff --git a/trixy/trixy-lang_parser/src/command_spec/mod.rs b/trixy/trixy-lang_parser/src/command_spec/mod.rs
index 2832a12..1bf868c 100644
--- a/trixy/trixy-lang_parser/src/command_spec/mod.rs
+++ b/trixy/trixy-lang_parser/src/command_spec/mod.rs
@@ -1,36 +1,2 @@
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub struct CommandSpec {
-    pub(crate) declarations: Vec<Declaration>,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct Declaration {
-    pub(crate) namespace: Vec<Namespace>,
-    pub(crate) genus: Genus,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct Namespace {
-    pub(crate) name: String,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) enum Genus {
-    Function {
-        name: String,
-        inputs: Vec<NamedType>,
-        output: Type,
-    },
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct NamedType {
-    pub(crate) name: String,
-    pub(crate) base: Type,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) enum Type {
-    String,
-    Void,
-}
+pub mod checked;
+pub mod unchecked;
diff --git a/trixy/trixy-lang_parser/src/command_spec/unchecked.rs b/trixy/trixy-lang_parser/src/command_spec/unchecked.rs
new file mode 100644
index 0000000..ec2fa66
--- /dev/null
+++ b/trixy/trixy-lang_parser/src/command_spec/unchecked.rs
@@ -0,0 +1,48 @@
+//! This module contains the not-yet type-checked types.
+//! These are generated on the first pass of the parser, to be later converted into the checked
+//! ones.
+
+use crate::lexing::Token;
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct CommandSpec {
+    pub declarations: Vec<Declaration>,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Declaration {
+    pub namespace: Vec<Token>, // Will later be turned into Namespaces
+    pub genus: Genus,
+}
+
+impl Declaration {
+    pub fn new_function(function: Function, namespace: Vec<Token>) -> Self {
+        Declaration { namespace, genus: Genus::Function(function) }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Namespace {
+    pub name: Token, // Will later become an Identifier
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub enum Genus {
+    /// Not actually a genus, but used in parsing to accommodate multiple errors
+    Dummy,
+    /// A function
+    Function(Function),
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Function {
+    pub identifier: Token, // Will later become an Identifier
+    pub inputs: Vec<FunctionInput>,
+    pub output: Option<Token>, // Will later become a Type
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct FunctionInput {
+    pub name: Token,   // Will later become an Identifier
+    pub r#type: Token, // Will later become a Type
+}
diff --git a/trixy/trixy-lang_parser/src/error.rs b/trixy/trixy-lang_parser/src/error.rs
index e0681a3..fcf441d 100644
--- a/trixy/trixy-lang_parser/src/error.rs
+++ b/trixy/trixy-lang_parser/src/error.rs
@@ -1,9 +1,191 @@
+use core::fmt;
+
 use thiserror::Error;
 
-use crate::lexing::error::SpannedLexingError;
+use crate::lexing::{error::SpannedLexingError, TokenSpan};
 
 #[derive(Error, Debug)]
 pub enum TrixyError {
     #[error(transparent)]
     Parsing(#[from] SpannedLexingError),
 }
+
+/// The context of an Error.
+#[derive(Debug)]
+pub struct ErrorContext {
+    /// The span of the error in the source file
+    pub span: TokenSpan,
+    /// The span of the error relative to the context line
+    pub contexted_span: TokenSpan,
+    /// The line above the error
+    pub line_above: String,
+    /// The line below the error
+    pub line_below: String,
+    /// The line in which the error occurred
+    pub line: String,
+    /// The line number of the main error line
+    pub line_number: usize,
+}
+
+impl ErrorContext {
+    pub fn from_span(span: TokenSpan, original_file: &str) -> Self {
+        let line_number = original_file
+            .chars()
+            .take(span.start)
+            .filter(|a| a == &'\n')
+            .count()
+            // The count above is zero-based, but line numbers are one-based
+            + 1;
+
+        let lines: Vec<_> = original_file.lines().collect();
+
+        let line = (*lines
+            .get(line_number - 1)
+            .expect("This should work, as we have *at least* one (index = 0) line"))
+        .to_owned();
+
+        let contexted_span = {
+            let matched_line: Vec<_> = original_file.match_indices(&line).collect();
+            let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
+            debug_assert_eq!(matched_line, &&line);
+            TokenSpan {
+                start: span.start - index,
+                end: span.end - index,
+            }
+        };
+
+        let line_above;
+        if line_number == 1 {
+            // We are on the first line, so there is no line above
+            line_above = "".to_owned();
+        } else {
+            line_above = (*lines
+                .get((line_number - 1) - 1)
+                .expect("We checked that this should work"))
+            .to_owned();
+        }
+
+        let line_below;
+        if lines.len() > line_number {
+            // We have a line after the current line
+            line_below = (*lines
+                .get((line_number + 1) - 1)
+                .expect("We checked that this should work"))
+            .to_owned();
+        } else {
+            line_below = "".to_owned();
+        }
+
+        Self {
+            span,
+            contexted_span,
+            line_above,
+            line_below,
+            line,
+            line_number,
+        }
+    }
+
+    pub fn from_index(start: usize, original_file: &str) -> Self {
+        let span = TokenSpan { start, end: start };
+        Self::from_span(span, original_file)
+    }
+
+    pub fn get_error_line(&self, source_error: &str) -> String {
+        // deconstruct the structure
+        let ErrorContext {
+            contexted_span,
+            line_number,
+            ..
+        } = self;
+
+        let mut output = String::new();
+        output.push_str("\x1b[92;1m");
+
+        // pad to accommodate the line number printing,
+        // e.g. line number 32 needs two characters of padding
+        line_number.to_string().chars().for_each(|_| {
+            output.push(' ');
+        });
+
+        // pad to the beginning of the error
+        for _ in 0..contexted_span.start {
+            output.push(' ');
+        }
+
+        // push the error markers
+        for _ in contexted_span.start..contexted_span.end {
+            output.push('^');
+        }
+
+        // // pad until end of line
+        // for _ in contexted_span.end..(line.len() - 1) {
+        //     output.push('-');
+        // }
+        //
+        // additional space to avoid having to end with a '-'
+        output.push(' ');
+
+        output.push_str("help: ");
+
+        output.push_str(source_error);
+        output.push_str("\x1b[0m");
+        output
+    }
+}
+
+pub trait AdditionalHelp {
+    fn additional_help(&self) -> String;
+}
+
+pub trait ErrorContextDisplay: fmt::Display {
+    type Error;
+
+    fn error_fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
+    where
+        <Self as ErrorContextDisplay>::Error: std::fmt::Display + AdditionalHelp,
+    {
+        let error_line = self
+            .context()
+            .get_error_line(&self.source().additional_help());
+
+        writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", self.source())?;
+
+        if !self.line_above().is_empty() {
+            writeln!(
+                f,
+                "\x1b[32;1m{} |\x1b[0m {}",
+                self.line_number() - 1,
+                self.line_above()
+            )?;
+        }
+        writeln!(
+            f,
+            "\x1b[36;1m{} |\x1b[0m {}",
+            self.line_number(),
+            self.line()
+        )?;
+        writeln!(f, " {}", error_line)?;
+        if !self.line_below().is_empty() {
+            writeln!(
+                f,
+                "\x1b[32;1m{} |\x1b[0m {}",
+                self.line_number() + 1,
+                self.line_below()
+            )
+        } else {
+            write!(f, "")
+        }
+    }
+
+    // getters
+    fn context(&self) -> &ErrorContext;
+    fn source(&self) -> &Self::Error;
+    fn line_number(&self) -> usize;
+    fn line_above(&self) -> &str;
+    fn line_below(&self) -> &str;
+    fn line(&self) -> &str;
+}
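To see what `ErrorContext::from_span` actually computes, here is a sketch against the API introduced above (not code from the patch; both `error` and `lexing` are public modules, so this works from outside the crate):

```rust
use trixy_lang_parser::{error::ErrorContext, lexing::TokenSpan};

fn main() {
    let src = "fn print(message: CommandTransferValue);\nnasp trinitrix {}\n";

    // `nasp` on line 2 occupies chars 42..46 of the file.
    let context = ErrorContext::from_span(TokenSpan { start: 42, end: 46 }, src);

    assert_eq!(context.line_number, 2);
    assert_eq!(context.line, "nasp trinitrix {}");
    // `nasp` starts the context line, so the span is rebased to 0.
    assert_eq!(context.contexted_span.start, 0);

    // Renders the caret markers plus a "help: ..." suffix in ANSI colors.
    println!("{}", context.get_error_line("the `nasp` keyword was found here"));
}
```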
diff --git a/trixy/trixy-lang_parser/src/lexing/error.rs b/trixy/trixy-lang_parser/src/lexing/error.rs
index 12177fb..98f3699 100644
--- a/trixy/trixy-lang_parser/src/lexing/error.rs
+++ b/trixy/trixy-lang_parser/src/lexing/error.rs
@@ -1,6 +1,8 @@
 use std::{error::Error, fmt::Display};
 use thiserror::Error;
 
+use crate::error::{AdditionalHelp, ErrorContext, ErrorContextDisplay};
+
 #[derive(Error, Debug)]
 pub enum LexingError {
     #[error("No matches were found")]
@@ -13,88 +15,61 @@ pub enum LexingError {
     ExpectedArrow,
 }
 
+impl AdditionalHelp for LexingError {
+    fn additional_help(&self) -> String {
+        let out = match self {
+            LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
+            LexingError::UnexpectedEOF => "This EOF was completely unexpected".to_owned(),
+            LexingError::ExpectedArrow => "The `-` token is interpreted as the start of an arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
+            LexingError::UnknownCharacter(char) => {
+                format!("This char: `{char}`; is not a valid token")
+            },
+        };
+        out
+    }
+}
+
 #[derive(Debug)]
-pub enum SpannedLexingError {
-    Error {
-        source: LexingError,
-        /// The starting char index of the error in the source file
-        start: usize,
-        /// The starting char index of the error in the context line
-        contexted_start: usize,
-        /// The line above the error
-        line_above: String,
-        /// The line below the error
-        line_below: String,
-        /// The line in which the error occurred
-        line: String,
-        /// The line number of the main error line
-        line_number: usize,
-    },
+pub struct SpannedLexingError {
+    pub source: LexingError,
+    pub context: ErrorContext,
 }
 
 impl Error for SpannedLexingError {
     fn source(&self) -> Option<&(dyn Error + 'static)> {
-        let Self::Error { source, .. } = self;
-        Some(source)
+        Some(&self.source)
     }
 }
 
+impl ErrorContextDisplay for SpannedLexingError {
+    type Error = LexingError;
+
+    fn context(&self) -> &crate::error::ErrorContext {
+        &self.context
+    }
+
+    fn line_number(&self) -> usize {
+        self.context.line_number
+    }
+
+    fn line_above(&self) -> &str {
+        &self.context.line_above
+    }
+
+    fn line_below(&self) -> &str {
+        &self.context.line_below
+    }
+
+    fn line(&self) -> &str {
+        &self.context.line
+    }
+
+    fn source(&self) -> &<Self as ErrorContextDisplay>::Error {
+        &self.source
+    }
+}
+
 impl Display for SpannedLexingError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let Self::Error {
-            source,
-            line_above,
-            line_below,
-            line,
-            line_number,
-            contexted_start,
-            ..
-        } = self;
-        let error_line = {
-            let mut output = String::new();
-            output.push_str("\x1b[92;1m");
-            for _ in 0..(*contexted_start) {
-                output.push(' ');
-            }
-            line_number.to_string().chars().for_each(|_| {
-                output.push(' ');
-            });
-            output.push('^');
-            for _ in *contexted_start..(line.len() - 1) {
-                output.push('-');
-            }
-            output.push(' ');
-            let appandig_str = match source {
-                LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
-                LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
-                LexingError::UnknownCharacter(char) => format!("This char: `{char}`; is not a valid token"),
-                LexingError::ExpectedArrow => "The `-` token is interpretet as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
-            };
-            output.push_str(&appandig_str);
-            output.push_str("\x1b[0m");
-            output
-        };
-
-        writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", source)?;
-        if !line_above.is_empty() {
-            writeln!(
-                f,
-                "\x1b[32;1m{} |\x1b[0m {}",
-                line_number - 1,
-                line_above
-            )?;
-        }
-        writeln!(f, "\x1b[36;1m{} |\x1b[0m {}", line_number, line)?;
-        writeln!(f, " {}", error_line)?;
-        if !line_below.is_empty() {
-            writeln!(
-                f,
-                "\x1b[32;1m{} |\x1b[0m {}",
-                line_number + 1,
-                line_below
-            )
-        } else {
-            write!(f, "")
-        }
+        self.error_fmt(f)
     }
 }
diff --git a/trixy/trixy-lang_parser/src/lexing/mod.rs b/trixy/trixy-lang_parser/src/lexing/mod.rs
index d601cb6..989ec8b 100644
--- a/trixy/trixy-lang_parser/src/lexing/mod.rs
+++ b/trixy/trixy-lang_parser/src/lexing/mod.rs
@@ -1,3 +1,5 @@
+use std::fmt::Display;
+
 use self::{error::SpannedLexingError, tokenizer::Tokenizer};
 
 pub mod error;
@@ -8,7 +10,7 @@ mod test;
 
 #[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
 pub struct TokenStream {
-    original_file: String,
+    pub original_file: String,
     tokens: Vec<Token>,
 }
 
@@ -31,34 +33,83 @@ impl TokenStream {
             original_file: src.to_owned(),
         })
     }
+
+    /// Get a token by index
+    pub fn get(&self, index: usize) -> Option<&Token> {
+        self.tokens.get(index)
+    }
+
+    /// Get a reference to the uppermost token, without modifying the token list
+    pub fn peek(&self) -> &Token {
+        self.tokens.last().expect("This should not be empty")
+    }
+
+    /// Remove and return the uppermost token
+    pub fn pop(&mut self) -> Token {
+        self.tokens.pop().expect("This should not be empty")
+    }
+
+    /// Reverses the underlying tokens vector.
+    /// This facilitates using the pop and peek methods to parse the tokens from the beginning,
+    /// not the end
+    pub fn reverse(&mut self) {
+        self.tokens.reverse()
+    }
+
+    /// Check if the TokenStream is empty.
+    pub fn is_empty(&self) -> bool {
+        self.tokens.is_empty()
+    }
 }
 
 /// A token span is recorded in chars starting from the beginning of the file:
 /// A token span like this, for example:
-/// ```no_run
+/// ```ignore
+/// # use trixy_lang_parser::lexing::TokenSpan;
 /// TokenSpan {
 ///     start: 20,
 ///     end: 23,
 /// }
 /// ```
/// signals, that the token starts at the 20th char in the source file and ends on the 23rd.
-#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
 pub struct TokenSpan {
-    start: usize,
     /// The start of the token span
-    end: usize,
+    pub start: usize,
+    /// The end of the token span
+    pub end: usize,
 }
 
 /// A Token
-#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone)]
 pub struct Token {
     /// The token's original location in the source file
-    span: TokenSpan,
-    kind: TokenKind,
+    pub span: TokenSpan,
+    pub kind: TokenKind,
+}
+
+impl Token {
+    /// Return the TokenKind of a token
+    pub fn kind(&self) -> &TokenKind {
+        &self.kind
+    }
+
+    /// Return the TokenSpan of a token
+    pub fn span(&self) -> &TokenSpan {
+        &self.span
+    }
+
+    /// Get a dummy token; this is intended for error handling
+    pub fn get_dummy() -> Token {
+        Self {
+            span: TokenSpan { start: 0, end: 0 },
+            kind: TokenKind::Dummy,
+        }
+    }
 }
 
 /// Possible kinds of tokens
-#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
 pub enum TokenKind {
     Keyword(Keyword),
     Identifier(String),
@@ -68,12 +119,49 @@ pub enum TokenKind {
     Arrow,
     BraceOpen,
     BraceClose,
-    ParenthesisOpen,
-    ParenthesisClose,
+    ParenOpen,
+    ParenClose,
+    /// This is not a real TokenKind, but only used for error handling
+    Dummy,
+}
+
+impl TokenKind {
+    pub fn same_kind(&self, other: &TokenKind) -> bool {
+        if let TokenKind::Identifier(_) = self {
+            if let TokenKind::Identifier(_) = other {
+                return true;
+            }
+        }
+        self == other
+    }
+}
+
+impl Display for TokenKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            TokenKind::Keyword(word) => write!(f, "KEYWORD({})", word),
+            TokenKind::Identifier(ident) => {
+                if ident.is_empty() {
+                    write!(f, "IDENTIFIER")
+                } else {
+                    write!(f, "IDENTIFIER({})", ident)
+                }
+            }
+            TokenKind::Colon => f.write_str("COLON"),
+            TokenKind::Semicolon => f.write_str("SEMICOLON"),
+            TokenKind::Comma => f.write_str("COMMA"),
+            TokenKind::Arrow => f.write_str("ARROW"),
+            TokenKind::BraceOpen => f.write_str("BRACEOPEN"),
+            TokenKind::BraceClose => f.write_str("BRACECLOSE"),
+            TokenKind::ParenOpen => f.write_str("PARENOPEN"),
+            TokenKind::ParenClose => f.write_str("PARENCLOSE"),
+            TokenKind::Dummy => f.write_str("DUMMY"),
+        }
+    }
 }
 
 /// Keywords used in the language
-#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
 pub enum Keyword {
     /// Start a namespace declaration
     #[allow(non_camel_case_types)]
@@ -82,3 +170,76 @@ pub enum Keyword {
     #[allow(non_camel_case_types)]
     r#fn,
 }
+
+impl Display for Keyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Keyword::nasp => f.write_str("nasp"),
+            Keyword::r#fn => f.write_str("fn"),
+        }
+    }
+}
+
+/// Shorthand macro for generating a token from *anything* which can be
+/// converted into a `TokenKind`, or any of the `TokenKind` variants.
+///
+/// # Examples
+///
+/// ```
+/// use trixy_lang_parser::token;
+/// # fn main() {
+/// token![nasp];
+/// token![;];
+/// token![Arrow];
+/// # }
+/// ```
+#[macro_export]
+macro_rules! token {
+    [Semicolon] => { $crate::lexing::TokenKind::Semicolon };
+    [;] => { $crate::lexing::TokenKind::Semicolon };
+    [Colon] => { $crate::lexing::TokenKind::Colon };
+    [:] => { $crate::lexing::TokenKind::Colon };
+    [Comma] => { $crate::lexing::TokenKind::Comma };
+    [,] => { $crate::lexing::TokenKind::Comma };
+    [Arrow] => { $crate::lexing::TokenKind::Arrow };
+    [->] => { $crate::lexing::TokenKind::Arrow };
+    [BraceOpen] => { $crate::lexing::TokenKind::BraceOpen };
+    // [{] => { $crate::lexing::TokenKind::BraceOpen };
+    [BraceClose] => { $crate::lexing::TokenKind::BraceClose };
+    // [}] => { $crate::lexing::TokenKind::BraceClose };
+    [ParenOpen] => { $crate::lexing::TokenKind::ParenOpen };
+    // [(] => { $crate::lexing::TokenKind::ParenOpen };
+    [ParenClose] => { $crate::lexing::TokenKind::ParenClose };
+    // [)] => { $crate::lexing::TokenKind::ParenClose };
+
+    [nasp] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::nasp) };
+    [fn] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#fn) };
+
+    // This only works for checking for an identifier,
+    // see the `same_kind` method on TokenKind
+    [Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
+    [Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
+}
+
+#[cfg(test)]
+mod tests {
+    use super::TokenKind;
+    use crate::token;
+
+    macro_rules! token_macro_test {
+        ($name:ident, $from:tt, => $to:expr) => {
+            #[test]
+            fn $name() {
+                let got: TokenKind = token![$from];
+                let should_be = $to;
+
+                assert_eq!(got, should_be);
+            }
+        };
+    }
+
+    token_macro_test!(tok_expands_to_arrow, ->, => TokenKind::Arrow);
+    token_macro_test!(tok_expands_to_semicolon, Semicolon, => TokenKind::Semicolon);
+    token_macro_test!(tok_expands_to_nasp, nasp, => TokenKind::Keyword(crate::lexing::Keyword::nasp));
+    token_macro_test!(tok_expands_to_fn, fn, => TokenKind::Keyword(crate::lexing::Keyword::r#fn));
+}
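Taken together, the new `TokenStream` helpers and the exported `token!` macro make the lexer directly usable as a library; a minimal sketch (not part of the patch, using only the public API added above):

```rust
use trixy_lang_parser::{lexing::TokenStream, token};

fn main() {
    let mut tokens = TokenStream::lex("nasp commands { fn hi(name: String); }")
        .expect("this snippet lexes cleanly");

    // The parser consumes the stream back-to-front, so it reverses it first.
    tokens.reverse();

    // `same_kind` compares variants while ignoring identifier payloads.
    assert!(tokens.peek().kind().same_kind(&token![nasp]));

    while !tokens.is_empty() {
        println!("{}", tokens.pop().kind()); // KEYWORD(nasp), IDENTIFIER(commands), ...
    }
}
```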
diff --git a/trixy/trixy-lang_parser/src/lexing/test.rs b/trixy/trixy-lang_parser/src/lexing/test.rs
index 43665ad..396f1cb 100644
--- a/trixy/trixy-lang_parser/src/lexing/test.rs
+++ b/trixy/trixy-lang_parser/src/lexing/test.rs
@@ -36,7 +36,7 @@ nasp commands {
         },
         Token {
             span: TokenSpan { start: 30, end: 31 },
-            kind: TokenKind::ParenthesisOpen,
+            kind: TokenKind::ParenOpen,
         },
         Token {
             span: TokenSpan { start: 31, end: 36 },
@@ -52,7 +52,7 @@ nasp commands {
         },
         Token {
             span: TokenSpan { start: 44, end: 45 },
-            kind: TokenKind::ParenthesisClose,
+            kind: TokenKind::ParenClose,
         },
         Token {
             span: TokenSpan { start: 46, end: 48 },
diff --git a/trixy/trixy-lang_parser/src/lexing/tokenizer.rs b/trixy/trixy-lang_parser/src/lexing/tokenizer.rs
index 67986e1..af46d43 100644
--- a/trixy/trixy-lang_parser/src/lexing/tokenizer.rs
+++ b/trixy/trixy-lang_parser/src/lexing/tokenizer.rs
@@ -1,6 +1,9 @@
 // This code is heavily inspired by: https://michael-f-bryan.github.io/static-analyser-in-rust/book/lex.html
 
-use crate::lexing::{Keyword, TokenSpan};
+use crate::{
+    error::ErrorContext,
+    lexing::{Keyword, TokenSpan},
+};
 
 use super::{
     error::{LexingError, SpannedLexingError},
@@ -29,61 +32,11 @@ impl<'a> Tokenizer<'a> {
         let start = self.current_index;
 
         let (token_kind, index) = self.get_next_tokenkind().map_err(|e| {
-            let (line_above, line, line_below, contexted_start, line_number) = {
-                let line_number = self
-                    .original_text
-                    .chars()
-                    .take(start)
-                    .filter(|a| a == &'\n')
-                    .count();
-                let lines: Vec<_> = self.original_text.lines().collect();
+            let context = ErrorContext::from_index(start, self.original_text);
 
-                let line = (*lines
-                    .get(line_number)
-                    .expect("This should work, as have *at least* one (index = 0) line"))
-                .to_owned();
-
-                let contexted_start = {
-                    let matched_line: Vec<_> = self.original_text.match_indices(&line).collect();
-                    let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
-                    debug_assert_eq!(matched_line, &&line);
-                    start - index
-                };
-
-                let line_above;
-                if line_number == 0 {
-                    // We only have one line, so no line above
-                    line_above = "".to_owned();
-                } else {
-                    line_above = (*lines
-                        .get(line_number - 1)
-                        .expect("We checked that this should work"))
-                    .to_owned();
-                }
-
-                let line_below;
-                if lines.len() - 1 > line_number {
-                    // We have a line after the current line
-                    line_below = (*lines
-                        .get(line_number + 1)
-                        .expect("We checked that this should work"))
-                    .to_owned();
-                } else {
-                    line_below = "".to_owned();
-                }
-
-                (line_above, line, line_below, contexted_start, line_number)
-            };
-            SpannedLexingError::Error {
-                source: e,
-                start,
-                contexted_start,
-                line_above,
-                line_below,
-                line_number,
-                line,
-            }
+            SpannedLexingError { source: e, context }
        })?;
+
        self.chomp(index); // advance past the matched token
 
         let end = self.current_index;
         Ok(Some(Token {
@@ -100,8 +53,8 @@ impl<'a> Tokenizer<'a> {
         };
 
         let (tok, length) = match next {
-            '(' => (TokenKind::ParenthesisOpen, 1),
-            ')' => (TokenKind::ParenthesisClose, 1),
+            '(' => (TokenKind::ParenOpen, 1),
+            ')' => (TokenKind::ParenClose, 1),
             '{' => (TokenKind::BraceOpen, 1),
             '}' => (TokenKind::BraceClose, 1),
             ':' => (TokenKind::Colon, 1),
diff --git a/trixy/trixy-lang_parser/src/lib.rs b/trixy/trixy-lang_parser/src/lib.rs
index 1167aea..247fa8b 100644
--- a/trixy/trixy-lang_parser/src/lib.rs
+++ b/trixy/trixy-lang_parser/src/lib.rs
@@ -2,11 +2,12 @@ use error::TrixyError;
 
 use crate::lexing::TokenStream;
 
-use self::command_spec::CommandSpec;
+use self::command_spec::unchecked::CommandSpec;
 
 mod command_spec;
 pub mod error;
 pub mod lexing;
+pub mod parsing;
 
 pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
     let input_tokens = TokenStream::lex(input)?;
 
     todo!()
 }
@@ -14,45 +15,45 @@ pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
-#[cfg(test)]
-mod test {
-    use crate::{
-        command_spec::{CommandSpec, Declaration, Genus, NamedType, Namespace, Type},
-        parse_trixy_lang,
-    };
-
-    #[test]
-    fn test_function_with_namespace() {
-        let expected = parse_trixy_lang(
-            "
-        nasp commands {
-            fn say_something(name_to_greet: String, what_to_say: String) -> String;
-        }
-        ",
-        )
-        .unwrap();
-        let correct: CommandSpec = {
-            let declarations = vec![Declaration {
-                namespace: vec![Namespace {
-                    name: "commands".to_owned(),
-                }],
-                genus: Genus::Function {
-                    name: "say_something".to_owned(),
-                    inputs: vec![
-                        NamedType {
-                            name: "name_to_greet".to_owned(),
-                            base: Type::String,
-                        },
-                        NamedType {
-                            name: "what_to_say".to_owned(),
-                            base: Type::String,
-                        },
-                    ],
-                    output: Type::String,
-                },
-            }];
-            CommandSpec { declarations }
-        };
-        assert_eq!(expected, correct);
-    }
-}
+// #[cfg(test)]
+// mod test {
+//     use crate::{
+//         command_spec::unchecked::{CommandSpec, Declaration, Genus, Namespace},
+//         parse_trixy_lang,
+//     };
+//
+//     #[test]
+//     fn test_function_with_namespace() {
+//         let expected = parse_trixy_lang(
+//             "
+//         nasp commands {
+//             fn say_something(name_to_greet: String, what_to_say: String) -> String;
+//         }
+//         ",
+//         )
+//         .unwrap();
+//         let correct: CommandSpec = {
+//             let declarations = vec![Declaration {
+//                 namespace: vec![Namespace {
+//                     name: "commands".to_owned(),
+//                 }],
+//                 genus: Genus::Function {
+//                     name: "say_something".to_owned(),
+//                     inputs: vec![
+//                         NamedType {
+//                             name: "name_to_greet".to_owned(),
+//                             base: Type::String,
+//                         },
+//                         NamedType {
+//                             name: "what_to_say".to_owned(),
+//                             base: Type::String,
+//                         },
+//                     ],
+//                     output: Type::String,
+//                 },
+//             }];
+//             CommandSpec { declarations }
+//         };
+//         assert_eq!(expected, correct);
+//     }
+// }
diff --git a/trixy/trixy-lang_parser/src/main.rs b/trixy/trixy-lang_parser/src/main.rs
index c6f8104..277fb76 100644
--- a/trixy/trixy-lang_parser/src/main.rs
+++ b/trixy/trixy-lang_parser/src/main.rs
@@ -23,6 +23,11 @@ pub enum Command {
         /// The file containing the trixy code to tokenize
         file: PathBuf,
     },
+    Parse {
+        #[clap(value_parser)]
+        /// The file containing the trixy code to parse
+        file: PathBuf,
+    },
 }
 
 pub fn main() {
@@ -34,12 +39,34 @@ pub fn main() {
             let input_tokens = match TokenStream::lex(&input) {
                 Ok(err) => err,
                 Err(ok) => {
-                    println!("{}", ok);
+                    eprintln!("{}", ok);
                     exit(1);
                 }
             };
 
             println!("{:#?}", input_tokens);
         }
+        Command::Parse { file } => {
+            let input = fs::read_to_string(file).unwrap();
+
+            let input_tokens = match TokenStream::lex(&input) {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while tokenizing:");
+                    eprintln!("{}", err);
+                    exit(1);
+                }
+            };
+
+            let parsed = match input_tokens.parse_unchecked() {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while doing the first (unchecked) parsing run:");
+                    eprintln!("{}", err);
+                    exit(1)
+                }
+            };
+            println!("{:#?}", parsed);
+        }
     }
 }
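With the new `Parse` subcommand, the binary can be driven against the example files; a sketch of a session (`cargo run --` is used here because the exact binary name depends on the crate's Cargo metadata, which this patch does not show):

```sh
# From the trixy/trixy-lang_parser directory:
cargo run -- tokenize example/simple.tri    # dump the TokenStream
cargo run -- parse example/multiple.tri     # dump the unchecked CommandSpec
cargo run -- parse example/failing.tri      # exercise the new error rendering
```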
diff --git a/trixy/trixy-lang_parser/src/parsing/error.rs b/trixy/trixy-lang_parser/src/parsing/error.rs
new file mode 100644
index 0000000..a6036e3
--- /dev/null
+++ b/trixy/trixy-lang_parser/src/parsing/error.rs
@@ -0,0 +1,93 @@
+use std::{error::Error, fmt::Display};
+use thiserror::Error;
+
+use crate::{
+    error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
+    lexing::{TokenKind, TokenSpan},
+};
+
+#[derive(Error, Debug)]
+pub enum ParsingError {
+    #[error("Expected '{expected}' but received '{actual}'")]
+    ExpectedDifferentToken {
+        expected: TokenKind,
+        actual: TokenKind,
+        span: TokenSpan,
+    },
+
+    #[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
+    ExpectedKeyword { actual: TokenKind, span: TokenSpan },
+}
+
+impl ParsingError {
+    pub fn get_span(&self) -> TokenSpan {
+        match self {
+            ParsingError::ExpectedDifferentToken { span, .. } => *span,
+            ParsingError::ExpectedKeyword { span, .. } => *span,
+        }
+    }
+}
+
+impl AdditionalHelp for ParsingError {
+    fn additional_help(&self) -> String {
+        match self {
+            ParsingError::ExpectedDifferentToken {
+                expected,
+                actual,
+                ..
+            } => format!(
+                "I expected a '{}' here, but you put a '{}' there!",
+                expected, actual
+            ),
+            ParsingError::ExpectedKeyword { actual, .. } => format!(
+                "I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!",
+                actual),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct SpannedParsingError {
+    pub source: ParsingError,
+    pub context: ErrorContext,
+}
+
+impl Error for SpannedParsingError {
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        Some(&self.source)
+    }
+}
+
+impl Display for SpannedParsingError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.error_fmt(f)
+    }
+}
+
+impl ErrorContextDisplay for SpannedParsingError {
+    type Error = ParsingError;
+
+    fn context(&self) -> &crate::error::ErrorContext {
+        &self.context
+    }
+
+    fn line_number(&self) -> usize {
+        self.context.line_number
+    }
+
+    fn line_above(&self) -> &str {
+        &self.context.line_above
+    }
+
+    fn line_below(&self) -> &str {
+        &self.context.line_below
+    }
+
+    fn line(&self) -> &str {
+        &self.context.line
+    }
+
+    fn source(&self) -> &<Self as ErrorContextDisplay>::Error {
+        &self.source
+    }
+}
diff --git a/trixy/trixy-lang_parser/src/parsing/mod.rs b/trixy/trixy-lang_parser/src/parsing/mod.rs
new file mode 100644
index 0000000..435b2bc
--- /dev/null
+++ b/trixy/trixy-lang_parser/src/parsing/mod.rs
@@ -0,0 +1,4 @@
+mod error;
+mod unchecked;
+#[cfg(test)]
+mod test;
diff --git a/trixy/trixy-lang_parser/src/parsing/test.rs b/trixy/trixy-lang_parser/src/parsing/test.rs
new file mode 100644
index 0000000..2f73978
--- /dev/null
+++ b/trixy/trixy-lang_parser/src/parsing/test.rs
@@ -0,0 +1,88 @@
+use crate::{
+    command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput, Genus},
+    lexing::{Token, TokenKind, TokenSpan, TokenStream},
+};
+
+use super::error::ParsingError;
+
+use pretty_assertions::assert_eq;
+
+#[test]
+fn test_failing() {
+    let input = "
+fn print(message: CommandTransferValue);
+
+nasp trinitrix { {}
+    fn hi honner(name: String) -> String; ;
+}
+
+";
+    let parsed = TokenStream::lex(input).unwrap().parse_unchecked();
+    let err = parsed.unwrap_err().source;
+    match err {
+        ParsingError::ExpectedDifferentToken { .. } => panic!("Wrong error"),
+        ParsingError::ExpectedKeyword { .. } => {}
+    }
+}
+
+#[test]
+fn test_full() {
+    let input = "fn print(message: CommandTransferValue);
+
+nasp trinitrix {
+    fn hi(name: String) -> String;
+}
+";
+    let parsed = TokenStream::lex(input).unwrap().parse_unchecked().unwrap();
+    let expected = CommandSpec {
+        declarations: vec![
+            Declaration {
+                namespace: vec![],
+                genus: Genus::Function(Function {
+                    identifier: Token {
+                        span: TokenSpan { start: 3, end: 8 },
+                        kind: TokenKind::Identifier("print".to_owned()),
+                    },
+                    inputs: vec![FunctionInput {
+                        name: Token {
+                            span: TokenSpan { start: 9, end: 16 },
+                            kind: TokenKind::Identifier("message".to_owned()),
+                        },
+                        r#type: Token {
+                            span: TokenSpan { start: 18, end: 38 },
+                            kind: TokenKind::Identifier("CommandTransferValue".to_owned()),
+                        },
+                    }],
+                    output: None,
+                }),
+            },
+            Declaration {
+                namespace: vec![Token {
+                    span: TokenSpan { start: 47, end: 56 },
+                    kind: TokenKind::Identifier("trinitrix".to_owned()),
+                }],
+                genus: Genus::Function(Function {
+                    identifier: Token {
+                        span: TokenSpan { start: 66, end: 68 },
+                        kind: TokenKind::Identifier("hi".to_owned()),
+                    },
+                    inputs: vec![FunctionInput {
+                        name: Token {
+                            span: TokenSpan { start: 69, end: 73 },
+                            kind: TokenKind::Identifier("name".to_owned()),
+                        },
+                        r#type: Token {
+                            span: TokenSpan { start: 75, end: 81 },
+                            kind: TokenKind::Identifier("String".to_owned()),
+                        },
+                    }],
+                    output: Some(Token {
+                        span: TokenSpan { start: 86, end: 92 },
+                        kind: TokenKind::Identifier("String".to_owned()),
+                    }),
+                }),
+            },
+        ],
+    };
+    assert_eq!(parsed, expected);
+}
diff --git a/trixy/trixy-lang_parser/src/parsing/unchecked.rs b/trixy/trixy-lang_parser/src/parsing/unchecked.rs
new file mode 100644
index 0000000..9d12a9e
--- /dev/null
+++ b/trixy/trixy-lang_parser/src/parsing/unchecked.rs
@@ -0,0 +1,163 @@
+use crate::{
+    command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput},
+    error::ErrorContext,
+    lexing::{Token, TokenKind, TokenStream},
+    token,
+};
+
+use super::error::{ParsingError, SpannedParsingError};
+
+impl TokenStream {
+    pub fn parse_unchecked(self) -> Result<CommandSpec, SpannedParsingError> {
+        let mut parser = Parser::new(self);
+        parser.parse()
+    }
+}
+
+pub(super) struct Parser {
+    token_stream: TokenStream,
+    current_namespaces: Vec<Token>, // These should turn into Identifiers in the second pass
+}
+
+impl Parser {
+    fn new(mut token_stream: TokenStream) -> Self {
+        token_stream.reverse();
+        Self {
+            token_stream,
+            current_namespaces: vec![],
+        }
+    }
+
+    fn parse(&mut self) -> Result<CommandSpec, SpannedParsingError> {
+        let mut declarations = vec![];
+        while !self.token_stream.is_empty() {
+            let mut next = self.parse_next().map_err(|err| {
+                let span = err.get_span();
+                SpannedParsingError {
+                    source: err,
+                    context: ErrorContext::from_span(span, &self.token_stream.original_file),
+                }
+            })?;
+
+            declarations.append(&mut next);
+        }
+
+        Ok(CommandSpec { declarations })
+    }
+
+    fn parse_next(&mut self) -> Result<Vec<Declaration>, ParsingError> {
+        match self.peek().kind() {
+            token![nasp] => Ok(self.parse_namespace()?),
+            token![fn] => Ok(vec![Declaration::new_function(
+                self.parse_function()?,
+                self.current_namespaces.clone(),
+            )]),
+            _ => {
+                let err = ParsingError::ExpectedKeyword {
+                    span: *self.peek().span(),
+                    actual: self.peek().kind().clone(),
+                };
+
+                return Err(err);
+            }
+        }
+    }
+
+    fn parse_namespace(&mut self) -> Result<Vec<Declaration>, ParsingError> {
+        self.expect(token![nasp])?;
+        let namespace_name = self.expect(token![Ident])?;
+        self.current_namespaces.push(namespace_name);
+        self.expect(token![BraceOpen])?;
+
+        let mut declarations = vec![];
+        while !self.expect_peek(token![BraceClose]) {
+            declarations.append(&mut self.parse_next()?);
+        }
+
+        self.expect(token![BraceClose])?;
+        self.current_namespaces.pop();
+        Ok(declarations)
+    }
+
+    fn parse_function(&mut self) -> Result<Function, ParsingError> {
+        self.expect(token![fn])?;
+        let name = self.expect(token![Ident])?;
+        self.expect(token![ParenOpen])?;
+        let mut inputs = vec![];
+
+        while self.expect_peek(token![Ident]) {
+            let input_name = self.expect(token![Ident])?;
+            self.expect(token![Colon])?;
+            let input_type = self.expect(token![Ident])?;
+            inputs.push(FunctionInput {
+                name: input_name,
+                r#type: input_type,
+            })
+        }
+
+        self.expect(token![ParenClose])?;
+        let mut output_type = None;
+        if self.expect_peek(token![->]) {
+            self.expect(token![->])?;
+            output_type = Some(self.expect(token![Ident])?);
+        }
+        self.expect(token![;])?;
+        Ok(Function {
+            identifier: name,
+            inputs,
+            output: output_type,
+        })
+    }
+
+    /// Expect a token in the next input position:
+    /// For example:
+    ///
+    /// ```ignore
+    /// use trixy_lang_parser::{
+    ///     lexing::{Keyword, TokenKind, TokenStream},
+    ///     parsing::unchecked::Parser,
+    ///     token,
+    /// };
+    ///
+    /// # fn main() {
+    /// let token_stream = TokenStream::lex("nasp {}").unwrap();
+    /// let mut parser = Parser::new(token_stream);
+    /// assert_eq!(parser.expect(token![nasp]).unwrap().kind(), &TokenKind::Keyword(Keyword::nasp));
+    /// assert_eq!(parser.expect(token![BraceOpen]).unwrap().kind(), &TokenKind::BraceOpen);
+    /// assert_eq!(parser.expect(token![BraceClose]).unwrap().kind(), &TokenKind::BraceClose);
+    /// assert!(parser.expect(token![BraceClose]).is_err());
+    /// # }
+    /// ```
+    ///
+    pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
+        let actual_token = self.peek();
+        if actual_token.kind().same_kind(&token) {
+            Ok(self.pop())
+        } else {
+            let err = ParsingError::ExpectedDifferentToken {
+                expected: token,
+                actual: actual_token.kind().clone(),
+                span: *actual_token.span(),
+            };
+
+            Err(err)
+        }
+    }
+
+    /// Check if the next token is of the specified TokenKind.
+    /// Does not alter the token_stream
+    fn expect_peek(&self, token: TokenKind) -> bool {
+        self.peek().kind().same_kind(&token)
+    }
+
+    /// Looks at the next token without removing it
+    fn peek(&self) -> &Token {
+        self.token_stream.peek()
+    }
+
+    /// Removes the next token
+    fn pop(&mut self) -> Token {
+        self.token_stream.pop()
+    }
+}
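End to end, the whole pipeline this patch adds (lex, then the first unchecked parsing pass) can be exercised as follows; a sketch against the public API, not code from the patch:

```rust
use trixy_lang_parser::lexing::TokenStream;

fn main() {
    let src = "
nasp trinitrix {
    fn hi(name: String) -> String;
}
";
    // `parse_unchecked` consumes the TokenStream and yields the unchecked AST.
    match TokenStream::lex(src).unwrap().parse_unchecked() {
        Ok(spec) => println!("{:#?}", spec.declarations),
        // Rendering the error uses the colored context lines from ErrorContextDisplay.
        Err(err) => eprintln!("{}", err),
    }
}
```

The second, type-checking pass (turning `Token`s into `Identifier`s and the `checked` types) is stubbed out here; `parse_trixy_lang` still ends in `todo!()`.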