forked from trinitrix/core
feat(trixy-lang_parser): Add first parser pass
parent 74efd3eda6, commit 3a65c33b15
@@ -2,5 +2,5 @@
 /target
 /result
 
-# lua_macros is a library
+# This crate is a library
 Cargo.lock
@@ -0,0 +1,6 @@
# trixy-lang_parser
This crate contains a parser (and lexer) for the Trixy language.
The corresponding grammar is in the grammar file [here](./docs/grammar.ebnf), encoded in [Extended Backus-Naur Form](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form).

## Docs
Run `./generate_docs` to turn the grammar file into railroad diagrams.
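A minimal usage sketch of the pipeline this commit adds: lex the source into a
TokenStream, then run the first (unchecked) parser pass over it. All names come
from the diff below; the input string is made up, and this assumes the crate
builds as laid out in this commit:

    use trixy_lang_parser::lexing::TokenStream;

    fn main() {
        let input = "nasp trinitrix { fn hi(name: String) -> String; }";
        // Lex the source into a TokenStream ...
        let tokens = TokenStream::lex(input).expect("lexing failed");
        // ... and run the first (unchecked) parser pass over it.
        let unchecked = tokens.parse_unchecked().expect("parsing failed");
        println!("{:#?}", unchecked);
    }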
@@ -0,0 +1,17 @@
# (*
# Trixy is fully whitespace-independent; this means that you can
# interleave whitespace in the definitions.
# The same applies to comments:
# - Line comments (`// \n`) and
# - Block comments (`/* */`).
# *)

CommandSpec = { Function | Namespace } ;
Function = "fn" Identifier "(" {Identifier ":" Type} ")" [ "->" Type ] ";" ;
Namespace = "nasp" Identifier "{" {Function | Namespace} "}" ;
Type = "String" | "Integer" ; # (* This corresponds to the CommandTransferValue *)
Identifier = CHARACTER { NUMBER | CHARACTER } ;

# (*
# vim: ft=ebnf
# *)

Binary file not shown.
@@ -0,0 +1,9 @@
fn print(message: CommandTransferValue);

nasp trinitrix { {}
    fn hi honner(name: String) -> String; ;
}


// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust
@@ -0,0 +1,126 @@
/// Prints to the output, with a newline.
// HACK(@soispha): The stdlib Lua `print()` function has stdout as output hardcoded;
// redirecting stdout seems too much like a hack, thus we are just redefining the print function
// to output to a controlled output. <2023-09-09>
fn print(input: CommandTransferValue);

nasp trinitrix {
    /// Language-specific functions, which mirror the `trinitrix.api` namespace.
    /// That is, if you have to choose between a `std` and an `api` function, choose the `std`
    /// one, as it will most likely be more high-level and easier to use (as it isn't abstracted
    /// over multiple languages). Feel free to drop down to the lower-level api if you feel
    /// like that more; it should be as stable and user-oriented as the `std` functions.
    nasp std {}

    /// Debug-only functions; these are effectively useless
    nasp debug {
        /// Greets the user
        fn greet(input: String) -> String;

        /// Returns a table of greeted users
        fn greet_multiple() -> Table;
    }

    /// General API to change stuff in Trinitrix
    nasp api {
        /// Closes the application
        fn exit();

        /// Send a message to the current room.
        /// The sent message is interpreted literally.
        fn room_message_send(msg: String);

        /// Open the help pages at the first occurrence of
        /// the input string if it is Some, otherwise open
        /// the help pages at the start
        fn help(input: Option<String>);

        // Register a function to be used with the Trinitrix API
        // (This function is actually implemented in the std namespace)
        /* fn register_function(function: RawFunction); */

        /// Functions that change the UI, or UI state
        nasp ui {
            /// Shows the command line
            fn command_line_show();

            /// Hides the command line
            fn command_line_hide();

            /// Go to the next plane
            fn cycle_planes();
            /// Go to the previous plane
            fn cycle_planes_rev();

            /// Sets the current app mode to Normal / navigation mode
            fn set_mode_normal();
            /// Sets the current app mode to Insert / editing mode
            fn set_mode_insert();
        }

        /// Manipulate keymappings. The mode is specified as a String built up of all modes
        /// the keymapping should be active in. The mapping works as follows:
        /// n => normal Mode
        /// c => command Mode
        /// i => insert Mode
        ///
        /// The key works in a similar manner, specifying the required keypresses to trigger the
        /// callback. For example, "aba" would require the user to press "a" then "b" then "a" again
        /// to trigger the mapping. Special characters are encoded as follows:
        /// "<C-a>ba" => "Ctrl+a" then "b" then "a"
        /// "<S-a>" => "A" or "Shift+a"
        /// "A" => "A"
        /// "<M-a>" => "Alt+a" (<A-a>) or "Meta+a" (<M-a>) (most terminals can't really differentiate between these characters)
        /// "a<C-b><C-a>" => "a" then "Ctrl+b" then "Ctrl+a" (also works for Shift, Alt and Super)
        /// "<CSM-b>" => "Ctrl+Shift+Alt+b" (the ordering doesn't matter)
        /// "a " => "a" then a literal space (" ")
        /// "å🙂" => "å" then "🙂" (full Unicode support!)
        /// "<ESC>" => escape key
        /// "<F3>" => F3 key
        /// "<BACKSPACE>" => backspace key (and so forth)
        /// "<DASH>" => a literal "-"
        /// "<ANGULAR_BRACKET_OPEN>" or "<ABO>" => a literal "<"
        /// "<ANGULAR_BRACKET_CLOSE>" or "<ABC>" => a literal ">"
        ///
        /// The callback MUST be registered first by calling
        /// `trinitrix.api.register_function()`; the returned value can then be used to
        /// set the keymap.
        nasp keymaps {
            /// Add a new keymapping
            fn add(mode: String, key: String, callback: Function);

            /// Remove a keymapping
            ///
            /// Does nothing if the keymapping doesn't exist
            fn remove((/* mode: */ String, /* key: */ String));

            /// List declared keymappings
            fn get(mode: String);
        }

        /// Functions only used internally within Trinitrix
        nasp raw {
            /// Send an error to the default error output
            fn raise_error(input: String);

            /// Send output to the default output.
            /// This is mainly used to display the final
            /// output of evaluated lua commands.
            fn display_output(input: String);

            /// Input a character without checking for possible keymaps.
            /// If the current state does not expect input, this character is ignored.
            /// The encoding is the same as in the `trinitrix.api.keymaps` commands.
            fn send_input_unprocessed(input: String);

            /// This namespace is used to store some command-specific data (like functions, as
            /// ensuring memory locations stay allocated in a garbage-collected language is hard)
            ///
            /// Treat it as an implementation detail
            nasp __private {}
        }
    }
}

// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust
@@ -3,7 +3,9 @@ fn print(message: CommandTransferValue);
 nasp trinitrix {
     fn hi(name: String) -> String;
 }
-namespace commands {
+
+nasp trinitrix {
+    fn ho(name: String) -> String;
 }
 
@@ -0,0 +1,9 @@
#!/usr/bin/env sh



ebnf2pdf "./docs/grammar.ebnf"
mv out.pdf ./docs/grammar.pdf


# vim: ft=sh
@@ -0,0 +1,58 @@
//! This module contains the already type-checked types.
//!
//!

use crate::lexing::{Keyword, TokenKind};
pub enum PrimitiveTypes {
    String,
    /// Nothing
    Void,
}

impl From<TokenKind> for Identifier {
    fn from(value: TokenKind) -> Self {
        match value {
            TokenKind::Identifier(ident) => Identifier(ident),
            TokenKind::Keyword(_)
            | TokenKind::Colon
            | TokenKind::Semicolon
            | TokenKind::Comma
            | TokenKind::Arrow
            | TokenKind::BraceOpen
            | TokenKind::BraceClose
            | TokenKind::ParenOpen
            | TokenKind::Dummy
            | TokenKind::ParenClose => {
                panic!("Tried to convert a non-Identifier TokenKind to an Identifier. This is a bug")
            }
        }
    }
}

/// An Identifier
/// These include:
/// - Variable names
/// - Function names
/// - Namespace names
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Identifier(String);

impl From<TokenKind> for Keyword {
    fn from(value: TokenKind) -> Self {
        match value {
            TokenKind::Keyword(keyword) => keyword,
            TokenKind::Identifier(_)
            | TokenKind::Colon
            | TokenKind::Semicolon
            | TokenKind::Comma
            | TokenKind::Arrow
            | TokenKind::BraceOpen
            | TokenKind::BraceClose
            | TokenKind::ParenOpen
            | TokenKind::Dummy
            | TokenKind::ParenClose => {
                panic!("Tried to convert a non-Keyword TokenKind to a Keyword. This is a bug")
            }
        }
    }
}
@@ -1,36 +1,2 @@
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub struct CommandSpec {
-    pub(crate) declarations: Vec<Declaration>,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct Declaration {
-    pub(crate) namespace: Vec<Namespace>,
-    pub(crate) genus: Genus,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct Namespace {
-    pub(crate) name: String,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) enum Genus {
-    Function {
-        name: String,
-        inputs: Vec<NamedType>,
-        output: Type,
-    },
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) struct NamedType {
-    pub(crate) name: String,
-    pub(crate) base: Type,
-}
-
-#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
-pub(crate) enum Type {
-    String,
-    Void,
-}
+pub mod checked;
+pub mod unchecked;
@@ -0,0 +1,48 @@
//! This module contains the not yet type-checked types.
//! These are generated on the first pass of the parser, to be later converted into the checked
//! ones.

use crate::lexing::Token;

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct CommandSpec {
    pub declarations: Vec<Declaration>,
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Declaration {
    pub namespace: Vec<Token>, // Will later be turned into a Namespace
    pub genus: Genus,
}

impl Declaration {
    pub fn new_function(function: Function, namespace: Vec<Token>) -> Self {
        Declaration {
            namespace,
            genus: Genus::Function(function),
        }
    }
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Namespace {
    pub name: Token, // Will later become an Identifier
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum Genus {
    /// Not actually a genus, but used in parsing to accommodate multiple errors
    Dummy,
    /// A function
    Function(Function),
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Function {
    pub identifier: Token, // Will later become an Identifier
    pub inputs: Vec<FunctionInput>,
    pub output: Option<Token>, // Will later become a Type
}

#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct FunctionInput {
    pub name: Token, // Will later become an Identifier
    pub r#type: Token, // Will later become a Type
}

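A tiny sketch of how the first pass assembles these types. This is crate-internal
(command_spec is a private module) and the span values are made up;
Declaration::new_function comes from the code above:

    // Build the unchecked AST node for a top-level `fn ping();` by hand,
    // the way the parser's first pass does.
    let ping = Function {
        identifier: Token {
            span: TokenSpan { start: 3, end: 7 },
            kind: TokenKind::Identifier("ping".to_owned()),
        },
        inputs: vec![],
        output: None,
    };
    // No enclosing namespaces, so the namespace path stays empty.
    let decl = Declaration::new_function(ping, vec![]);
    println!("{:#?}", decl);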
@@ -1,9 +1,191 @@
+use core::fmt;
+
 use thiserror::Error;
 
-use crate::lexing::error::SpannedLexingError;
+use crate::lexing::{error::SpannedLexingError, TokenSpan};
 
 #[derive(Error, Debug)]
 pub enum TrixyError {
     #[error(transparent)]
     Parsing(#[from] SpannedLexingError),
 }
+
+/// The context of an Error.
+#[derive(Debug)]
+pub struct ErrorContext {
+    /// The span of the error in the source file
+    pub span: TokenSpan,
+    /// The span of the error in the context line, relative to the context line
+    pub contexted_span: TokenSpan,
+    /// The line above the error
+    pub line_above: String,
+    /// The line below the error
+    pub line_below: String,
+    /// The line in which the error occurred
+    pub line: String,
+    /// The line number of the main error line
+    pub line_number: usize,
+}
+
+impl ErrorContext {
+    pub fn from_span(span: TokenSpan, original_file: &str) -> Self {
+        let line_number = original_file
+            .chars()
+            .take(span.start)
+            .filter(|a| a == &'\n')
+            .count()
+            // This is here, as we are missing one newline with the method above
+            + 1;
+
+        let lines: Vec<_> = original_file.lines().collect();
+
+        let line = (*lines
+            .get(line_number - 1)
+            .expect("This should work, as we have *at least* one (index = 0) line"))
+        .to_owned();
+
+        let contexted_span = {
+            let matched_line: Vec<_> = original_file.match_indices(&line).collect();
+            let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
+            debug_assert_eq!(matched_line, &&line);
+            TokenSpan {
+                start: span.start - index,
+                end: span.end - index,
+            }
+        };
+
+        let line_above;
+        if line_number == 0 {
+            // We only have one line, so no line above
+            line_above = "".to_owned();
+        } else {
+            line_above = (*lines
+                .get((line_number - 1) - 1)
+                .expect("We checked that this should work"))
+            .to_owned();
+        }
+
+        let line_below;
+        if lines.len() - 1 > line_number {
+            // We have a line after the current line
+            line_below = (*lines
+                .get((line_number + 1) - 1)
+                .expect("We checked that this should work"))
+            .to_owned();
+        } else {
+            line_below = "".to_owned();
+        }
+
+        Self {
+            span,
+            contexted_span,
+            line_above,
+            line_below,
+            line,
+            line_number,
+        }
+    }
+
+    pub fn from_index(start: usize, original_file: &str) -> Self {
+        let span = TokenSpan { start, end: start };
+        Self::from_span(span, original_file)
+    }
+
+    pub fn get_error_line(&self, source_error: &str) -> String {
+        // deconstruct the structure
+        let ErrorContext {
+            contexted_span,
+            line_number,
+            ..
+        } = self;
+
+        let mut output = String::new();
+        output.push_str("\x1b[92;1m");
+
+        // pad to accommodate the line number printing.
+        // 32 -> needs two spaces padding to print it
+        line_number.to_string().chars().for_each(|_| {
+            output.push(' ');
+        });
+
+        // pad to the beginning of the error
+        for _ in 0..contexted_span.start {
+            output.push(' ');
+        }
+
+        // push the error markers
+        for _ in contexted_span.start..contexted_span.end {
+            output.push('^');
+        }
+
+        // // pad until end of line
+        // for _ in contexted_span.end..(line.len() - 1) {
+        //     output.push('-');
+        // }
+        //
+        // additional space to avoid having to end with a '-'
+        output.push(' ');
+
+        output.push_str("help: ");
+
+        output.push_str(source_error);
+        output.push_str("\x1b[0m");
+        output
+    }
+}
+
+pub trait AdditionalHelp {
+    fn additional_help(&self) -> String;
+}
+
+pub trait ErrorContextDisplay: fmt::Display {
+    type Error;
+
+    fn error_fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
+    where
+        <Self as ErrorContextDisplay>::Error: std::fmt::Display + AdditionalHelp,
+    {
+        let error_line = self
+            .context()
+            .get_error_line(&self.source().additional_help());
+
+        writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", self.source())?;
+
+        if !self.line_above().is_empty() {
+            writeln!(
+                f,
+                "\x1b[32;1m{} |\x1b[0m {}",
+                self.line_number() - 1,
+                self.line_above()
+            )?;
+        }
+        writeln!(
+            f,
+            "\x1b[36;1m{} |\x1b[0m {}",
+            self.line_number(),
+            self.line()
+        )?;
+        writeln!(f, " {}", error_line)?;
+        if !self.line_below().is_empty() {
+            writeln!(
+                f,
+                "\x1b[32;1m{} |\x1b[0m {}",
+                self.line_number() + 1,
+                self.line_below()
+            )
+        } else {
+            write!(f, "")
+        }
+    }
+
+    // getters
+    fn context(&self) -> &ErrorContext;
+    fn source(&self) -> &Self::Error;
+    fn line_number(&self) -> usize;
+    fn line_above(&self) -> &str;
+    fn line_below(&self) -> &str;
+    fn line(&self) -> &str;
+}
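A small sanity-check sketch of what ErrorContext::from_span computes,
hand-derived from the code above (the four-line input is made up):

    #[test]
    fn error_context_points_at_the_right_line() {
        let src = "abc\ndef\nghi\njkl";
        // The span covers the 'e' in "def" (chars 5..6 of the file) ...
        let ctx = ErrorContext::from_span(TokenSpan { start: 5, end: 6 }, src);
        // ... so the error lands on line 2, at offset 1 within that line.
        assert_eq!(ctx.line_number, 2);
        assert_eq!(ctx.line, "def");
        assert_eq!(ctx.contexted_span.start, 1);
        assert_eq!(ctx.line_above, "abc");
        assert_eq!(ctx.line_below, "ghi");
    }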
@@ -1,6 +1,8 @@
 use std::{error::Error, fmt::Display};
 use thiserror::Error;
 
+use crate::error::{AdditionalHelp, ErrorContext, ErrorContextDisplay};
+
 #[derive(Error, Debug)]
 pub enum LexingError {
     #[error("No matches were found")]
@@ -13,88 +15,61 @@ pub enum LexingError {
     ExpectedArrow,
 }
 
+impl AdditionalHelp for LexingError {
+    fn additional_help(&self) -> String {
+        let out = match self {
+            LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
+            LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
+            LexingError::ExpectedArrow => "The `-` token is interpreted as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
+            LexingError::UnknownCharacter(char) => {
+                format!("This char: `{char}`; is not a valid token")
+            },
+        };
+        out
+    }
+}
+
 #[derive(Debug)]
-pub enum SpannedLexingError {
-    Error {
-        source: LexingError,
-        /// The starting char index of the error in the source file
-        start: usize,
-        /// The starting char index of the error in the context line
-        contexted_start: usize,
-        /// The line above the error
-        line_above: String,
-        /// The line below the error
-        line_below: String,
-        /// The line in which the error occurred
-        line: String,
-        /// The line number of the main error line
-        line_number: usize,
-    },
+pub struct SpannedLexingError {
+    pub source: LexingError,
+    pub context: ErrorContext,
 }
 
 impl Error for SpannedLexingError {
     fn source(&self) -> Option<&(dyn Error + 'static)> {
-        let Self::Error { source, .. } = self;
-        Some(source)
+        Some(&self.source)
     }
 }
 
+impl ErrorContextDisplay for SpannedLexingError {
+    type Error = LexingError;
+
+    fn context(&self) -> &crate::error::ErrorContext {
+        &self.context
+    }
+
+    fn line_number(&self) -> usize {
+        self.context.line_number
+    }
+
+    fn line_above(&self) -> &str {
+        &self.context.line_above
+    }
+
+    fn line_below(&self) -> &str {
+        &self.context.line_below
+    }
+
+    fn line(&self) -> &str {
+        &self.context.line
+    }
+
+    fn source(&self) -> &<SpannedLexingError as ErrorContextDisplay>::Error {
+        &self.source
+    }
+}
+
 impl Display for SpannedLexingError {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let Self::Error {
-            source,
-            line_above,
-            line_below,
-            line,
-            line_number,
-            contexted_start,
-            ..
-        } = self;
-        let error_line = {
-            let mut output = String::new();
-            output.push_str("\x1b[92;1m");
-            for _ in 0..(*contexted_start) {
-                output.push(' ');
-            }
-            line_number.to_string().chars().for_each(|_| {
-                output.push(' ');
-            });
-            output.push('^');
-            for _ in *contexted_start..(line.len() - 1) {
-                output.push('-');
-            }
-            output.push(' ');
-            let appandig_str = match source {
-                LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
-                LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
-                LexingError::UnknownCharacter(char) => format!("This char: `{char}`; is not a valid token"),
-                LexingError::ExpectedArrow => "The `-` token is interpretet as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
-            };
-            output.push_str(&appandig_str);
-            output.push_str("\x1b[0m");
-            output
-        };
-
-        writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", source)?;
-        if !line_above.is_empty() {
-            writeln!(
-                f,
-                "\x1b[32;1m{} |\x1b[0m {}",
-                line_number - 1,
-                line_above
-            )?;
-        }
-        writeln!(f, "\x1b[36;1m{} |\x1b[0m {}", line_number, line)?;
-        writeln!(f, " {}", error_line)?;
-        if !line_below.is_empty() {
-            writeln!(
-                f,
-                "\x1b[32;1m{} |\x1b[0m {}",
-                line_number + 1,
-                line_below
-            )
-        } else {
-            write!(f, "")
-        }
+        self.error_fmt(f)
     }
 }
@@ -1,3 +1,5 @@
+use std::fmt::Display;
+
 use self::{error::SpannedLexingError, tokenizer::Tokenizer};
 
 pub mod error;
@@ -8,7 +10,7 @@ mod test;
 
 #[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
 pub struct TokenStream {
-    original_file: String,
+    pub original_file: String,
     tokens: Vec<Token>,
 }
 
@@ -31,34 +33,83 @@ impl TokenStream {
             original_file: src.to_owned(),
         })
     }
+
+    /// Get a token by index
+    pub fn get(&self, index: usize) -> Option<&Token> {
+        self.tokens.get(index)
+    }
+
+    /// Get a reference to the uppermost token, without modifying the token list
+    pub fn peek(&self) -> &Token {
+        self.tokens.last().expect("This should not be empty")
+    }
+
+    /// Remove the uppermost token
+    pub fn pop(&mut self) -> Token {
+        self.tokens.pop().expect("This should not be empty")
+    }
+
+    /// Reverses the underlying tokens vector.
+    /// This facilitates using the pop and peek methods to parse the tokens from the beginning,
+    /// not the end.
+    pub fn reverse(&mut self) {
+        self.tokens.reverse()
+    }
+
+    /// Check if the TokenStream is empty.
+    pub fn is_empty(&self) -> bool {
+        self.tokens.is_empty()
+    }
 }
 
 /// A token span is recorded in chars starting from the beginning of the file:
 /// A token span like this, for example:
-/// ```no_run
+/// ```dont_run
+/// # use trixy_lang_parser::lexing::TokenSpan;
 /// TokenSpan {
 ///     start: 20,
 ///     end: 23,
 /// }
 /// ```
 /// signals that the token starts at the 20th char in the source file and ends on the 23rd.
-#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
 pub struct TokenSpan {
-    start: usize,
-    end: usize,
+    /// The start of the token span
+    pub start: usize,
+    /// The end of the token span
+    pub end: usize,
 }
 
 /// A Token
-#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone)]
 pub struct Token {
     /// The token's original location in the source file
-    span: TokenSpan,
-    kind: TokenKind,
+    pub span: TokenSpan,
+    pub kind: TokenKind,
 }
+
+impl Token {
+    /// Return the TokenKind of a token
+    pub fn kind(&self) -> &TokenKind {
+        &self.kind
+    }
+
+    /// Return the TokenSpan of a token
+    pub fn span(&self) -> &TokenSpan {
+        &self.span
+    }
+
+    /// Get a dummy token, this is intended for error handling
+    pub fn get_dummy() -> Token {
+        Self {
+            span: TokenSpan { start: 0, end: 0 },
+            kind: TokenKind::Dummy,
+        }
+    }
+}
 
 /// Possible kinds of tokens
-#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
 pub enum TokenKind {
     Keyword(Keyword),
     Identifier(String),
@@ -68,12 +119,49 @@ pub enum TokenKind {
     Arrow,
     BraceOpen,
     BraceClose,
-    ParenthesisOpen,
-    ParenthesisClose,
+    ParenOpen,
+    ParenClose,
+    /// This is not a real TokenKind, but only used for error handling
+    Dummy,
 }
+
+impl TokenKind {
+    pub fn same_kind(&self, other: &TokenKind) -> bool {
+        if let TokenKind::Identifier(_) = self {
+            if let TokenKind::Identifier(_) = other {
+                return true;
+            }
+        }
+        self == other
+    }
+}
+
+impl Display for TokenKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            TokenKind::Keyword(word) => write!(f, "KEYWORD({})", word),
+            TokenKind::Identifier(ident) => {
+                if ident == "" {
+                    write!(f, "IDENTIFIER")
+                } else {
+                    write!(f, "IDENTIFIER({})", ident)
+                }
+            }
+            TokenKind::Colon => f.write_str("COLON"),
+            TokenKind::Semicolon => f.write_str("SEMICOLON"),
+            TokenKind::Comma => f.write_str("COMMA"),
+            TokenKind::Arrow => f.write_str("ARROW"),
+            TokenKind::BraceOpen => f.write_str("BRACEOPEN"),
+            TokenKind::BraceClose => f.write_str("BRACECLOSE"),
+            TokenKind::ParenOpen => f.write_str("PARENOPEN"),
+            TokenKind::ParenClose => f.write_str("PARENCLOSE"),
+            TokenKind::Dummy => f.write_str("DUMMY"),
+        }
+    }
+}
 
 /// Keywords used in the language
-#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
 pub enum Keyword {
     /// Start a namespace declaration
     #[allow(non_camel_case_types)]
@@ -82,3 +170,85 @@ pub enum Keyword {
     #[allow(non_camel_case_types)]
     r#fn,
 }
+
+impl Display for Keyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Keyword::nasp => f.write_str("nasp"),
+            Keyword::r#fn => f.write_str("fn"),
+        }
+    }
+}
+
+/// Shorthand macro for generating a token from *anything* which can be
+/// converted into a `TokenKind`, or any of the `TokenKind` variants.
+///
+/// # Examples
+///
+/// ```
+/// use trixy_lang_parser::token;
+/// # fn main() {
+/// token![nasp];
+/// token![;];
+/// token![Arrow];
+/// # }
+/// ```
+#[macro_export]
+macro_rules! token {
+    [Semicolon] => { $crate::lexing::TokenKind::Semicolon };
+    [;] => { $crate::lexing::TokenKind::Semicolon };
+    [Colon] => { $crate::lexing::TokenKind::Colon };
+    [:] => { $crate::lexing::TokenKind::Colon };
+    [Comma] => { $crate::lexing::TokenKind::Comma };
+    [,] => { $crate::lexing::TokenKind::Comma };
+    [Arrow] => { $crate::lexing::TokenKind::Arrow };
+    [->] => { $crate::lexing::TokenKind::Arrow };
+    [BraceOpen] => { $crate::lexing::TokenKind::BraceOpen };
+    // [{] => { $crate::lexing::TokenKind::BraceOpen };
+    [BraceClose] => { $crate::lexing::TokenKind::BraceClose };
+    // [}] => { $crate::lexing::TokenKind::BraceClose };
+    [ParenOpen] => { $crate::lexing::TokenKind::ParenOpen };
+    // [(] => { $crate::lexing::TokenKind::ParenthesisOpen };
+    [ParenClose] => { $crate::lexing::TokenKind::ParenClose };
+    // [)] => { $crate::lexing::TokenKind::ParenthesisClose };
+
+    [nasp] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::nasp) };
+    [fn] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#fn) };
+
+    // This only works for checking for an identifier,
+    // see the `same_kind` method on TokenKind
+    [Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
+    [Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
+}
+
+#[cfg(test)]
+mod tests {
+    use super::TokenKind;
+    use crate::token;
+
+    macro_rules! token_macro_test {
+        ($name:ident, $from:tt, => $to:expr) => {
+            #[test]
+            fn $name() {
+                let got: TokenKind = token![$from];
+                let should_be = $to;
+
+                assert_eq!(got, should_be);
+            }
+        };
+    }
+
+    token_macro_test!(tok_expands_to_arrow, ->, => TokenKind::Arrow);
+    token_macro_test!(tok_expands_to_semicolon, Semicolon, => TokenKind::Semicolon);
+    token_macro_test!(tok_expands_to_nasp, nasp, => TokenKind::Keyword(crate::lexing::Keyword::nasp));
+    token_macro_test!(tok_expands_to_fn, fn, => TokenKind::Keyword(crate::lexing::Keyword::r#fn));
+}
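A quick sketch of why token![Ident] works as a wildcard in the parser further
below: same_kind treats any two Identifier tokens as equal, ignoring their
payload (the "foo" value here is made up):

    use trixy_lang_parser::{lexing::TokenKind, token};

    let ident = TokenKind::Identifier("foo".to_owned());
    // Any identifier matches the empty-payload identifier produced by token![Ident] ...
    assert!(ident.same_kind(&token![Ident]));
    // ... but a different kind of token still does not match.
    assert!(!ident.same_kind(&token![;]));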
@@ -36,7 +36,7 @@ nasp commands {
         },
         Token {
             span: TokenSpan { start: 30, end: 31 },
-            kind: TokenKind::ParenthesisOpen,
+            kind: TokenKind::ParenOpen,
         },
         Token {
             span: TokenSpan { start: 31, end: 36 },
@@ -52,7 +52,7 @@ nasp commands {
         },
         Token {
             span: TokenSpan { start: 44, end: 45 },
-            kind: TokenKind::ParenthesisClose,
+            kind: TokenKind::ParenClose,
         },
         Token {
             span: TokenSpan { start: 46, end: 48 },
@@ -1,6 +1,9 @@
 // This code is heavily inspired by: https://michael-f-bryan.github.io/static-analyser-in-rust/book/lex.html
 
-use crate::lexing::{Keyword, TokenSpan};
+use crate::{
+    error::ErrorContext,
+    lexing::{Keyword, TokenSpan},
+};
 
 use super::{
     error::{LexingError, SpannedLexingError},
@@ -29,61 +32,11 @@ impl<'a> Tokenizer<'a> {
         let start = self.current_index;
 
         let (token_kind, index) = self.get_next_tokenkind().map_err(|e| {
-            let (line_above, line, line_below, contexted_start, line_number) = {
-                let line_number = self
-                    .original_text
-                    .chars()
-                    .take(start)
-                    .filter(|a| a == &'\n')
-                    .count();
-                let lines: Vec<_> = self.original_text.lines().collect();
-
-                let line = (*lines
-                    .get(line_number)
-                    .expect("This should work, as have *at least* one (index = 0) line"))
-                .to_owned();
-
-                let contexted_start = {
-                    let matched_line: Vec<_> = self.original_text.match_indices(&line).collect();
-                    let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
-                    debug_assert_eq!(matched_line, &&line);
-                    start - index
-                };
-
-                let line_above;
-                if line_number == 0 {
-                    // We only have one line, so no line above
-                    line_above = "".to_owned();
-                } else {
-                    line_above = (*lines
-                        .get(line_number - 1)
-                        .expect("We checked that this should work"))
-                    .to_owned();
-                }
-
-                let line_below;
-                if lines.len() - 1 > line_number {
-                    // We have a line after the current line
-                    line_below = (*lines
-                        .get(line_number + 1)
-                        .expect("We checked that this should work"))
-                    .to_owned();
-                } else {
-                    line_below = "".to_owned();
-                }
-
-                (line_above, line, line_below, contexted_start, line_number)
-            };
-            SpannedLexingError::Error {
-                source: e,
-                start,
-                contexted_start,
-                line_above,
-                line_below,
-                line_number,
-                line,
-            }
+            let context = ErrorContext::from_index(start, self.original_text);
+
+            SpannedLexingError { source: e, context }
         })?;
 
         self.chomp(index); // end - start
         let end = self.current_index;
         Ok(Some(Token {
@@ -100,8 +53,8 @@ impl<'a> Tokenizer<'a> {
         };
 
         let (tok, length) = match next {
-            '(' => (TokenKind::ParenthesisOpen, 1),
-            ')' => (TokenKind::ParenthesisClose, 1),
+            '(' => (TokenKind::ParenOpen, 1),
+            ')' => (TokenKind::ParenClose, 1),
             '{' => (TokenKind::BraceOpen, 1),
             '}' => (TokenKind::BraceClose, 1),
             ':' => (TokenKind::Colon, 1),
@@ -2,11 +2,12 @@ use error::TrixyError;
 
 use crate::lexing::TokenStream;
 
-use self::command_spec::CommandSpec;
+use self::command_spec::unchecked::CommandSpec;
 
 mod command_spec;
 pub mod error;
 pub mod lexing;
+pub mod parsing;
 
 pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
     let input_tokens = TokenStream::lex(input)?;
@@ -14,45 +15,45 @@ pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
     todo!()
 }
 
-#[cfg(test)]
-mod test {
-    use crate::{
-        command_spec::{CommandSpec, Declaration, Genus, NamedType, Namespace, Type},
-        parse_trixy_lang,
-    };
-
-    #[test]
-    fn test_function_with_namespace() {
-        let expected = parse_trixy_lang(
-            "
-        nasp commands {
-            fn say_something(name_to_greet: String, what_to_say: String) -> String;
-        }
-        ",
-        )
-        .unwrap();
-        let correct: CommandSpec = {
-            let declarations = vec![Declaration {
-                namespace: vec![Namespace {
-                    name: "commands".to_owned(),
-                }],
-                genus: Genus::Function {
-                    name: "say_something".to_owned(),
-                    inputs: vec![
-                        NamedType {
-                            name: "name_to_greet".to_owned(),
-                            base: Type::String,
-                        },
-                        NamedType {
-                            name: "what_to_say".to_owned(),
-                            base: Type::String,
-                        },
-                    ],
-                    output: Type::String,
-                },
-            }];
-            CommandSpec { declarations }
-        };
-        assert_eq!(expected, correct);
-    }
-}
+// #[cfg(test)]
+// mod test {
+//     use crate::{
+//         command_spec::unchecked::{CommandSpec, Declaration, Genus, Namespace},
+//         parse_trixy_lang,
+//     };
+//
+//     #[test]
+//     fn test_function_with_namespace() {
+//         let expected = parse_trixy_lang(
+//             "
+//         nasp commands {
+//             fn say_something(name_to_greet: String, what_to_say: String) -> String;
+//         }
+//         ",
+//         )
+//         .unwrap();
+//         let correct: CommandSpec = {
+//             let declarations = vec![Declaration {
+//                 namespace: vec![Namespace {
+//                     name: "commands".to_owned(),
+//                 }],
+//                 genus: Genus::Function {
+//                     name: "say_something".to_owned(),
+//                     inputs: vec![
+//                         NamedType {
+//                             name: "name_to_greet".to_owned(),
+//                             base: Type::String,
+//                         },
+//                         NamedType {
+//                             name: "what_to_say".to_owned(),
+//                             base: Type::String,
+//                         },
+//                     ],
+//                     output: Type::String,
+//                 },
+//             }];
+//             CommandSpec { declarations }
+//         };
+//         assert_eq!(expected, correct);
+//     }
+// }
@@ -23,6 +23,11 @@ pub enum Command {
         /// The file containing the trixy code to tokenize
         file: PathBuf,
     },
+    Parse {
+        #[clap(value_parser)]
+        /// The file containing the trixy code to parse
+        file: PathBuf,
+    },
 }
 
 pub fn main() {
@@ -34,12 +39,34 @@ pub fn main() {
             let input_tokens = match TokenStream::lex(&input) {
                 Ok(err) => err,
                 Err(ok) => {
-                    println!("{}", ok);
+                    eprintln!("{}", ok);
                     exit(1);
                 }
             };
 
             println!("{:#?}", input_tokens);
         }
+        Command::Parse { file } => {
+            let input = fs::read_to_string(file).unwrap();
+
+            let input_tokens = match TokenStream::lex(&input) {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while tokenizing:");
+                    eprintln!("{}", err);
+                    exit(1);
+                }
+            };
+
+            let parsed = match input_tokens.parse_unchecked() {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while doing the first (unchecked) parsing run:");
+                    eprintln!("{}", err);
+                    exit(1)
+                }
+            };
+            println!("{:#?}", parsed);
+        }
     }
 }
@@ -0,0 +1,93 @@
use std::{error::Error, fmt::Display};
use thiserror::Error;

use crate::{
    error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
    lexing::{TokenKind, TokenSpan},
};

#[derive(Error, Debug)]
pub enum ParsingError {
    #[error("Expected '{expected}' but received '{actual}'")]
    ExpectedDifferentToken {
        expected: TokenKind,
        actual: TokenKind,
        span: TokenSpan,
    },

    #[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
    ExpectedKeyword { actual: TokenKind, span: TokenSpan },
}

impl ParsingError {
    pub fn get_span(&self) -> TokenSpan {
        match self {
            ParsingError::ExpectedDifferentToken { span, .. } => *span,
            ParsingError::ExpectedKeyword { span, .. } => *span,
        }
    }
}

impl AdditionalHelp for ParsingError {
    fn additional_help(&self) -> String {
        match self {
            ParsingError::ExpectedDifferentToken {
                expected,
                actual,
                ..
            } => format!(
                "I expected a '{}' here, but you put a '{}' there!",
                expected, actual
            ),
            ParsingError::ExpectedKeyword { actual, .. } => format!(
                "I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!",
                actual),
        }
    }
}

#[derive(Debug)]
pub struct SpannedParsingError {
    pub source: ParsingError,
    pub context: ErrorContext,
}

impl Error for SpannedParsingError {
    fn source(&self) -> Option<&(dyn Error + 'static)> {
        Some(&self.source)
    }
}

impl Display for SpannedParsingError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        self.error_fmt(f)
    }
}

impl ErrorContextDisplay for SpannedParsingError {
    type Error = ParsingError;

    fn context(&self) -> &crate::error::ErrorContext {
        &self.context
    }

    fn line_number(&self) -> usize {
        self.context.line_number
    }

    fn line_above(&self) -> &str {
        &self.context.line_above
    }

    fn line_below(&self) -> &str {
        &self.context.line_below
    }

    fn line(&self) -> &str {
        &self.context.line
    }

    fn source(&self) -> &<SpannedParsingError as ErrorContextDisplay>::Error {
        &self.source
    }
}
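A small crate-internal sketch of the help text this produces, hand-derived from
the Display impl for TokenKind further up (the span value is made up):

    let err = ParsingError::ExpectedDifferentToken {
        expected: TokenKind::Semicolon,
        actual: TokenKind::Colon,
        span: TokenSpan { start: 0, end: 1 },
    };
    // TokenKind's Display renders the variants as "SEMICOLON" / "COLON":
    assert_eq!(
        err.additional_help(),
        "I expected a 'SEMICOLON' here, but you put a 'COLON' there!"
    );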
@@ -0,0 +1,4 @@
mod error;
mod unchecked;
#[cfg(test)]
mod test;
@@ -0,0 +1,88 @@
use crate::{
    command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput, Genus},
    lexing::{Token, TokenKind, TokenSpan, TokenStream},
};

use super::error::ParsingError;

use pretty_assertions::assert_eq;

#[test]
fn test_failing() {
    let input = "
fn print(message: CommandTransferValue);

nasp trinitrix { {}
    fn hi honner(name: String) -> String; ;
}

";
    let parsed = TokenStream::lex(input).unwrap().parse_unchecked();
    let err = parsed.unwrap_err().source;
    match err {
        ParsingError::ExpectedDifferentToken { .. } => panic!("Wrong error"),
        ParsingError::ExpectedKeyword { .. } => {}
    }
}

#[test]
fn test_full() {
    let input = "fn print(message: CommandTransferValue);

nasp trinitrix {
    fn hi(name: String) -> String;
}
";
    let parsed = TokenStream::lex(input).unwrap().parse_unchecked().unwrap();
    let expected = CommandSpec {
        declarations: vec![
            Declaration {
                namespace: vec![],
                genus: Genus::Function(Function {
                    identifier: Token {
                        span: TokenSpan { start: 3, end: 8 },
                        kind: TokenKind::Identifier("print".to_owned()),
                    },
                    inputs: vec![FunctionInput {
                        name: Token {
                            span: TokenSpan { start: 9, end: 16 },
                            kind: TokenKind::Identifier("message".to_owned()),
                        },
                        r#type: Token {
                            span: TokenSpan { start: 18, end: 38 },
                            kind: TokenKind::Identifier("CommandTransferValue".to_owned()),
                        },
                    }],
                    output: None,
                }),
            },
            Declaration {
                namespace: vec![Token {
                    span: TokenSpan { start: 47, end: 56 },
                    kind: TokenKind::Identifier("trinitrix".to_owned()),
                }],
                genus: Genus::Function(Function {
                    identifier: Token {
                        span: TokenSpan { start: 66, end: 68 },
                        kind: TokenKind::Identifier("hi".to_owned()),
                    },
                    inputs: vec![FunctionInput {
                        name: Token {
                            span: TokenSpan { start: 69, end: 73 },
                            kind: TokenKind::Identifier("name".to_owned()),
                        },
                        r#type: Token {
                            span: TokenSpan { start: 75, end: 81 },
                            kind: TokenKind::Identifier("String".to_owned()),
                        },
                    }],
                    output: Some(Token {
                        span: TokenSpan { start: 86, end: 92 },
                        kind: TokenKind::Identifier("String".to_owned()),
                    }),
                }),
            },
        ],
    };
    assert_eq!(parsed, expected);
}
@@ -0,0 +1,167 @@
use crate::{
    command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput},
    error::ErrorContext,
    lexing::{Token, TokenKind, TokenStream},
    token,
};

use super::error::{ParsingError, SpannedParsingError};

impl TokenStream {
    pub fn parse_unchecked(self) -> Result<CommandSpec, SpannedParsingError> {
        let mut parser = Parser::new(self);
        parser.parse()
    }
}

pub(super) struct Parser {
    token_stream: TokenStream,
    current_namespaces: Vec<Token>, // This should in the second pass turn into Identifiers
}

impl Parser {
    fn new(mut token_stream: TokenStream) -> Self {
        token_stream.reverse();
        Self {
            token_stream,
            current_namespaces: vec![],
        }
    }

    fn parse(&mut self) -> Result<CommandSpec, SpannedParsingError> {
        let mut declarations = vec![];
        while !self.token_stream.is_empty() {
            let mut next = self.parse_next().map_err(|err| {
                let span = err.get_span();
                SpannedParsingError {
                    source: err,
                    context: ErrorContext::from_span(span, &self.token_stream.original_file),
                }
            })?;

            declarations.append(&mut next);
        }

        Ok(CommandSpec { declarations })
    }

    fn parse_next(&mut self) -> Result<Vec<Declaration>, ParsingError> {
        match self.peek().kind() {
            token![nasp] => Ok(self.parse_namespace()?),
            token![fn] => Ok(vec![Declaration::new_function(
                self.parse_function()?,
                self.current_namespaces.clone(),
            )]),
            _ => {
                let err = ParsingError::ExpectedKeyword {
                    span: *self.peek().span(),
                    actual: self.peek().kind().clone(),
                };

                return Err(err);
            }
        }
    }

    fn parse_namespace(&mut self) -> Result<Vec<Declaration>, ParsingError> {
        self.expect(token![nasp])?;
        let namespace_name = self.expect(token![Ident])?;
        self.current_namespaces.push(namespace_name);
        self.expect(token![BraceOpen])?;

        let mut declarations = vec![];
        while !self.expect_peek(token![BraceClose]) {
            declarations.append(&mut self.parse_next()?);
        }

        self.expect(token![BraceClose])?;
        self.current_namespaces.pop();
        Ok(declarations)
    }

    fn parse_function(&mut self) -> Result<Function, ParsingError> {
        self.expect(token![fn])?;
        let name = self.expect(token![Ident])?;
        self.expect(token![ParenOpen])?;
        let mut inputs = vec![];

        while self.expect_peek(token![Ident]) {
            let input_name = self.expect(token![Ident])?;
            self.expect(token![Colon])?;
            let input_type = self.expect(token![Ident])?;
            inputs.push(FunctionInput {
                name: input_name,
                r#type: input_type,
            })
        }

        self.expect(token![ParenClose])?;
        let mut output_type = None;
        if self.expect_peek(token![->]) {
            self.expect(token![->])?;
            output_type = Some(self.expect(token![Ident])?);
        }
        self.expect(token![;])?;
        Ok(Function {
            identifier: name,
            inputs,
            output: output_type,
        })
    }

    /// Expect a token in the next input position.
    /// For example:
    ///
    /// ```dont_run
    /// use trixy_lang_parser::{
    ///     lexing::{Keyword, TokenKind, TokenStream},
    ///     parsing::unchecked::Parser,
    ///     token,
    /// };
    ///
    /// # fn main() {
    /// let token_stream = TokenStream::lex("nasp {}").unwrap();
    /// let parser = Parser::new(token_stream);
    /// assert_eq!(parser.expect(token![nasp]).unwrap(), TokenKind::Keyword(Keyword::nasp));
    /// assert_eq!(parser.expect(token![BraceOpen]).unwrap(), TokenKind::BraceOpen);
    /// assert_eq!(parser.expect(token![BraceClose]).unwrap(), TokenKind::BraceClose);
    /// assert!(parser.expect(token![BraceClose]).is_err());
    /// # }
    /// ```
    pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
        let actual_token = self.peek();
        if actual_token.kind().same_kind(&token) {
            Ok(self.pop())
        } else {
            let err = ParsingError::ExpectedDifferentToken {
                expected: token,
                actual: actual_token.kind().clone(),
                span: *actual_token.span(),
            };

            Err(err)
        }
    }

    /// Check if the next token is of the specified TokenKind.
    /// Does not alter the token_stream.
    fn expect_peek(&self, token: TokenKind) -> bool {
        let actual_token = self.peek();
        actual_token.kind().same_kind(&token)
    }

    /// Looks at the next token without removing it
    fn peek(&self) -> &Token {
        self.token_stream.peek()
    }

    /// Removes the next token
    fn pop(&mut self) -> Token {
        self.token_stream.pop()
    }
}