feat(trixy-lang_parser): Add first parser pass
This commit is contained in:
parent
74efd3eda6
commit
3a65c33b15
|
@ -2,5 +2,5 @@
|
|||
/target
|
||||
/result
|
||||
|
||||
# lua_macros is a library
|
||||
# This crate is a library
|
||||
Cargo.lock
|
||||
|
|
|
@ -0,0 +1,6 @@
|
|||
# trixy-lang_parser
|
||||
This crate contains a parser (and lexer) for the Trixy language.
|
||||
The corresponding grammar is in the grammar file [here](./docs/grammar.ebnf) encoded in [Extended Backus-Naur Form](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form).
|
||||
|
||||
## Docs
|
||||
Run `./generate_docs` to turn the grammar file into railroad diagrams.
|
|
@ -0,0 +1,17 @@
|
|||
# (*
|
||||
# Trixy is fully whitespace independent, this means that you can
|
||||
# interleave whitespace in the definitions.
|
||||
# The same applies to comments:
|
||||
# - Line comments (`// \n`) and
|
||||
# - Block comments (`/* */`).
|
||||
# *)
|
||||
|
||||
CommandSpec = { Function | Namespace } ;
|
||||
Function = "fn" Identifier "(" {Identifier ":" Type} ")" [ "->" Type ] ";" ;
|
||||
Namespace = "nasp" Identifier "{" {Function | Namespace} "}" ;
|
||||
Type = "String" | "Integer" ; # (* This corresponds to the CommandTransferValue *)
|
||||
Identifier = CHARACTER { NUMBER | CHARACTER } ;
|
||||
|
||||
# (*
|
||||
# vim: ft=ebnf
|
||||
# *)
|
Binary file not shown.
|
@ -0,0 +1,9 @@
|
|||
fn print(message: CommandTransferValue);
|
||||
|
||||
nasp trinitrix { {}
|
||||
fn hi honner(name: String) -> String; ;
|
||||
}
|
||||
|
||||
|
||||
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
|
||||
// vim: syntax=rust
|
|
@ -0,0 +1,126 @@
|
|||
/// Prints to the output, with a newline.
|
||||
// HACK(@soispha): The stdlib Lua `print()` function has stdout as output hardcoded,
|
||||
// redirecting stdout seems too much like a hack thus we are just redefining the print function
|
||||
// to output to a controlled output. <2023-09-09>
|
||||
fn print(input: CommandTransferValue);
|
||||
|
||||
nasp trinitrix {
|
||||
/// Language specific functions, which mirror the `trinitrix.api` namespace.
|
||||
/// That is, if you have to choose between a `std` and a `api` function choose the `std`
|
||||
/// one as it will most likely be more high-level and easier to use (as it isn't abstracted
|
||||
/// over multiple languages). Feel free to drop down to the lower level api, if you feel
|
||||
/// like that more, it should be as stable and user-oriented as the `std` functions
|
||||
nasp std {}
|
||||
|
||||
/// Debug only functions, these are effectively useless
|
||||
nasp debug {
|
||||
/// Greets the user
|
||||
fn greet(input: String) -> String;
|
||||
|
||||
/// Returns a table of greeted users
|
||||
fn greet_multiple() -> Table;
|
||||
}
|
||||
|
||||
/// General API to change stuff in Trinitrix
|
||||
nasp api {
|
||||
/// Closes the application
|
||||
fn exit();
|
||||
|
||||
/// Send a message to the current room
|
||||
/// The send message is interpreted literally.
|
||||
fn room_message_send(msg: String);
|
||||
|
||||
/// Open the help pages at the first occurrence of
|
||||
/// the input string if it is Some, otherwise open
|
||||
/// the help pages at the start
|
||||
fn help(input: Option<String>);
|
||||
|
||||
// Register a function to be used with the Trinitrix API
|
||||
// (This function is actually implemented in the std namespace)
|
||||
/* fn register_function(function: RawFunction); */
|
||||
|
||||
/// Functions that change the UI, or UI state
|
||||
nasp ui {
|
||||
/// Shows the command line
|
||||
fn command_line_show();
|
||||
|
||||
/// Hides the command line
|
||||
fn command_line_hide();
|
||||
|
||||
/// Go to the next plane
|
||||
fn cycle_planes();
|
||||
/// Go to the previous plane
|
||||
fn cycle_planes_rev();
|
||||
|
||||
/// Sets the current app mode to Normal / navigation mode
|
||||
fn set_mode_normal();
|
||||
/// Sets the current app mode to Insert / editing mode
|
||||
fn set_mode_insert();
|
||||
}
|
||||
|
||||
/// Manipulate keymappings. The mode is specified as a String built up of all the modes
|
||||
/// the keymapping should be active in. The mapping works as follows:
|
||||
/// n => normal Mode
|
||||
/// c => command Mode
|
||||
/// i => insert Mode
|
||||
///
|
||||
/// The key works in a similar manner, specifying the required keypresses to trigger the
|
||||
/// callback. For example "aba" would require the user to press "a" then "b" then "a" again
|
||||
/// to trigger the mapping. Special characters are encoded as follows:
|
||||
/// "<C-a>ba" => "Ctrl+a" then "b" then "a"
|
||||
/// "<S-a>" => "A" or "Shift+a"
|
||||
/// "A" => "A"
|
||||
/// "<M-a> " => "Alt+a" (<A-a>) or "Meta+a"(<M-a>) (most terminals can't really differentiate between these characters)
|
||||
/// "a<C-b><C-a>" => "a" then "Ctrl+b" then "Ctrl+a" (also works for Shift, Alt and Super)
|
||||
/// "<CSM-b>" => "Ctrl+Shift+Alt+b" (the ordering doesn't matter)
|
||||
/// "a " => "a" then a literal space (" ")
|
||||
/// "å🙂" => "å" then "🙂" (full Unicode support!)
|
||||
/// "<ESC>" => escape key
|
||||
/// "<F3>" => F3 key
|
||||
/// "<BACKSPACE>" => backspace key (and so forth)
|
||||
/// "<DASH>" => a literal "-"
|
||||
/// "<ANGULAR_BRACKET_OPEN>" or "<ABO>" => a literal "<"
|
||||
/// "<ANGULAR_BRACKET_CLOSE>" or "<ABC>" => a literal ">"
|
||||
///
|
||||
/// The callback MUST be registered first by calling
|
||||
/// `trinitrix.api.register_function()`; the returned value can then be used to
|
||||
/// set the keymap.
|
||||
nasp keymaps {
|
||||
/// Add a new keymapping
|
||||
fn add(mode: String, key: String, callback: Function);
|
||||
|
||||
/// Remove a keymapping
|
||||
///
|
||||
/// Does nothing if the keymapping doesn't exist
|
||||
fn remove((/* mode: */ String, /* key: */ String));
|
||||
|
||||
/// List declared keymappings
|
||||
fn get(mode: String);
|
||||
}
|
||||
|
||||
/// Functions only used internally within Trinitrix
|
||||
nasp raw {
|
||||
/// Send an error to the default error output
|
||||
fn raise_error(input: String);
|
||||
|
||||
/// Send output to the default output
|
||||
/// This is mainly used to display the final
|
||||
/// output of evaluated lua commands.
|
||||
fn display_output(input: String);
|
||||
|
||||
/// Input a character without checking for possible keymaps
|
||||
/// If the current state does not expect input, this character is ignored
|
||||
/// The encoding is the same as in the `trinitrix.api.keymaps` commands
|
||||
fn send_input_unprocessed(input: String);
|
||||
|
||||
/// This namespace is used to store some command specific data (like functions, as
|
||||
/// ensuring memory locations stay allocated in a garbage collected language is hard)
|
||||
///
|
||||
/// Treat it as an implementation detail
|
||||
nasp __private {}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
|
||||
// vim: syntax=rust
|
|
@ -3,7 +3,9 @@ fn print(message: CommandTransferValue);
|
|||
nasp trinitrix {
|
||||
fn hi(name: String) -> String;
|
||||
}
|
||||
namespace commands { >-
|
||||
|
||||
nasp trinitrix {
|
||||
fn ho(name: String) -> String;
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
#!/usr/bin/env sh

# Turns the grammar file into a PDF and stores it next to the grammar.
# Both paths are relative to the current directory, so run this from the
# directory that contains `docs/`.

# Abort on the first failing command: without this a failed `ebnf2pdf` run
# would still execute the `mv` and could move a stale `out.pdf` into place.
set -e

ebnf2pdf "./docs/grammar.ebnf"
mv out.pdf ./docs/grammar.pdf

# vim: ft=sh
|
|
@ -0,0 +1,58 @@
|
|||
//! This module contains the already type checked types.
|
||||
//!
|
||||
//!
|
||||
|
||||
use crate::lexing::{Keyword, TokenKind};
|
||||
/// The primitive types known to the type checked command specification.
///
/// Derives the same comparison and debug traits as the other command
/// specification types, so values can be compared and printed in diagnostics.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum PrimitiveTypes {
    /// A string
    String,
    /// Nothing
    Void,
}
|
||||
|
||||
impl From<TokenKind> for Identifier {
|
||||
fn from(value: TokenKind) -> Self {
|
||||
match value {
|
||||
TokenKind::Identifier(ident) => Identifier(ident),
|
||||
TokenKind::Keyword(_)
|
||||
| TokenKind::Colon
|
||||
| TokenKind::Semicolon
|
||||
| TokenKind::Comma
|
||||
| TokenKind::Arrow
|
||||
| TokenKind::BraceOpen
|
||||
| TokenKind::BraceClose
|
||||
| TokenKind::ParenOpen
|
||||
| TokenKind::Dummy
|
||||
| TokenKind::ParenClose => {
|
||||
panic!("Tried to convert a non Identifier TokenKind to a Identefier. This is a bug")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An Identifier
|
||||
/// These include
|
||||
/// - Variable names
|
||||
/// - Function names
|
||||
/// - Namespace names
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Identifier(String);
|
||||
|
||||
impl From<TokenKind> for Keyword {
|
||||
fn from(value: TokenKind) -> Self {
|
||||
match value {
|
||||
TokenKind::Keyword(keyword) => keyword,
|
||||
TokenKind::Identifier(_)
|
||||
| TokenKind::Colon
|
||||
| TokenKind::Semicolon
|
||||
| TokenKind::Comma
|
||||
| TokenKind::Arrow
|
||||
| TokenKind::BraceOpen
|
||||
| TokenKind::BraceClose
|
||||
| TokenKind::ParenOpen
|
||||
| TokenKind::Dummy
|
||||
| TokenKind::ParenClose => {
|
||||
panic!("Tried to convert a non Keyword TokenKind to a Keyword. This is a bug")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,36 +1,2 @@
|
|||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct CommandSpec {
|
||||
pub(crate) declarations: Vec<Declaration>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct Declaration {
|
||||
pub(crate) namespace: Vec<Namespace>,
|
||||
pub(crate) genus: Genus,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct Namespace {
|
||||
pub(crate) name: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) enum Genus {
|
||||
Function {
|
||||
name: String,
|
||||
inputs: Vec<NamedType>,
|
||||
output: Type,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct NamedType {
|
||||
pub(crate) name: String,
|
||||
pub(crate) base: Type,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) enum Type {
|
||||
String,
|
||||
Void,
|
||||
}
|
||||
pub mod checked;
|
||||
pub mod unchecked;
|
||||
|
|
|
@ -0,0 +1,48 @@
|
|||
//! This module contains the not type checked types.
|
||||
//! These are generated on the first pass of the parser, to be later converted into the checked
|
||||
//! ones.
|
||||
|
||||
use crate::lexing::Token;
|
||||
|
||||
/// The not yet type checked command specification: a list of the declarations
/// produced by the first parser pass.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct CommandSpec {
|
||||
/// The unchecked declarations.
pub declarations: Vec<Declaration>,
|
||||
}
|
||||
|
||||
/// A single unchecked declaration together with the namespace tokens it was
/// declared under.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Declaration {
|
||||
// Still raw tokens at this stage; presumably ordered from the outermost to the
// innermost namespace — TODO confirm against the parser.
pub namespace: Vec<Token>, // Will later be turned into Namespace
|
||||
/// What kind of item is declared (see [`Genus`]).
pub genus: Genus,
|
||||
}
|
||||
|
||||
impl Declaration {
|
||||
pub fn new_function(function: Function, namespace: Vec<Token>) -> Self {
|
||||
Declaration { namespace, genus: Genus::Function(function) }
|
||||
}
|
||||
}
|
||||
|
||||
/// An unchecked namespace, identified by the raw token holding its name.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Namespace {
|
||||
pub name: Token, // Will later become an Identifier
|
||||
}
|
||||
|
||||
/// The kind of item an unchecked [`Declaration`] declares.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub enum Genus {
|
||||
/// Not actually a genus, but used in parsing to accommodate multiple errors
|
||||
Dummy,
|
||||
/// A function
|
||||
Function(Function),
|
||||
}
|
||||
|
||||
/// An unchecked function declaration; every part is still a raw token.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct Function {
|
||||
pub identifier: Token, // Will later become an Identifier
|
||||
/// The declared arguments, in the order they were stored by the parser.
pub inputs: Vec<FunctionInput>,
|
||||
// `None` when no return type token was recorded for the function.
pub output: Option<Token>, // Will later become a Type
|
||||
}
|
||||
|
||||
/// One unchecked function argument: a name token paired with a type token.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub struct FunctionInput {
|
||||
pub name: Token, // Will later become an Identifier
|
||||
pub r#type: Token, // Will later become a Type
|
||||
}
|
|
@ -1,9 +1,191 @@
|
|||
use core::fmt;
|
||||
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::lexing::error::SpannedLexingError;
|
||||
use crate::lexing::{error::SpannedLexingError, TokenSpan};
|
||||
|
||||
/// The error type returned by this crate's parsing entry point.
#[derive(Error, Debug)]
|
||||
pub enum TrixyError {
|
||||
// NOTE: despite the variant name, the wrapped error is a *lexing* error
// (a `SpannedLexingError`); `transparent` forwards its message unchanged.
#[error(transparent)]
|
||||
Parsing(#[from] SpannedLexingError),
|
||||
}
|
||||
|
||||
/// The context of an Error.
|
||||
#[derive(Debug)]
|
||||
pub struct ErrorContext {
|
||||
/// The span of the error in the source file
|
||||
pub span: TokenSpan,
|
||||
/// The span of the error in the context line relative to the context line
|
||||
pub contexted_span: TokenSpan,
|
||||
/// The line above the error
|
||||
pub line_above: String,
|
||||
/// The line below the error
|
||||
pub line_below: String,
|
||||
/// The line in which the error occurred
|
||||
pub line: String,
|
||||
/// The line number of the main error line
|
||||
pub line_number: usize,
|
||||
}
|
||||
|
||||
impl ErrorContext {
|
||||
pub fn from_span(span: TokenSpan, original_file: &str) -> Self {
|
||||
let line_number = original_file
|
||||
.chars()
|
||||
.take(span.start)
|
||||
.filter(|a| a == &'\n')
|
||||
.count()
|
||||
// This is here, as we are missing one newline with the method above
|
||||
+ 1;
|
||||
|
||||
let lines: Vec<_> = original_file.lines().collect();
|
||||
|
||||
let line = (*lines
|
||||
.get(line_number - 1)
|
||||
.expect("This should work, as have *at least* one (index = 0) line"))
|
||||
.to_owned();
|
||||
|
||||
let contexted_span = {
|
||||
let matched_line: Vec<_> = original_file.match_indices(&line).collect();
|
||||
let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
|
||||
debug_assert_eq!(matched_line, &&line);
|
||||
TokenSpan {
|
||||
start: span.start - index,
|
||||
end: span.end - index,
|
||||
}
|
||||
};
|
||||
|
||||
let line_above;
|
||||
if line_number == 0 {
|
||||
// We only have one line, so no line above
|
||||
line_above = "".to_owned();
|
||||
} else {
|
||||
line_above = (*lines
|
||||
.get((line_number - 1) - 1)
|
||||
.expect("We checked that this should work"))
|
||||
.to_owned();
|
||||
}
|
||||
|
||||
let line_below;
|
||||
if lines.len() - 1 > line_number {
|
||||
// We have a line after the current line
|
||||
line_below = (*lines
|
||||
.get((line_number + 1) - 1)
|
||||
.expect("We checked that this should work"))
|
||||
.to_owned();
|
||||
} else {
|
||||
line_below = "".to_owned();
|
||||
}
|
||||
|
||||
Self {
|
||||
span,
|
||||
contexted_span,
|
||||
line_above,
|
||||
line_below,
|
||||
line,
|
||||
line_number,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_index(start: usize, orginal_file: &str) -> Self {
|
||||
let span = TokenSpan {
|
||||
start,
|
||||
end: start,
|
||||
};
|
||||
Self::from_span(span, orginal_file)
|
||||
}
|
||||
|
||||
pub fn get_error_line(&self, source_error: &str) -> String {
|
||||
// deconstruct the structure
|
||||
let ErrorContext {
|
||||
contexted_span,
|
||||
line_number,
|
||||
..
|
||||
} = self;
|
||||
|
||||
let mut output = String::new();
|
||||
output.push_str("\x1b[92;1m");
|
||||
|
||||
// pad to accommodate the line number printing.
|
||||
// 32 -> needs two spaces padding to print it
|
||||
line_number.to_string().chars().for_each(|_| {
|
||||
output.push(' ');
|
||||
});
|
||||
|
||||
// pad to the beginning of the error
|
||||
for _ in 0..contexted_span.start {
|
||||
output.push(' ');
|
||||
}
|
||||
|
||||
// push the error markers
|
||||
for _ in contexted_span.start..contexted_span.end {
|
||||
output.push('^');
|
||||
}
|
||||
|
||||
// // pad until end of line
|
||||
// for _ in contexted_span.end..(line.len() - 1) {
|
||||
// output.push('-');
|
||||
// }
|
||||
//
|
||||
// additional space to avoid having to end with a '-'
|
||||
output.push(' ');
|
||||
|
||||
output.push_str("help: ");
|
||||
|
||||
output.push_str(source_error);
|
||||
output.push_str("\x1b[0m");
|
||||
output
|
||||
}
|
||||
}
|
||||
|
||||
/// Implemented by error types that can supply an extra hint; the hint is what
/// `ErrorContextDisplay::error_fmt` prints after `help: ` below the error line.
pub trait AdditionalHelp {
|
||||
/// Returns the hint text for this error.
fn additional_help(&self) -> String;
|
||||
}
|
||||
|
||||
pub trait ErrorContextDisplay: fmt::Display {
|
||||
type Error;
|
||||
|
||||
fn error_fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result
|
||||
where
|
||||
<Self as ErrorContextDisplay>::Error: std::fmt::Display + AdditionalHelp,
|
||||
{
|
||||
let error_line = self
|
||||
.context()
|
||||
.get_error_line(&self.source().additional_help());
|
||||
|
||||
writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", self.source())?;
|
||||
|
||||
if !self.line_above().is_empty() {
|
||||
writeln!(
|
||||
f,
|
||||
"\x1b[32;1m{} |\x1b[0m {}",
|
||||
self.line_number() - 1,
|
||||
self.line_above()
|
||||
)?;
|
||||
}
|
||||
writeln!(
|
||||
f,
|
||||
"\x1b[36;1m{} |\x1b[0m {}",
|
||||
self.line_number(),
|
||||
self.line()
|
||||
)?;
|
||||
writeln!(f, " {}", error_line)?;
|
||||
if !self.line_below().is_empty() {
|
||||
writeln!(
|
||||
f,
|
||||
"\x1b[32;1m{} |\x1b[0m {}",
|
||||
self.line_number() + 1,
|
||||
self.line_below()
|
||||
)
|
||||
} else {
|
||||
write!(f, "")
|
||||
}
|
||||
}
|
||||
|
||||
// getters
|
||||
fn context(&self) -> &ErrorContext;
|
||||
fn source(&self) -> &Self::Error;
|
||||
fn line_number(&self) -> usize;
|
||||
fn line_above(&self) -> &str;
|
||||
fn line_below(&self) -> &str;
|
||||
fn line(&self) -> &str;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
use std::{error::Error, fmt::Display};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::error::{AdditionalHelp, ErrorContext, ErrorContextDisplay};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum LexingError {
|
||||
#[error("No matches were found")]
|
||||
|
@ -13,88 +15,61 @@ pub enum LexingError {
|
|||
ExpectedArrow,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SpannedLexingError {
|
||||
Error {
|
||||
source: LexingError,
|
||||
/// The starting char index of the error in the source file
|
||||
start: usize,
|
||||
/// The starting char index of the error in the context line
|
||||
contexted_start: usize,
|
||||
/// The line above the error
|
||||
line_above: String,
|
||||
/// The line below the error
|
||||
line_below: String,
|
||||
/// The line in which the error occurred
|
||||
line: String,
|
||||
/// The line number of the main error line
|
||||
line_number: usize,
|
||||
impl AdditionalHelp for LexingError {
|
||||
fn additional_help(& self) -> String {
|
||||
let out = match self {
|
||||
LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
|
||||
LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
|
||||
LexingError::ExpectedArrow => "The `-` token is interpretet as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
|
||||
LexingError::UnknownCharacter(char) => {
|
||||
format!("This char: `{char}`; is not a valid token")
|
||||
},
|
||||
};
|
||||
out
|
||||
}
|
||||
}
|
||||
|
||||
/// A [`LexingError`] paired with the source context it occurred in.
#[derive(Debug)]
|
||||
pub struct SpannedLexingError {
|
||||
/// The underlying lexing error
pub source: LexingError,
|
||||
/// Where in the source file the error occurred
pub context: ErrorContext,
|
||||
}
|
||||
|
||||
impl Error for SpannedLexingError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
let Self::Error { source, .. } = self;
|
||||
Some(source)
|
||||
Some(&self.source)
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorContextDisplay for SpannedLexingError {
|
||||
type Error = LexingError;
|
||||
|
||||
fn context(&self) -> &crate::error::ErrorContext {
|
||||
&self.context
|
||||
}
|
||||
|
||||
fn line_number(&self) -> usize {
|
||||
self.context.line_number
|
||||
}
|
||||
|
||||
fn line_above(&self) -> &str {
|
||||
&self.context.line_above
|
||||
}
|
||||
|
||||
fn line_below(&self) -> &str {
|
||||
&self.context.line_below
|
||||
}
|
||||
|
||||
fn line(&self) -> &str {
|
||||
&self.context.line
|
||||
}
|
||||
|
||||
fn source(&self) -> &<SpannedLexingError as ErrorContextDisplay>::Error {
|
||||
&self.source
|
||||
}
|
||||
}
|
||||
impl Display for SpannedLexingError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let Self::Error {
|
||||
source,
|
||||
line_above,
|
||||
line_below,
|
||||
line,
|
||||
line_number,
|
||||
contexted_start,
|
||||
..
|
||||
} = self;
|
||||
let error_line = {
|
||||
let mut output = String::new();
|
||||
output.push_str("\x1b[92;1m");
|
||||
for _ in 0..(*contexted_start) {
|
||||
output.push(' ');
|
||||
}
|
||||
line_number.to_string().chars().for_each(|_| {
|
||||
output.push(' ');
|
||||
});
|
||||
output.push('^');
|
||||
for _ in *contexted_start..(line.len() - 1) {
|
||||
output.push('-');
|
||||
}
|
||||
output.push(' ');
|
||||
let appandig_str = match source {
|
||||
LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
|
||||
LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
|
||||
LexingError::UnknownCharacter(char) => format!("This char: `{char}`; is not a valid token"),
|
||||
LexingError::ExpectedArrow => "The `-` token is interpretet as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(),
|
||||
};
|
||||
output.push_str(&appandig_str);
|
||||
output.push_str("\x1b[0m");
|
||||
output
|
||||
};
|
||||
|
||||
writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", source)?;
|
||||
if !line_above.is_empty() {
|
||||
writeln!(
|
||||
f,
|
||||
"\x1b[32;1m{} |\x1b[0m {}",
|
||||
line_number - 1,
|
||||
line_above
|
||||
)?;
|
||||
}
|
||||
writeln!(f, "\x1b[36;1m{} |\x1b[0m {}", line_number, line)?;
|
||||
writeln!(f, " {}", error_line)?;
|
||||
if !line_below.is_empty() {
|
||||
writeln!(
|
||||
f,
|
||||
"\x1b[32;1m{} |\x1b[0m {}",
|
||||
line_number + 1,
|
||||
line_below
|
||||
)
|
||||
} else {
|
||||
write!(f, "")
|
||||
}
|
||||
self.error_fmt(f)
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use self::{error::SpannedLexingError, tokenizer::Tokenizer};
|
||||
|
||||
pub mod error;
|
||||
|
@ -8,7 +10,7 @@ mod test;
|
|||
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
pub struct TokenStream {
|
||||
original_file: String,
|
||||
pub original_file: String,
|
||||
tokens: Vec<Token>,
|
||||
}
|
||||
|
||||
|
@ -31,34 +33,83 @@ impl TokenStream {
|
|||
original_file: src.to_owned(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Get a token by index
|
||||
pub fn get(&self, index: usize) -> Option<&Token> {
|
||||
// Plain lookup in the underlying vector; `None` if `index` is out of bounds.
self.tokens.get(index)
|
||||
}
|
||||
|
||||
/// Get a reference to the uppermost token, without modifying the token list
|
||||
pub fn peek(&self) -> &Token {
    // The "uppermost" token is the last element of the underlying vector,
    // i.e. the one `pop` would remove next. Panics if no tokens are left.
    self.tokens.last().expect("This should not be empty")
}
|
||||
|
||||
/// Remove and return the uppermost token
|
||||
pub fn pop(&mut self) -> Token {
    // Takes the last element of the underlying vector.
    // Panics if no tokens are left.
    self.tokens.pop().expect("This should not be empty")
}
|
||||
|
||||
/// Reverses the underlying tokens vector
|
||||
/// This facilitates using the pop and peek methods to parse the tokens from the beginning,
|
||||
/// not the end
|
||||
pub fn reverse(&mut self) {
|
||||
// In-place reversal; calling this twice restores the original order.
self.tokens.reverse()
|
||||
}
|
||||
|
||||
/// Check if the TokenStream is empty.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
// `true` once every token has been removed (or if none were ever stored).
self.tokens.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// A token span is recorded in chars starting from the beginning of the file:
|
||||
/// A token span like this, for example:
|
||||
/// ```no_run
|
||||
/// ```dont_run
|
||||
///# use trixy_lang_parser::lexing::TokenSpan;
|
||||
/// TokenSpan {
|
||||
/// start: 20,
|
||||
/// end: 23,
|
||||
/// }
|
||||
/// ```
|
||||
/// signals, that the token starts at the 20th char in the source file and ends on the 23rd.
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
|
||||
pub struct TokenSpan {
|
||||
start: usize,
|
||||
/// The start of the token span
|
||||
end: usize,
|
||||
pub start: usize,
|
||||
/// The end of the token span
|
||||
pub end: usize,
|
||||
}
|
||||
|
||||
/// A Token
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone)]
|
||||
pub struct Token {
|
||||
/// The token's original location in the source file
|
||||
span: TokenSpan,
|
||||
kind: TokenKind,
|
||||
pub span: TokenSpan,
|
||||
pub kind: TokenKind,
|
||||
}
|
||||
|
||||
impl Token {
|
||||
/// Return the TokenKind of a token
|
||||
pub fn kind(&self) -> &TokenKind {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
/// Return the TokenSpan of a token
|
||||
pub fn span(&self) -> &TokenSpan {
|
||||
&self.span
|
||||
}
|
||||
|
||||
/// Get a dummy token, this is intended for error handling
|
||||
pub fn get_dummy() -> Token {
|
||||
Self {
|
||||
span: TokenSpan { start: 0, end: 0 },
|
||||
kind: TokenKind::Dummy,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Possible kinds of tokens
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub enum TokenKind {
|
||||
Keyword(Keyword),
|
||||
Identifier(String),
|
||||
|
@ -68,12 +119,49 @@ pub enum TokenKind {
|
|||
Arrow,
|
||||
BraceOpen,
|
||||
BraceClose,
|
||||
ParenthesisOpen,
|
||||
ParenthesisClose,
|
||||
ParenOpen,
|
||||
ParenClose,
|
||||
/// This is not a real TokenKind, but only used for error handling
|
||||
Dummy,
|
||||
}
|
||||
|
||||
impl TokenKind {
|
||||
pub fn same_kind(&self, other: &TokenKind) -> bool {
|
||||
if let TokenKind::Identifier(_) = self {
|
||||
if let TokenKind::Identifier(_) = other {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
self == other
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for TokenKind {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
TokenKind::Keyword(word) => write!(f, "KEYWORD({})", word),
|
||||
TokenKind::Identifier(ident) => {
|
||||
if ident == "" {
|
||||
write!(f, "IDENTIFIER")
|
||||
} else {
|
||||
write!(f, "IDENTIFIER({})", ident)
|
||||
}
|
||||
}
|
||||
TokenKind::Colon => f.write_str("COLON"),
|
||||
TokenKind::Semicolon => f.write_str("SEMICOLON"),
|
||||
TokenKind::Comma => f.write_str("COMMA"),
|
||||
TokenKind::Arrow => f.write_str("ARROW"),
|
||||
TokenKind::BraceOpen => f.write_str("BRACEOPEN"),
|
||||
TokenKind::BraceClose => f.write_str("BRACECLOSE"),
|
||||
TokenKind::ParenOpen => f.write_str("PARENOPEN"),
|
||||
TokenKind::ParenClose => f.write_str("PARENCLOSE"),
|
||||
TokenKind::Dummy => f.write_str("DUMMY"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Keywords used in the language
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
|
||||
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
|
||||
pub enum Keyword {
|
||||
/// Start a namespace declaration
|
||||
#[allow(non_camel_case_types)]
|
||||
|
@ -82,3 +170,85 @@ pub enum Keyword {
|
|||
#[allow(non_camel_case_types)]
|
||||
r#fn,
|
||||
}
|
||||
|
||||
impl Display for Keyword {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Keyword::nasp => f.write_str("nasp"),
|
||||
Keyword::r#fn => f.write_str("fn"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shorthand macro for generating a token from *anything* which can be
|
||||
/// converted into a `TokenKind`, or any of the `TokenKind` variants.
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
/// use trixy_lang_parser::token;
|
||||
/// # fn main() {
|
||||
/// token![nasp];
|
||||
/// token![;];
|
||||
/// token![Arrow];
|
||||
/// # }
|
||||
/// ```
|
||||
#[macro_export]
|
||||
macro_rules! token {
|
||||
// Punctuation can be given either by variant name or by its symbol.
[Semicolon] => { $crate::lexing::TokenKind::Semicolon };
|
||||
[;] => { $crate::lexing::TokenKind::Semicolon };
|
||||
[Colon] => { $crate::lexing::TokenKind::Colon };
|
||||
[:] => { $crate::lexing::TokenKind::Colon };
|
||||
[Comma] => { $crate::lexing::TokenKind::Comma };
|
||||
[,] => { $crate::lexing::TokenKind::Comma };
|
||||
[Arrow] => { $crate::lexing::TokenKind::Arrow };
|
||||
[->] => { $crate::lexing::TokenKind::Arrow };
|
||||
// NOTE: the symbol arms for braces and parentheses below stay commented out:
// a lone delimiter is not a valid token tree, so it can not be written in a
// `macro_rules!` matcher. Use the variant names instead.
[BraceOpen] => { $crate::lexing::TokenKind::BraceOpen };
|
||||
// [{] => { $crate::lexing::TokenKind::BraceOpen };
|
||||
[BraceClose] => { $crate::lexing::TokenKind::BraceClose };
|
||||
// [}] => { $crate::lexing::TokenKind::BraceClose };
|
||||
[ParenOpen] => { $crate::lexing::TokenKind::ParenOpen };
|
||||
// [(] => { $crate::lexing::TokenKind::ParenOpen };
|
||||
[ParenClose] => { $crate::lexing::TokenKind::ParenClose };
|
||||
// [)] => { $crate::lexing::TokenKind::ParenClose };
|
||||
|
||||
[nasp] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::nasp) };
|
||||
[fn] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#fn) };
|
||||
|
||||
// This only works for checking for an identifier (the generated name is empty),
|
||||
// see the `same_kind` method on TokenKind
|
||||
[Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
|
||||
[Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::TokenKind;
    use crate::token;

    /// Generates a `#[test]` called `$name`, which asserts that `token![$from]`
    /// expands to the token kind `$to`.
    // The original macro contained this arm twice; the second, identical copy
    // could never be reached and was removed.
    macro_rules! token_macro_test {
        ($name:ident, $from:tt, => $to:expr) => {
            #[test]
            fn $name() {
                let got: TokenKind = token![$from];
                let should_be = $to;

                assert_eq!(got, should_be);
            }
        };
    }

    token_macro_test!(tok_expands_to_arrow, ->, => TokenKind::Arrow);
    token_macro_test!(tok_expands_to_semicolon, Semicolon, => TokenKind::Semicolon);
    token_macro_test!(tok_expands_to_nasp, nasp, => TokenKind::Keyword(crate::lexing::Keyword::nasp));
    token_macro_test!(tok_expands_to_fn, fn, => TokenKind::Keyword(crate::lexing::Keyword::r#fn));
}
|
||||
|
|
|
@ -36,7 +36,7 @@ nasp commands {
|
|||
},
|
||||
Token {
|
||||
span: TokenSpan { start: 30, end: 31 },
|
||||
kind: TokenKind::ParenthesisOpen,
|
||||
kind: TokenKind::ParenOpen,
|
||||
},
|
||||
Token {
|
||||
span: TokenSpan { start: 31, end: 36 },
|
||||
|
@ -52,7 +52,7 @@ nasp commands {
|
|||
},
|
||||
Token {
|
||||
span: TokenSpan { start: 44, end: 45 },
|
||||
kind: TokenKind::ParenthesisClose,
|
||||
kind: TokenKind::ParenClose,
|
||||
},
|
||||
Token {
|
||||
span: TokenSpan { start: 46, end: 48 },
|
||||
|
|
|
@ -1,6 +1,9 @@
|
|||
// This code is heavily inspired by: https://michael-f-bryan.github.io/static-analyser-in-rust/book/lex.html
|
||||
|
||||
use crate::lexing::{Keyword, TokenSpan};
|
||||
use crate::{
|
||||
error::ErrorContext,
|
||||
lexing::{Keyword, TokenSpan},
|
||||
};
|
||||
|
||||
use super::{
|
||||
error::{LexingError, SpannedLexingError},
|
||||
|
@ -29,61 +32,11 @@ impl<'a> Tokenizer<'a> {
|
|||
let start = self.current_index;
|
||||
|
||||
let (token_kind, index) = self.get_next_tokenkind().map_err(|e| {
|
||||
let (line_above, line, line_below, contexted_start, line_number) = {
|
||||
let line_number = self
|
||||
.original_text
|
||||
.chars()
|
||||
.take(start)
|
||||
.filter(|a| a == &'\n')
|
||||
.count();
|
||||
let lines: Vec<_> = self.original_text.lines().collect();
|
||||
let context = ErrorContext::from_index(start, self.original_text);
|
||||
|
||||
let line = (*lines
|
||||
.get(line_number)
|
||||
.expect("This should work, as have *at least* one (index = 0) line"))
|
||||
.to_owned();
|
||||
|
||||
let contexted_start = {
|
||||
let matched_line: Vec<_> = self.original_text.match_indices(&line).collect();
|
||||
let (index, matched_line) = matched_line.get(0).expect("This first index should always match, as we took the line from the string in the first place");
|
||||
debug_assert_eq!(matched_line, &&line);
|
||||
start - index
|
||||
};
|
||||
|
||||
let line_above;
|
||||
if line_number == 0 {
|
||||
// We only have one line, so no line above
|
||||
line_above = "".to_owned();
|
||||
} else {
|
||||
line_above = (*lines
|
||||
.get(line_number - 1)
|
||||
.expect("We checked that this should work"))
|
||||
.to_owned();
|
||||
}
|
||||
|
||||
let line_below;
|
||||
if lines.len() - 1 > line_number {
|
||||
// We have a line after the current line
|
||||
line_below = (*lines
|
||||
.get(line_number + 1)
|
||||
.expect("We checked that this should work"))
|
||||
.to_owned();
|
||||
} else {
|
||||
line_below = "".to_owned();
|
||||
}
|
||||
|
||||
(line_above, line, line_below, contexted_start, line_number)
|
||||
};
|
||||
SpannedLexingError::Error {
|
||||
source: e,
|
||||
start,
|
||||
contexted_start,
|
||||
line_above,
|
||||
line_below,
|
||||
line_number,
|
||||
line,
|
||||
}
|
||||
SpannedLexingError { source: e, context }
|
||||
})?;
|
||||
|
||||
self.chomp(index); // end - start
|
||||
let end = self.current_index;
|
||||
Ok(Some(Token {
|
||||
|
@ -100,8 +53,8 @@ impl<'a> Tokenizer<'a> {
|
|||
};
|
||||
|
||||
let (tok, length) = match next {
|
||||
'(' => (TokenKind::ParenthesisOpen, 1),
|
||||
')' => (TokenKind::ParenthesisClose, 1),
|
||||
'(' => (TokenKind::ParenOpen, 1),
|
||||
')' => (TokenKind::ParenClose, 1),
|
||||
'{' => (TokenKind::BraceOpen, 1),
|
||||
'}' => (TokenKind::BraceClose, 1),
|
||||
':' => (TokenKind::Colon, 1),
|
||||
|
|
|
@ -2,11 +2,12 @@ use error::TrixyError;
|
|||
|
||||
use crate::lexing::TokenStream;
|
||||
|
||||
use self::command_spec::CommandSpec;
|
||||
use self::command_spec::unchecked::CommandSpec;
|
||||
|
||||
mod command_spec;
|
||||
pub mod error;
|
||||
pub mod lexing;
|
||||
pub mod parsing;
|
||||
|
||||
pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
|
||||
let input_tokens = TokenStream::lex(input)?;
|
||||
|
@ -14,45 +15,45 @@ pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, TrixyError> {
|
|||
todo!()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::{
|
||||
command_spec::{CommandSpec, Declaration, Genus, NamedType, Namespace, Type},
|
||||
parse_trixy_lang,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_function_with_namespace() {
|
||||
let expected = parse_trixy_lang(
|
||||
"
|
||||
nasp commands {
|
||||
fn say_something(name_to_greet: String, what_to_say: String) -> String;
|
||||
}
|
||||
",
|
||||
)
|
||||
.unwrap();
|
||||
let correct: CommandSpec = {
|
||||
let declarations = vec![Declaration {
|
||||
namespace: vec![Namespace {
|
||||
name: "commands".to_owned(),
|
||||
}],
|
||||
genus: Genus::Function {
|
||||
name: "say_something".to_owned(),
|
||||
inputs: vec![
|
||||
NamedType {
|
||||
name: "name_to_greet".to_owned(),
|
||||
base: Type::String,
|
||||
},
|
||||
NamedType {
|
||||
name: "what_to_say".to_owned(),
|
||||
base: Type::String,
|
||||
},
|
||||
],
|
||||
output: Type::String,
|
||||
},
|
||||
}];
|
||||
CommandSpec { declarations }
|
||||
};
|
||||
assert_eq!(expected, correct);
|
||||
}
|
||||
}
|
||||
// #[cfg(test)]
|
||||
// mod test {
|
||||
// use crate::{
|
||||
// command_spec::unchecked::{CommandSpec, Declaration, Genus, Namespace},
|
||||
// parse_trixy_lang,
|
||||
// };
|
||||
//
|
||||
// #[test]
|
||||
// fn test_function_with_namespace() {
|
||||
// let expected = parse_trixy_lang(
|
||||
// "
|
||||
// nasp commands {
|
||||
// fn say_something(name_to_greet: String, what_to_say: String) -> String;
|
||||
// }
|
||||
// ",
|
||||
// )
|
||||
// .unwrap();
|
||||
// let correct: CommandSpec = {
|
||||
// let declarations = vec![Declaration {
|
||||
// namespace: vec![Namespace {
|
||||
// name: "commands".to_owned(),
|
||||
// }],
|
||||
// genus: Genus::Function {
|
||||
// name: "say_something".to_owned(),
|
||||
// inputs: vec![
|
||||
// NamedType {
|
||||
// name: "name_to_greet".to_owned(),
|
||||
// base: Type::String,
|
||||
// },
|
||||
// NamedType {
|
||||
// name: "what_to_say".to_owned(),
|
||||
// base: Type::String,
|
||||
// },
|
||||
// ],
|
||||
// output: Type::String,
|
||||
// },
|
||||
// }];
|
||||
// CommandSpec { declarations }
|
||||
// };
|
||||
// assert_eq!(expected, correct);
|
||||
// }
|
||||
// }
|
||||
|
|
|
@ -23,6 +23,11 @@ pub enum Command {
|
|||
/// The file containing the trixy code to tokenize
|
||||
file: PathBuf,
|
||||
},
|
||||
Parse {
|
||||
#[clap(value_parser)]
|
||||
/// The file containing the trixy code to parse
|
||||
file: PathBuf,
|
||||
},
|
||||
}
|
||||
|
||||
pub fn main() {
|
||||
|
@ -34,12 +39,34 @@ pub fn main() {
|
|||
let input_tokens = match TokenStream::lex(&input) {
|
||||
Ok(err) => err,
|
||||
Err(ok) => {
|
||||
println!("{}", ok);
|
||||
eprintln!("{}", ok);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
println!("{:#?}", input_tokens);
|
||||
}
|
||||
Command::Parse { file } => {
|
||||
let input = fs::read_to_string(file).unwrap();
|
||||
|
||||
let input_tokens = match TokenStream::lex(&input) {
|
||||
Ok(ok) => ok,
|
||||
Err(err) => {
|
||||
eprintln!("Error while tokenizing:");
|
||||
eprintln!("{}", err);
|
||||
exit(1);
|
||||
}
|
||||
};
|
||||
|
||||
let parsed = match input_tokens.parse_unchecked() {
|
||||
Ok(ok) => ok,
|
||||
Err(err) => {
|
||||
eprintln!("Error while doing the first (unchecked) parsing run:");
|
||||
eprintln!("{}", err);
|
||||
exit(1)
|
||||
}
|
||||
};
|
||||
println!("{:#?}", parsed);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,93 @@
|
|||
use std::{error::Error, fmt::Display};
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::{
|
||||
error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
|
||||
lexing::{TokenKind, TokenSpan},
|
||||
};
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum ParsingError {
|
||||
#[error("Expected '{expected}' but received '{actual}'")]
|
||||
ExpectedDifferentToken {
|
||||
expected: TokenKind,
|
||||
actual: TokenKind,
|
||||
span: TokenSpan,
|
||||
},
|
||||
|
||||
#[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
|
||||
ExpectedKeyword { actual: TokenKind, span: TokenSpan },
|
||||
}
|
||||
|
||||
impl ParsingError {
|
||||
pub fn get_span(&self) -> TokenSpan {
|
||||
match self {
|
||||
ParsingError::ExpectedDifferentToken { span, .. } => *span,
|
||||
ParsingError::ExpectedKeyword { span, .. } => *span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl AdditionalHelp for ParsingError {
    /// Builds a longer, conversational explanation of the error.
    fn additional_help(&self) -> String {
        match self {
            Self::ExpectedKeyword { actual, .. } => format!(
                "I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!",
                actual
            ),
            Self::ExpectedDifferentToken {
                expected, actual, ..
            } => format!(
                "I expected a '{}' here, but you put a '{}' there!",
                expected, actual
            ),
        }
    }
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SpannedParsingError {
|
||||
pub source: ParsingError,
|
||||
pub context: ErrorContext,
|
||||
}
|
||||
|
||||
impl Error for SpannedParsingError {
|
||||
fn source(&self) -> Option<&(dyn Error + 'static)> {
|
||||
Some(&self.source)
|
||||
}
|
||||
}
|
||||
|
||||
impl Display for SpannedParsingError {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
self.error_fmt(f)
|
||||
}
|
||||
}
|
||||
|
||||
impl ErrorContextDisplay for SpannedParsingError {
|
||||
type Error = ParsingError;
|
||||
|
||||
fn context(&self) -> &crate::error::ErrorContext {
|
||||
&self.context
|
||||
}
|
||||
|
||||
fn line_number(&self) -> usize {
|
||||
self.context.line_number
|
||||
}
|
||||
|
||||
fn line_above(&self) -> &str {
|
||||
&self.context.line_above
|
||||
}
|
||||
|
||||
fn line_below(&self) -> &str {
|
||||
&self.context.line_below
|
||||
}
|
||||
|
||||
fn line(&self) -> &str {
|
||||
&self.context.line
|
||||
}
|
||||
|
||||
fn source(&self) -> &<SpannedParsingError as ErrorContextDisplay>::Error {
|
||||
&self.source
|
||||
}
|
||||
}
|
|
@ -0,0 +1,4 @@
|
|||
mod error;
|
||||
mod unchecked;
|
||||
#[cfg(test)]
|
||||
mod test;
|
|
@ -0,0 +1,88 @@
|
|||
use crate::{
|
||||
command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput, Genus},
|
||||
lexing::{Token, TokenKind, TokenSpan, TokenStream},
|
||||
};
|
||||
|
||||
use super::error::ParsingError;
|
||||
|
||||
use pretty_assertions::assert_eq;
|
||||
|
||||
/// A stray `{` where a declaration should start must be reported as a missing keyword.
#[test]
fn test_failing() {
    let input = "
fn print(message: CommandTransferValue);

nasp trinitrix { {}
    fn hi honner(name: String) -> String; ;
}

";
    let tokens = TokenStream::lex(input).unwrap();
    let error = tokens.parse_unchecked().unwrap_err();

    match error.source {
        ParsingError::ExpectedKeyword { .. } => {}
        ParsingError::ExpectedDifferentToken { .. } => panic!("Wrong error"),
    }
}
|
||||
|
||||
/// Parses one top-level function and one namespaced function, then compares the
/// complete result, spans included.
#[test]
fn test_full() {
    // The spans asserted below are offsets into this exact text: it must start
    // directly with `fn`, and the namespaced function is indented by four spaces.
    let input = "fn print(message: CommandTransferValue);

nasp trinitrix {
    fn hi(name: String) -> String;
}
";
    let parsed = TokenStream::lex(input).unwrap().parse_unchecked().unwrap();

    // Every token in the expected tree is an identifier, so build them in one place.
    let ident = |start, end, name: &str| Token {
        span: TokenSpan { start, end },
        kind: TokenKind::Identifier(name.to_owned()),
    };

    let print_function = Function {
        identifier: ident(3, 8, "print"),
        inputs: vec![FunctionInput {
            name: ident(9, 16, "message"),
            r#type: ident(18, 38, "CommandTransferValue"),
        }],
        output: None,
    };

    let hi_function = Function {
        identifier: ident(66, 68, "hi"),
        inputs: vec![FunctionInput {
            name: ident(69, 73, "name"),
            r#type: ident(75, 81, "String"),
        }],
        output: Some(ident(86, 92, "String")),
    };

    let expected = CommandSpec {
        declarations: vec![
            Declaration {
                namespace: vec![],
                genus: Genus::Function(print_function),
            },
            Declaration {
                namespace: vec![ident(47, 56, "trinitrix")],
                genus: Genus::Function(hi_function),
            },
        ],
    };

    assert_eq!(parsed, expected);
}
|
|
@ -0,0 +1,167 @@
|
|||
use crate::{
|
||||
command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput},
|
||||
error::ErrorContext,
|
||||
lexing::{Token, TokenKind, TokenStream},
|
||||
token,
|
||||
};
|
||||
|
||||
use super::error::{ParsingError, SpannedParsingError};
|
||||
|
||||
impl TokenStream {
|
||||
pub fn parse_unchecked(self) -> Result<CommandSpec, SpannedParsingError> {
|
||||
let mut parser = Parser::new(self);
|
||||
parser.parse()
|
||||
}
|
||||
}
|
||||
|
||||
pub(super) struct Parser {
|
||||
token_stream: TokenStream,
|
||||
current_namespaces: Vec<Token>, // This should in the second pass turn into Identifiers
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
fn new(mut token_stream: TokenStream) -> Self {
|
||||
token_stream.reverse();
|
||||
Self {
|
||||
token_stream,
|
||||
current_namespaces: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
fn parse(&mut self) -> Result<CommandSpec, SpannedParsingError> {
|
||||
let mut declarations = vec![];
|
||||
while !self.token_stream.is_empty() {
|
||||
let mut next = self.parse_next().map_err(|err| {
|
||||
let span = err.get_span();
|
||||
SpannedParsingError {
|
||||
source: err,
|
||||
context: ErrorContext::from_span(span, &self.token_stream.original_file),
|
||||
}
|
||||
})?;
|
||||
|
||||
declarations.append(&mut next);
|
||||
}
|
||||
|
||||
Ok(CommandSpec { declarations })
|
||||
}
|
||||
|
||||
fn parse_next(&mut self) -> Result<Vec<Declaration>, ParsingError> {
|
||||
match self.peek().kind() {
|
||||
token![nasp] => Ok(self.parse_namespace()?),
|
||||
token![fn] => Ok(vec![Declaration::new_function(
|
||||
self.parse_function()?,
|
||||
self.current_namespaces.clone(),
|
||||
)]),
|
||||
_ => {
|
||||
let err = ParsingError::ExpectedKeyword {
|
||||
span: *self.peek().span(),
|
||||
actual: self.peek().kind().clone(),
|
||||
};
|
||||
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_namespace(&mut self) -> Result<Vec<Declaration>, ParsingError> {
|
||||
self.expect(token![nasp])?;
|
||||
let namespace_name = self.expect(token![Ident])?;
|
||||
self.current_namespaces.push(namespace_name);
|
||||
self.expect(token![BraceOpen])?;
|
||||
|
||||
let mut declarations = vec![];
|
||||
while !self.expect_peek(token![BraceClose]) {
|
||||
declarations.append(&mut self.parse_next()?);
|
||||
}
|
||||
|
||||
self.expect(token![BraceClose])?;
|
||||
self.current_namespaces.pop();
|
||||
Ok(declarations)
|
||||
}
|
||||
|
||||
fn parse_function(&mut self) -> Result<Function, ParsingError> {
|
||||
self.expect(token![fn])?;
|
||||
let name = self.expect(token![Ident])?;
|
||||
self.expect(token![ParenOpen])?;
|
||||
let mut inputs = vec![];
|
||||
|
||||
while self.expect_peek(token![Ident]) {
|
||||
let input_name = self.expect(token![Ident])?;
|
||||
self.expect(token![Colon])?;
|
||||
let input_type = self.expect(token![Ident])?;
|
||||
inputs.push(FunctionInput {
|
||||
name: input_name,
|
||||
r#type: input_type,
|
||||
})
|
||||
}
|
||||
|
||||
self.expect(token![ParenClose])?;
|
||||
let mut output_type = None;
|
||||
if self.expect_peek(token![->]) {
|
||||
self.expect(token![->])?;
|
||||
output_type = Some(self.expect(token![Ident])?);
|
||||
}
|
||||
self.expect(token![;])?;
|
||||
Ok(Function {
|
||||
identifier: name,
|
||||
inputs,
|
||||
output: output_type,
|
||||
})
|
||||
}
|
||||
|
||||
/// Expect a token in the next input position:
|
||||
/// For example:
|
||||
///
|
||||
/// ```dont_run
|
||||
/// use trixy_lang_parser::{
|
||||
/// lexing::{Keyword, TokenKind, TokenStream},
|
||||
/// parsing::unchecked::Parser,
|
||||
/// token,
|
||||
/// };
|
||||
///
|
||||
/// # fn main() {
|
||||
/// let token_stream = TokenStream::lex("nasp {}").unwrap();
|
||||
/// let parser = Parser::new(token_stream);
|
||||
/// assert_eq!(parser.expect(token![nasp]).unwrap(), TokenKind::Keyword(Keyword::nasp));
|
||||
/// assert_eq!(parser.expect(token![BraceOpen]).unwrap(), TokenKind::BraceOpen);
|
||||
/// assert_eq!(parser.expect(token![BraceClose]).unwrap(), TokenKind::BraceClose);
|
||||
/// assert!(parser.expect(token![BraceClose]).is_err());
|
||||
/// # }
|
||||
/// ```
|
||||
///
|
||||
pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
|
||||
let actual_token = self.peek();
|
||||
if actual_token.kind().same_kind(&token) {
|
||||
Ok(self.pop())
|
||||
} else {
|
||||
let err = ParsingError::ExpectedDifferentToken {
|
||||
expected: token,
|
||||
actual: actual_token.kind().clone(),
|
||||
span: *actual_token.span(),
|
||||
};
|
||||
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if the next token is of the specified TokenKind.
|
||||
/// Does not alter the token_stream
|
||||
fn expect_peek(&self, token: TokenKind) -> bool {
|
||||
let actual_token = self.peek();
|
||||
if actual_token.kind().same_kind(&token) {
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Looks at the next token without removing it
|
||||
fn peek(&self) -> &Token {
|
||||
self.token_stream.peek()
|
||||
}
|
||||
|
||||
/// Removes the next token
|
||||
fn pop(&mut self) -> Token {
|
||||
self.token_stream.pop()
|
||||
}
|
||||
}
|
Reference in New Issue