commit 233fa3e1b5aee153de2818aab474b8712c9b0d91 Author: Soispha Date: Sat Dec 23 16:07:17 2023 +0100 chore: Initial commit This not the "real" first commit, the others are in the trinitrix/core repository, made before the split-up took place diff --git a/.envrc b/.envrc new file mode 100644 index 0000000..96eca6b --- /dev/null +++ b/.envrc @@ -0,0 +1,7 @@ +use flake || use nix +watch_file flake.nix + +if on_git_branch; then + echo && git status --short --branch && + echo && git fetch --verbose +fi diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..39ffcc7 --- /dev/null +++ b/.gitignore @@ -0,0 +1,9 @@ +# build +/target +/result + +# direnv +.direnv + +# trixy is a library +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..af5db2d --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "trixy" +version = "0.1.0" +edition = "2021" + +[lib] +proc-macro = true + +[dependencies] +convert_case = "0.6.0" +proc-macro2 = "1.0.70" +quote = "1.0.33" +syn = { version = "2.0.41", features = ["extra-traits", "full", "parsing"] } diff --git a/flake.lock b/flake.lock new file mode 100644 index 0000000..5e94277 --- /dev/null +++ b/flake.lock @@ -0,0 +1,274 @@ +{ + "nodes": { + "crane": { + "inputs": { + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1703089493, + "narHash": "sha256-WUjYqUP/Lhhop9+aiHVFREgElunx1AHEWxqMT8ePfzo=", + "owner": "ipetkov", + "repo": "crane", + "rev": "2a5136f14a9ac93d9d370d64a36026c5de3ae8a4", + "type": "github" + }, + "original": { + "owner": "ipetkov", + "repo": "crane", + "type": "github" + } + }, + "ebnf2pdf": { + "inputs": { + "ebnf2ps": "ebnf2ps", + "flake-utils": [ + "flake-utils" + ], + "flake_version_update": "flake_version_update", + "nixpkgs": [ + "nixpkgs" + ], + "shell_library": "shell_library", + "systems": "systems" + }, + "locked": { + "lastModified": 1703259682, + "narHash": "sha256-Tfkl4aOXvmwPsAaYouR2Q/MRL5Ryr874S4ylxEIPnvg=", + "ref": "refs/heads/prime", + "rev": "c615b5892f682af7e613eae78a52ca9721755a75", + "revCount": 5, + "type": "git", + "url": "https://codeberg.org/soispha/ebnf2pdf.git" + }, + "original": { + "type": "git", + "url": "https://codeberg.org/soispha/ebnf2pdf.git" + } + }, + "ebnf2ps": { + "inputs": { + "flake-compat": "flake-compat", + "flake-utils": [ + "ebnf2pdf", + "flake-utils" + ], + "nixpkgs": [ + "ebnf2pdf", + "nixpkgs" + ], + "systems": [ + "ebnf2pdf", + "systems" + ] + }, + "locked": { + "lastModified": 1702739351, + "narHash": "sha256-A+rPYKLymnd3BP0ZxminOdz4ayV0C0xC7LI9a3Gu5aM=", + "ref": "refs/heads/master", + "rev": "50b96f52e7c0c1c9e774e1855790ef3a9f47c8b6", + "revCount": 15, + "type": "git", + "url": "https://codeberg.org/soispha/ebnf2ps.git" + }, + "original": { + "type": "git", + "url": "https://codeberg.org/soispha/ebnf2ps.git" + } + }, + "flake-compat": { + "flake": false, + "locked": { + "lastModified": 1696426674, + "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + "type": "github" + } + }, + "flake-compat_2": { + "flake": false, + "locked": { + "lastModified": 1696426674, + "narHash": "sha256-kvjfFW7WAETZlt09AgDn1MrtKzP7t90Vf7vypd3OL1U=", + "owner": "edolstra", + "repo": "flake-compat", + "rev": "0f9255e01c2351cc7d116c072cb317785dd33b33", + "type": "github" + }, + "original": { + "owner": "edolstra", + "repo": "flake-compat", + 
"type": "github" + } + }, + "flake-utils": { + "inputs": { + "systems": "systems_2" + }, + "locked": { + "lastModified": 1701680307, + "narHash": "sha256-kAuep2h5ajznlPMD9rnQyffWG8EM/C73lejGofXvdM8=", + "owner": "numtide", + "repo": "flake-utils", + "rev": "4022d587cbbfd70fe950c1e2083a02621806a725", + "type": "github" + }, + "original": { + "owner": "numtide", + "repo": "flake-utils", + "type": "github" + } + }, + "flake_version_update": { + "inputs": { + "flake-utils": [ + "ebnf2pdf", + "flake-utils" + ], + "nixpkgs": [ + "ebnf2pdf", + "nixpkgs" + ], + "systems": [ + "ebnf2pdf", + "systems" + ] + }, + "locked": { + "lastModified": 1685288691, + "narHash": "sha256-oP6h34oJ8rm6KlUpyZrX+ww3hnoWny2ecrEXxkU7F3c=", + "ref": "refs/heads/prime", + "rev": "e9a97e01eca780bd16e1dbdbd8856b59558f4959", + "revCount": 5, + "type": "git", + "url": "https://codeberg.org/soispha/flake_version_update.git" + }, + "original": { + "type": "git", + "url": "https://codeberg.org/soispha/flake_version_update.git" + } + }, + "nixpkgs": { + "locked": { + "lastModified": 1703134684, + "narHash": "sha256-SQmng1EnBFLzS7WSRyPM9HgmZP2kLJcPAz+Ug/nug6o=", + "owner": "NixOS", + "repo": "nixpkgs", + "rev": "d6863cbcbbb80e71cecfc03356db1cda38919523", + "type": "github" + }, + "original": { + "owner": "NixOS", + "ref": "nixpkgs-unstable", + "repo": "nixpkgs", + "type": "github" + } + }, + "root": { + "inputs": { + "crane": "crane", + "ebnf2pdf": "ebnf2pdf", + "flake-compat": "flake-compat_2", + "flake-utils": "flake-utils", + "nixpkgs": "nixpkgs", + "rust-overlay": "rust-overlay" + } + }, + "rust-overlay": { + "inputs": { + "flake-utils": [ + "flake-utils" + ], + "nixpkgs": [ + "nixpkgs" + ] + }, + "locked": { + "lastModified": 1703297543, + "narHash": "sha256-d4QlzcoSNzb/iZheWtjOyRxoBSaLETSpViENGEkxv9g=", + "owner": "oxalica", + "repo": "rust-overlay", + "rev": "fc77c8b416b1537522d30c710baaaaebf769f816", + "type": "github" + }, + "original": { + "owner": "oxalica", + "repo": "rust-overlay", + "type": "github" + } + }, + "shell_library": { + "inputs": { + "flake-utils": [ + "ebnf2pdf", + "flake-utils" + ], + "flake_version_update": [ + "ebnf2pdf", + "flake_version_update" + ], + "nixpkgs": [ + "ebnf2pdf", + "nixpkgs" + ], + "systems": [ + "ebnf2pdf", + "systems" + ] + }, + "locked": { + "lastModified": 1702394821, + "narHash": "sha256-wdq1KUZPUbPAbPXPqrkSjx9POS9XhCgSjIecq6KN7JI=", + "ref": "refs/heads/prime", + "rev": "552419325081adaded55effa255d6f967cfc5245", + "revCount": 118, + "type": "git", + "url": "https://codeberg.org/soispha/shell_library.git" + }, + "original": { + "type": "git", + "url": "https://codeberg.org/soispha/shell_library.git" + } + }, + "systems": { + "locked": { + "lastModified": 1680978846, + "narHash": "sha256-Gtqg8b/v49BFDpDetjclCYXm8mAnTrUzR0JnE2nv5aw=", + "owner": "nix-systems", + "repo": "x86_64-linux", + "rev": "2ecfcac5e15790ba6ce360ceccddb15ad16d08a8", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "x86_64-linux", + "type": "github" + } + }, + "systems_2": { + "locked": { + "lastModified": 1681028828, + "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", + "owner": "nix-systems", + "repo": "default", + "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", + "type": "github" + }, + "original": { + "owner": "nix-systems", + "repo": "default", + "type": "github" + } + } + }, + "root": "root", + "version": 7 +} diff --git a/flake.nix b/flake.nix new file mode 100644 index 0000000..1542558 --- /dev/null +++ b/flake.nix @@ -0,0 +1,107 @@ +{ + description = "A 
rust crate used to generate multi-language apis for your + application"; + inputs = { + nixpkgs.url = "github:NixOS/nixpkgs/nixpkgs-unstable"; + + # inputs for following + flake-compat = { + url = "github:edolstra/flake-compat"; + flake = false; + }; + + crane = { + url = "github:ipetkov/crane"; + inputs = { + nixpkgs.follows = "nixpkgs"; + }; + }; + flake-utils = { + url = "github:numtide/flake-utils"; + inputs = {}; + }; + rust-overlay = { + url = "github:oxalica/rust-overlay"; + inputs = { + nixpkgs.follows = "nixpkgs"; + flake-utils.follows = "flake-utils"; + }; + }; + + ebnf2pdf = { + url = "git+https://codeberg.org/soispha/ebnf2pdf.git"; + inputs = { + nixpkgs.follows = "nixpkgs"; + flake-utils.follows = "flake-utils"; + }; + }; + }; + outputs = { + self, + nixpkgs, + crane, + flake-utils, + rust-overlay, + ... + }: + flake-utils.lib.eachDefaultSystem (system: let + pkgs = import nixpkgs { + inherit system; + overlays = [(import rust-overlay)]; + }; + + nightly = false; + rust = + if nightly + then + (pkgs.rust-bin.selectLatestNightlyWith (toolchain: + toolchain.default)) + .override { + extensions = ["rustc-codegen-cranelift-preview"]; + } + else pkgs.rust-bin.stable.latest.default; + rust_min = + if nightly + then + (pkgs.rust-bin.selectLatestNightlyWith (toolchain: + toolchain.minimal)) + .override { + extensions = ["rustc-codegen-cranelift-preview"]; + } + else pkgs.rust-bin.stable.latest.minimal; + + craneLib = (crane.mkLib pkgs).overrideToolchain rust_min; + + nativeBuildInputs = with pkgs; [ + pkg-config + ]; + buildInputs = with pkgs; [ + lua54Packages.lua + ]; + + craneBuild = craneLib.buildPackage { + src = craneLib.cleanCargoSource ./.; + + doCheck = true; + inherit nativeBuildInputs buildInputs; + }; + in { + checks.default = craneBuild; + packages = { + default = craneBuild; + }; + + devShells.default = pkgs.mkShell { + packages = with pkgs; [ + alejandra + + rust + cargo-edit + cargo-expand + + ebnf2pdf.outputs.packages."${system}".default + ]; + inherit nativeBuildInputs buildInputs; + }; + }); +} diff --git a/src/config/mod.rs b/src/config/mod.rs new file mode 100644 index 0000000..5899d6e --- /dev/null +++ b/src/config/mod.rs @@ -0,0 +1,103 @@ +//! This module is responsible for parsing the config passed to the macro call: +//! For example: +//! ```no_run +//! trixy_generate! { +//! path: ./trintrix_command_interface.tri +//! languages: rust, lua, c +//! } +//! 
``` + +use std::path::PathBuf; + +use proc_macro2::Ident; +use syn::{parse::Parse, punctuated::Punctuated, LitStr, Result, Token}; + +mod kw { + syn::custom_keyword!(path); + syn::custom_keyword!(languages); +} + +#[derive(Debug)] +pub enum Language { + Rust, + Lua, + C, +} + +#[derive(Debug)] +struct Languages { + #[allow(dead_code)] + languages: kw::languages, + #[allow(dead_code)] + colon: Token![:], + raw: Punctuated, +} + +#[derive(Debug)] +struct Path { + #[allow(dead_code)] + path: kw::path, + #[allow(dead_code)] + colon: Token![:], + raw: PathBuf, +} + +#[derive(Debug)] +pub struct TrixyConfig { + /// The Path to the base command interface config file + path: Path, + + /// The languages the commands should be exposed in + languages: Languages, +} +impl TrixyConfig { + pub fn get_path(&self) -> PathBuf { + self.path.raw + } +} + +impl Parse for TrixyConfig { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + Ok(Self { + path: input.parse()?, + languages: input.parse()?, + }) + } +} + +impl Parse for Path { + fn parse(input: syn::parse::ParseStream) -> Result { + let path: kw::path = input.parse()?; + let colon: Token![:] = input.parse()?; + let raw = PathBuf::from(input.parse::()?.value()); + Ok(Self { path, colon, raw }) + } +} + +impl Parse for Languages { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let languages: kw::languages = input.parse()?; + let colon: Token![:] = input.parse()?; + let raw = Punctuated::::parse_separated_nonempty(input)?; + Ok(Self { + languages, + colon, + raw, + }) + } +} + +impl Parse for Language { + fn parse(input: syn::parse::ParseStream) -> Result { + let ident: Ident = input.parse()?; + match &ident.to_string()[..] { + "rust" | "Rust" => Ok(Self::Rust), + "lua" | "Lua" => Ok(Self::Lua), + "c" | "C" => Ok(Self::C), + other => Err(input.error(format!( + "The language: `{}` is not a registered language!", + other + ))), + } + } +} diff --git a/src/generate/command_enum/mod.rs b/src/generate/command_enum/mod.rs new file mode 100644 index 0000000..61f0b7b --- /dev/null +++ b/src/generate/command_enum/mod.rs @@ -0,0 +1,103 @@ +use convert_case::{Case, Casing}; +use proc_macro2::TokenStream as TokenStream2; +use quote::{format_ident, quote, ToTokens}; +use syn::{punctuated::Punctuated, Ident, Token, Type}; + +use crate::{DataCommandEnum, command_enum_parsing::Field}; + +use super::get_input_type_of_bare_fn_field; + +pub fn command_enum(input: &DataCommandEnum) -> TokenStream2 { + let (fields, namespace_enums): (TokenStream2, TokenStream2) = + turn_fields_to_enum(&input.fields); + + quote! 
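+    // For a declaration like `greet: fn(String) -> String`, `#fields` expands
+    // to a `Greet(String)` variant; `#namespace_enums` collects one nested
+    // enum per namespace (see the expansion example in the `src/lib.rs` docs).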
{ + #[derive(Debug)] + pub enum Command { + #fields + } + #namespace_enums + } +} + +fn turn_fields_to_enum(fields: &Punctuated) -> (TokenStream2, TokenStream2) { + let output: Vec<_> = fields + .iter() + .map(|field| turn_struct_field_to_enum(field)) + .collect(); + + let mut fields_output: TokenStream2 = Default::default(); + let mut namespace_enums_output: TokenStream2 = Default::default(); + + for (fields, namespace_enum) in output { + fields_output.extend(fields.to_token_stream()); + namespace_enums_output.extend(namespace_enum.to_token_stream()); + } + + (fields_output, namespace_enums_output) +} + +fn turn_struct_field_to_enum(field: &Field) -> (TokenStream2, TokenStream2) { + match field { + Field::Function(fun_field) => { + let field_name = format_ident!( + "{}", + fun_field + .name + .to_string() + .from_case(Case::Snake) + .to_case(Case::Pascal) + ); + + let input_type: Option = get_input_type_of_bare_fn_field(fun_field); + + match input_type { + Some(input_type) => ( + quote! { + #field_name(#input_type), + }, + quote! {}, + ), + None => ( + quote! { + #field_name, + }, + quote! {}, + ), + } + } + Field::Namespace(namespace) => { + let (namespace_output_fields, namespace_output_namespace_enums) = + turn_fields_to_enum(&namespace.fields); + let namespace_name: Ident = format_ident!( + "{}", + namespace + .path + .iter() + .map(|name| name.to_string()) + .collect::() + ); + + let new_namespace_name: Ident = format_ident!( + "{}", + namespace_name + .to_string() + .from_case(Case::Snake) + .to_case(Case::Pascal) + ); + + ( + quote! { + #new_namespace_name(#new_namespace_name), + }, + quote! { + #[derive(Debug)] + pub enum #new_namespace_name { + #namespace_output_fields + } + #namespace_output_namespace_enums + }, + ) + } + } +} diff --git a/src/generate/lua_wrapper/lua_functions_to_globals/mod.rs b/src/generate/lua_wrapper/lua_functions_to_globals/mod.rs new file mode 100644 index 0000000..124817a --- /dev/null +++ b/src/generate/lua_wrapper/lua_functions_to_globals/mod.rs @@ -0,0 +1,123 @@ +use proc_macro2::TokenStream as TokenStream2; +use quote::{format_ident, quote}; +use syn::{punctuated::Punctuated, Token}; + +use crate::{ + command_enum_parsing::{Field, NamespacePath, FunctionDeclaration}, + DataCommandEnum, +}; + +pub fn generate_add_lua_functions_to_globals(input: &DataCommandEnum) -> TokenStream2 { + fn turn_field_to_functions( + input: &Punctuated, + namespace_path: Option<&NamespacePath>, + ) -> TokenStream2 { + input + .iter() + .map(|field| match field { + Field::Function(function) => generate_function_adder(function, namespace_path), + Field::Namespace(namespace) => { + let mut passed_namespace = + namespace_path.unwrap_or(&Default::default()).clone(); + namespace + .path + .clone() + .into_iter() + .for_each(|val| passed_namespace.push(val)); + + turn_field_to_functions(&namespace.fields, Some(&passed_namespace)) + } + }) + .collect() + } + let function_adders: TokenStream2 = turn_field_to_functions(&input.fields, None); + + quote! 
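+    // The generated function stores the event sender in the Lua app data,
+    // registers every wrapper on the globals table, and returns the `Lua`
+    // state again so the caller keeps ownership.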
{ + pub fn add_lua_functions_to_globals( + lua: mlua::Lua, + tx: tokio::sync::mpsc::Sender, + ) -> mlua::Lua { + lua.set_app_data(tx); + let globals = lua.globals(); + + #function_adders + + drop(globals); + lua + } + } +} + +fn generate_function_adder( + field: &FunctionDeclaration, + namespace_path: Option<&NamespacePath>, +) -> TokenStream2 { + let field_ident = &field.name; + + let function_ident = format_ident!("wrapped_lua_function_{}", field_ident); + let function_name = field_ident.to_string(); + + let setter = if let Some(namespace_path) = namespace_path { + // ```lua + // local globals = { + // ns1: { + // ns_value, + // ns_value2, + // }, + // ns2: { + // ns_value3, + // } + // } + // ns1.ns_value + // ``` + let mut counter = 0; + let namespace_table_gen: TokenStream2 = namespace_path.iter().map(|path| { + let path = path.to_string(); + counter += 1; + let mut set_function: TokenStream2 = Default::default(); + if counter == namespace_path.len() { + set_function = quote! { + table.set(#function_name, #function_ident).expect( + "Setting a static global value should work" + ); + }; + } + quote! { + let table: mlua::Table = { + if table.contains_key(#path).expect("This check should work") { + let table2 = table.get(#path).expect("This was already checked"); + table2 + } else { + table.set(#path, lua.create_table().expect("This should also always work")).expect("Setting this value should work"); + table.get(#path).expect("This was set, just above") + } + }; + #set_function + } + }).collect(); + + quote! { + let table = &globals; + { + #namespace_table_gen + } + } + } else { + quote! { + globals.set(#function_name, #function_ident).expect( + "Setting a static global value should work" + ); + } + }; + quote! { + { + let #function_ident = lua.create_async_function(#field_ident).expect( + &format!( + "The function: `{}` should be defined", + #function_name + ) + ); + #setter + } + } +} diff --git a/src/generate/lua_wrapper/mod.rs b/src/generate/lua_wrapper/mod.rs new file mode 100644 index 0000000..ddbb235 --- /dev/null +++ b/src/generate/lua_wrapper/mod.rs @@ -0,0 +1,22 @@ +use proc_macro2::TokenStream as TokenStream2; +use quote::quote; + +use crate::{ + generate::lua_wrapper::{ + lua_functions_to_globals::generate_add_lua_functions_to_globals, + rust_wrapper_functions::generate_rust_wrapper_functions, + }, + DataCommandEnum, +}; + +mod lua_functions_to_globals; +mod rust_wrapper_functions; + +pub fn lua_wrapper(input: &DataCommandEnum) -> TokenStream2 { + let add_lua_functions_to_globals = generate_add_lua_functions_to_globals(input); + let rust_wrapper_functions = generate_rust_wrapper_functions(None, input); + quote! 
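+    // Emit both generated blocks one after the other; `quote!` simply
+    // concatenates the interpolated token streams.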
{ + #add_lua_functions_to_globals + #rust_wrapper_functions + } +} diff --git a/src/generate/lua_wrapper/rust_wrapper_functions/mod.rs b/src/generate/lua_wrapper/rust_wrapper_functions/mod.rs new file mode 100644 index 0000000..c0dc052 --- /dev/null +++ b/src/generate/lua_wrapper/rust_wrapper_functions/mod.rs @@ -0,0 +1,231 @@ +use convert_case::{Case, Casing}; +use proc_macro2::TokenStream as TokenStream2; +use quote::quote; +use syn::{punctuated::Punctuated, token::Comma, GenericArgument, Lifetime, Token, Type}; + +use crate::{ + command_enum_parsing::{Field, FunctionDeclaration, NamespacePath}, + generate::{get_input_type_of_bare_fn_field, get_return_type_of_bare_fn_field}, + DataCommandEnum, +}; + +pub fn generate_rust_wrapper_functions( + namespace: Option<&NamespacePath>, + input: &DataCommandEnum, +) -> TokenStream2 { + generate_rust_wrapper_functions_rec(namespace, &input.fields) +} + +pub fn generate_rust_wrapper_functions_rec( + namespace: Option<&NamespacePath>, + input: &Punctuated, +) -> TokenStream2 { + let wrapped_functions: TokenStream2 = input + .iter() + .map(|field| match field { + Field::Function(fun_field) => { + wrap_lua_function(namespace.unwrap_or(&Default::default()), fun_field) + } + Field::Namespace(nasp) => { + let mut passed_namespace = namespace.unwrap_or(&Default::default()).clone(); + nasp.path + .clone() + .into_iter() + .for_each(|val| passed_namespace.push(val)); + generate_rust_wrapper_functions_rec(Some(&passed_namespace), &nasp.fields) + } + }) + .collect(); + + quote! { + #wrapped_functions + } +} + +fn wrap_lua_function(namespace: &NamespacePath, field: &FunctionDeclaration) -> TokenStream2 { + let input_type = get_input_type_of_bare_fn_field(field); + let return_type = get_return_type_of_bare_fn_field(field); + + let function_name = &field.name; + let function_body = get_function_body(&namespace, field, input_type.is_some(), &return_type); + + let lifetime_args = + get_and_add_lifetimes_form_inputs_and_outputs(input_type.clone(), return_type); + + let input_type = input_type + .unwrap_or(syn::parse(quote! {()}.into()).expect("This is static, it always works")); + + quote! 
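+    // A sketch of what this expands to for a hypothetical
+    // `greet: fn(String) -> String` declaration:
+    //   async fn greet<'lua>(lua: &mlua::Lua, input: String)
+    //       -> Result<mlua::Value<'lua>, mlua::Error>
+    // (the exact `Result` type parameters are an assumption based on mlua's
+    // async function convention).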
{ + async fn #function_name <#lifetime_args>( + lua: &mlua::Lua, + input: #input_type + ) -> Result { + #function_body + } + } +} + +fn get_and_add_lifetimes_form_inputs_and_outputs<'a>( + input_type: Option, + return_type: Option, +) -> Punctuated { + fn get_lifetime_args_from_type<'a>(return_type: syn::Type) -> Option> { + match return_type { + syn::Type::Path(path) => { + let args_to_final_path_segment = &path + .path + .segments + .last() + .expect("The path should have a last segment") + .arguments; + match args_to_final_path_segment { + syn::PathArguments::None => + /* We ignore this case */ + { + None + } + syn::PathArguments::AngleBracketed(angle) => { + let lifetime_args: Vec<_> = angle + .args + .iter() + .filter_map(|arg| { + if let GenericArgument::Lifetime(lifetime) = arg { + Some(lifetime.to_owned()) + } else { + None + } + }) + .collect(); + return Some(lifetime_args); + } + syn::PathArguments::Parenthesized(_) => todo!("Parenthesized Life time"), + } + } + syn::Type::Tuple(_) => { + // TODO(@soispha): I don't really know if tuples can have lifetimes, but let's just + // ignore them for now <2023-10-14> + dbg!("Ignoring tuple lifetime!"); + + None + } + non_path => todo!("Non path lifetime: {:#?}", non_path), + } + } + + let mut output: Punctuated = Punctuated::new(); + if let Some(input_type) = input_type { + let lifetime_args = get_lifetime_args_from_type(input_type).unwrap_or(vec![]); + lifetime_args.into_iter().for_each(|arg| output.push(arg)); + } + if let Some(return_type) = return_type { + let lifetime_args = get_lifetime_args_from_type(return_type).unwrap_or(vec![]); + lifetime_args.into_iter().for_each(|arg| output.push(arg)); + } + output +} + +fn get_function_body( + namespace: &NamespacePath, + field: &FunctionDeclaration, + has_input: bool, + output_type: &Option, +) -> TokenStream2 { + let command_name = field + .name + .to_string() + .from_case(Case::Snake) + .to_case(Case::Pascal); + + let command_ident = { + if has_input { + format!("{}(", command_name) + } else { + command_name.clone() + } + }; + + let command_namespace: String = { + namespace + .iter() + .map(|path| { + let path_enum_name: String = path + .to_string() + .from_case(Case::Snake) + .to_case(Case::Pascal); + + path_enum_name.clone() + "(" + &path_enum_name + "::" + }) + .collect::>() + .join("") + }; + + let send_output: TokenStream2 = { + let finishing_brackets = { + if has_input { + let mut output = "input.clone()".to_owned(); + output.push_str(&(0..namespace.len()).map(|_| ')').collect::()); + output + } else { + (0..namespace.len()).map(|_| ')').collect::() + } + }; + + ("Event::CommandEvent( Command::".to_owned() + + &command_namespace + + &command_ident + + &finishing_brackets + + {if has_input {")"} else {""}} /* Needed as command_name opens one */ + + ",Some(callback_tx))") + .parse() + .expect("This code should be valid") + }; + + let function_return = if let Some(_) = output_type { + quote! { + return Ok(output.into_lua(lua).expect("This conversion should always work")); + } + } else { + quote! { + return Ok(mlua::Value::Nil); + } + }; + let does_function_expect_output = if output_type.is_some() { + quote! { + // We didn't receive output but expected output. Raise an error to notify the lua code + // about it. + return Err(mlua::Error::ExternalError(std::sync::Arc::new( + err + ))); + } + } else { + quote! { + // We didn't receive output and didn't expect output. Everything went well! + return Ok(mlua::Value::Nil); + } + }; + + quote! 
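+    // The generated body: open a oneshot callback channel, send the command
+    // event through the sender stored in the Lua app data, then await the
+    // callback to decide what to hand back to Lua.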
{ + let (callback_tx, callback_rx) = tokio::sync::oneshot::channel::(); + let tx: mlua::AppDataRef> = + lua.app_data_ref().expect("This should exist, it was set before"); + + (*tx) + .send(#send_output) + .await + .expect("This should work, as the receiver is not dropped"); + + cli_log::info!("Sent CommandEvent: `{}`", #command_name); + + match callback_rx.await { + Ok(output) => { + cli_log::info!( + "Lua function: `{}` returned output to lua: `{}`", #command_name, &output + ); + #function_return + }, + Err(err) => { + #does_function_expect_output + } + }; + } +} diff --git a/src/generate/mod.rs b/src/generate/mod.rs new file mode 100644 index 0000000..103b3f1 --- /dev/null +++ b/src/generate/mod.rs @@ -0,0 +1,56 @@ +mod command_enum; +mod lua_wrapper; + +pub use command_enum::command_enum; +pub use lua_wrapper::lua_wrapper; +use syn::{ReturnType, Type, TypeBareFn}; + +use crate::command_enum_parsing::FunctionDeclaration; + +pub fn get_bare_fn_input_type(function: &TypeBareFn) -> Option { + if function.inputs.len() == 1 { + Some( + function + .inputs + .first() + .expect("Only one element exists, we checked the length above") + .ty + .clone(), + ) + } else if function.inputs.len() == 0 { + // No inputs, so we can't return a type + None + } else { + unreachable!( + "The Function can only take one or zero arguments. + Use a tuple `(arg1, arg2)` if you want more" + ); + } +} + +pub fn get_input_type_of_bare_fn_field(field: &FunctionDeclaration) -> Option { + match &field.ty { + syn::Type::BareFn(function) => get_bare_fn_input_type(&function), + _ => unimplemented!( + "Please specify the type as a bare fn type. + That is: `fn() -> `" + ), + } +} +pub fn get_return_type_of_bare_fn_field(field: &FunctionDeclaration) -> Option { + match &field.ty { + syn::Type::BareFn(function) => get_bare_fn_return_type(&function), + _ => unimplemented!( + "Please specify the type as a bare fn type. + That is: `fn() -> `" + ), + } +} + +pub fn get_bare_fn_return_type(function: &TypeBareFn) -> Option { + let return_path: &ReturnType = &function.output; + match return_path { + ReturnType::Default => None, + ReturnType::Type(_, return_type) => Some(*return_type.to_owned()), + } +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..21aa639 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,97 @@ +use command_enum_parsing::DataCommandEnum; +use config::TrixyConfig; +use proc_macro::TokenStream; +use proc_macro2::TokenStream as TokenStream2; +use quote::quote; +use syn::parse_macro_input; + +use crate::trixy_lang::parse_trixy_lang; + +mod command_enum_parsing; +mod config; +mod generate; +mod trixy_lang; + +/// This is the heart of the command api +/// It mainly does two things: +/// - Generate a command enum +/// - Wrap the enum in all supported languages (only lua for now) +/// - Generate wrapper lua function for each command +/// - Generate a `add_lua_functions_to_globals` function, which adds +/// the rust wrapper functions to the lua globals. +/// +/// The input and output values of the wrapped functions are derived from the values specified in +/// the input to the `parse_command_enum` proc macro. +/// +/// For example this rust code: +/// ```no_run +/// parse_command_enum! 
{ +/// /// Greets the user +/// greet: fn(String) -> String, +/// } +/// ``` +/// results in this expanded code: +/// ```no_run +/// #[derive(Debug)] +/// pub enum Command { +/// Greet(String), +/// } +/// pub fn add_lua_functions_to_globals( +/// lua: mlua::Lua, +/// tx: tokio::sync::mpsc::Sender, +/// ) -> mlua::Lua { +/// lua.set_app_data(tx); +/// let globals = lua.globals(); +/// { +/// let wrapped_lua_function_greet = lua +/// .create_async_function(greet) +/// .expect( +/// format!( +/// "The function: `{}` should be defined", +/// "greet", +/// ) +/// ); +/// globals +/// .set("greet", wrapped_lua_function_greet) +/// .expect("Setting a static global value should work"); +/// } +/// drop(globals); +/// lua +/// } +/// async fn greet(lua: &mlua::Lua, input: String) -> Result { +/// let (callback_tx, callback_rx) = tokio::sync::oneshot::channel::(); +/// let tx: core::cell::Ref> = lua +/// .app_data_ref() +/// .expect("This should exist, it was set before"); +/// (*tx) +/// .send(Event::CommandEvent(Command::Greet(input.clone()), Some(callback_tx))) +/// .await +/// .expect("This should work, as the receiver is not dropped"); +/// match callback_rx.await { +/// Ok(output) => { +/// return Ok(output); +/// } +/// Err(err) => { +/// return Err(mlua::Error::ExternalError(std::sync::Arc::new(err))); +/// } +/// }; +/// } +/// ``` +#[proc_macro] +pub fn trixy_generate(input: TokenStream) -> TokenStream { + let input = parse_macro_input!(input as TrixyConfig); + + let trixy_code = parse_trixy_lang(input.get_path()); + todo!() + // // Build the language wrappers + // let lua_wrapper: TokenStream2 = generate::lua_wrapper(&input); + // + // // Build the final enum + // let command_enum = generate::command_enum(&input); + + // let output = quote! { + // #command_enum + // #lua_wrapper + // }; + // output.into() +} diff --git a/trixy-lang_parser/.gitignore b/trixy-lang_parser/.gitignore new file mode 100644 index 0000000..20c0ba9 --- /dev/null +++ b/trixy-lang_parser/.gitignore @@ -0,0 +1,6 @@ +# build +/target +/result + +# This crate is a library +Cargo.lock diff --git a/trixy-lang_parser/Cargo.toml b/trixy-lang_parser/Cargo.toml new file mode 100644 index 0000000..500ee94 --- /dev/null +++ b/trixy-lang_parser/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "trixy-lang_parser" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +clap = { version = "4.4.11", features = ["derive"] } +pretty_assertions = "1.4.0" +thiserror = "1.0.50" diff --git a/trixy-lang_parser/README.md b/trixy-lang_parser/README.md new file mode 100644 index 0000000..0a6f6bb --- /dev/null +++ b/trixy-lang_parser/README.md @@ -0,0 +1,6 @@ +# trixy-lang_parser +This crate contains a parser (and lexer) for the Trixy language. +The corresponding grammar is in the grammar file [here](./docs/grammar.ebnf) encoded in [Extended Backus-Naur Form](https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form). + +## Docs +Run `./generate_docs` to turn the grammar file into railroad diagrams. diff --git a/trixy-lang_parser/docs/grammar.ebnf b/trixy-lang_parser/docs/grammar.ebnf new file mode 100644 index 0000000..d495fc3 --- /dev/null +++ b/trixy-lang_parser/docs/grammar.ebnf @@ -0,0 +1,32 @@ +# (* +# Trixy is fully whitespace independent, this means that you can +# interleave whitespace in the definitions. +# The same applies to comments: +# - Line comments (`// \n`) and +# - Block comments (`/* */`). 
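+#
+# As an example, the declaration
+#   fn hi(name: String) -> String;
+# matches the `Function` production below as:
+#   "fn" Identifier "(" NamedType ")" "->" Type ";"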
+# *) + +CommandSpec = {Function | Namespace | Enumeration | Structure } ; + +Function = {DocComment} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ; +Namespace = {DocComment} "nasp" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ; +Structure = {DocComment} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}" ";"; +Enumeration = {DocComment} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}" ";"; + +Type = Identifier ["<" Type {"," Type} ">"]; + +Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ; +DocIdentifier = {DocComment} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ; + +NamedType = Identifier ":" Type; +DocNamedType = {DocComment} Identifier ":" Type; + + +DocComment = "///" {ANYTHING} LineEnding; + +Comment = "//" [ NOT ("/" {ANYTHING} LineEnding) | "//"] {ANYTHING} LineEnding; +LineEnding = "\\n" | "\\r" | "\\r\\n"; + +# (* +# vim: ft=ebnf +# *) diff --git a/trixy-lang_parser/docs/grammar.pdf b/trixy-lang_parser/docs/grammar.pdf new file mode 100644 index 0000000..716a39f Binary files /dev/null and b/trixy-lang_parser/docs/grammar.pdf differ diff --git a/trixy-lang_parser/example/comments.tri b/trixy-lang_parser/example/comments.tri new file mode 100644 index 0000000..597996a --- /dev/null +++ b/trixy-lang_parser/example/comments.tri @@ -0,0 +1,12 @@ +fn print(message: String); + +/// First doc comment +// Some more text +nasp trinitrix { + /// Second doc comment + fn hi(name: String) -> String; +} + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy-lang_parser/example/empty.tri b/trixy-lang_parser/example/empty.tri new file mode 100644 index 0000000..e69de29 diff --git a/trixy-lang_parser/example/failing.tri b/trixy-lang_parser/example/failing.tri new file mode 100644 index 0000000..7227248 --- /dev/null +++ b/trixy-lang_parser/example/failing.tri @@ -0,0 +1,9 @@ +fn print(message: CommandTransferValue); + +nasp trinitrix { {} + fn hi honner(name: String) -> String; ; +} + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy-lang_parser/example/failing_comments.tri b/trixy-lang_parser/example/failing_comments.tri new file mode 100644 index 0000000..7aa985b --- /dev/null +++ b/trixy-lang_parser/example/failing_comments.tri @@ -0,0 +1,13 @@ +fn print(message: CommandTransferValue); + +/// Some doc comment +// Some more text +nasp trinitrix { + fn hi(name: String) -> String; +} + +/// Trailing doc comment (I will fail) + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy-lang_parser/example/failing_types.tri b/trixy-lang_parser/example/failing_types.tri new file mode 100644 index 0000000..8e5ed74 --- /dev/null +++ b/trixy-lang_parser/example/failing_types.tri @@ -0,0 +1,10 @@ +struct Callback { + func: Function, + timeout: Integer, +}; + +fn execute_callback(callback: Name); + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy-lang_parser/example/full.tri b/trixy-lang_parser/example/full.tri new file mode 100644 index 0000000..9b2f065 --- /dev/null +++ b/trixy-lang_parser/example/full.tri @@ -0,0 +1,136 @@ +/// Prints to the output, with a newline. 
+// HACK(@soispha): The stdlib Lua `print()` function has stdout as output hardcoded, +// redirecting stdout seems too much like a hack thus we are just redefining the print function +// to output to a controlled output. <2023-09-09> +//fn print(input: CommandTransferValue); + +nasp trinitrix { + /// Language specific functions, which mirror the `trinitrix.api` namespace. + /// That is, if you have to choose between a `std` and a `api` function choose the `std` + /// one as it will most likely be more high-level and easier to use (as it isn't abstracted + /// over multiple languages). Feel free to drop down to the lower level api, if you feel + /// like that more, it should be as stable and user-oriented as the `std` functions + nasp std {} + + /// Debug only functions, these are effectively useless + nasp debug { + enum UserGreet { + Friendly, + Angrily, + Hastly + }; + struct GreetedUser { + names: Vec, + new: GreetedUser, + state: UserGreet + }; + /// Greets the user + fn greet(input: String) -> String; + + /// Returns a table of greeted users + fn greet_multiple() -> GreetedUser; + } + + /// General API to change stuff in Trinitrix + nasp api { + /// Closes the application + fn exit(); + + /// Send a message to the current room + /// The send message is interpreted literally. + fn room_message_send(msg: String); + + /// Open the help pages at the first occurrence of + /// the input string if it is Some, otherwise open + /// the help pages at the start + fn help(input: Option); + + // Register a function to be used with the Trinitrix API + // (This function is actually implemented in the std namespace) + /* fn register_function(function: RawFunction); */ + + /// Function that change the UI, or UI state + nasp ui { + /// Shows the command line + fn command_line_show(); + + /// Hides the command line + fn command_line_hide(); + + /// Go to the next plane + fn cycle_planes(); + /// Go to the previous plane + fn cycle_planes_rev(); + + /// Sets the current app mode to Normal / navigation mode + fn set_mode_normal(); + /// Sets the current app mode to Insert / editing mode + fn set_mode_insert(); + } + + /// Manipulate keymappings, the mode is specified as a String build up of all mode + /// the keymapping should be active in. The mapping works as follows: + /// n => normal Mode + /// c => command Mode + /// i => insert Mode + /// + /// The key works in a similar matter, specifying the required keypresses to trigger the + /// callback. For example "aba" for require the user to press "a" then "b" then "a" again + /// to trigger the mapping. Special characters are encoded as follows: + /// "ba" => "Ctrl+a" then "b" then "a" + /// "" => "A" or "Shift+a" + /// "A" => "A" + /// " " => "Alt+a" () or "Meta+a"() (most terminals can't really differentiate between these characters) + /// "a" => "a" then "Ctrl+b" then "Ctrl+a" (also works for Shift, Alt and Super) + /// "" => "Ctrl+Shift+Alt+b" (the ordering doesn't matter) + /// "a " => "a" then a literal space (" ") + /// "å🙂" => "å" then "🙂" (full Unicode support!) + /// "" => escape key + /// "" => F3 key + /// "" => backspace key (and so forth) + /// "" => a literal "-" + /// "" or "" => a literal "<" + /// "" or "" => a literal ">" + /// + /// The callback MUST be registered first by calling + /// `trinitrix.api.register_function()` the returned value can than be used to + /// set the keymap. 
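+        ///
+        /// For example (a hypothetical Lua-side call, assuming `my_callback`
+        /// was returned by `trinitrix.api.register_function()`):
+        ///   trinitrix.api.keymaps.add("n", "aa", my_callback)
+        /// runs `my_callback` after pressing "a" twice in normal mode.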
+ nasp keymaps { + /// Add a new keymapping + fn add(mode: String, key: String, callback: Function); + + /// Remove a keymapping + /// + /// Does nothing, if the keymapping doesn't exists + fn remove(mode: String, key: String); + + /// List declared keymappings + fn get(mode: String); + } + + /// Functions only used internally within Trinitrix + nasp raw { + /// Send an error to the default error output + fn raise_error(input: String); + + /// Send output to the default output + /// This is mainly used to display the final + /// output of evaluated lua commands. + fn display_output(input: String); + + /// Input a character without checking for possible keymaps + /// If the current state does not expect input, this character is ignored + /// The encoding is the same as in the `trinitrix.api.keymaps` commands + fn send_input_unprocessed(input: String); + + /// This namespace is used to store some command specific data (like functions, as + /// ensuring memory locations stay allocated in garbage collected language is hard) + /// + /// Treat it as an implementation detail + nasp __private {} + } + } +} + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy-lang_parser/example/multiple.tri b/trixy-lang_parser/example/multiple.tri new file mode 100644 index 0000000..0fca007 --- /dev/null +++ b/trixy-lang_parser/example/multiple.tri @@ -0,0 +1,13 @@ +fn print(message: CommandTransferValue); + +nasp trinitrix { + fn hi(name: String) -> String; +} + +nasp trinitrix { + fn ho(name: String, name2: String) -> String; +} + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy-lang_parser/example/simple.tri b/trixy-lang_parser/example/simple.tri new file mode 100644 index 0000000..c9b5c9a --- /dev/null +++ b/trixy-lang_parser/example/simple.tri @@ -0,0 +1,9 @@ +fn print(message: CommandTransferValue); + +nasp trinitrix { + fn hi(name: String) -> String; +} + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy-lang_parser/example/types.tri b/trixy-lang_parser/example/types.tri new file mode 100644 index 0000000..b599445 --- /dev/null +++ b/trixy-lang_parser/example/types.tri @@ -0,0 +1,18 @@ +nasp trinitrix { + struct Callback { + func: Function, + timeout: Integer, + }; + + enum CallbackPriority { + High, + Medium, + Low, + }; + + fn execute_callback(callback: Callback, priority: CallbackPriority); +} + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy-lang_parser/generate_docs b/trixy-lang_parser/generate_docs new file mode 100755 index 0000000..f84a636 --- /dev/null +++ b/trixy-lang_parser/generate_docs @@ -0,0 +1,9 @@ +#!/usr/bin/env sh + + + +ebnf2pdf make "./docs/grammar.ebnf" +mv grammar.ebnf.pdf ./docs/grammar.pdf + + +# vim: ft=sh diff --git a/trixy-lang_parser/src/command_spec/checked.rs b/trixy-lang_parser/src/command_spec/checked.rs new file mode 100644 index 0000000..30d0eda --- /dev/null +++ b/trixy-lang_parser/src/command_spec/checked.rs @@ -0,0 +1,154 @@ +//! This module contains the already type checked types. 
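+//!
+//! They are produced from their [`unchecked`](super::unchecked) counterparts
+//! once type checking has run. A minimal sketch of one such conversion (the
+//! `From<TokenKind>` impl below; it panics on non-identifier tokens, and the
+//! paths assume both modules are publicly reachable):
+//!
+//! ```no_run
+//! # use trixy_lang_parser::command_spec::checked::Identifier;
+//! # use trixy_lang_parser::lexing::TokenKind;
+//! let ident: Identifier = TokenKind::Identifier("greet".to_owned()).into();
+//! assert_eq!(ident.name, "greet");
+//! ```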
+
+use std::fmt::Display;
+
+use crate::lexing::TokenKind;
+
+use super::unchecked;
+
+/// These are the "primitive" types used in Trixy; you can use any of them to create new structures
+pub const BASE_TYPES: [ConstIdentifier; 8] = [
+    Identifier::from("Integer"),
+    Identifier::from("Float"),
+    Identifier::from("Decimal"),
+    Identifier::from("String"),
+    Identifier::from("Function"),
+    Identifier::from("Option"),
+    Identifier::from("Result"),
+    Identifier::from("Vec"),
+];
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Namespace {
+    pub name: Identifier,
+
+    pub functions: Vec<Function>,
+    pub structures: Vec<Structure>,
+    pub enumerations: Vec<Enumeration>,
+    pub namespaces: Vec<Namespace>,
+    pub attributes: Vec<Attribute>,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct CommandSpec {
+    pub structures: Vec<Structure>,
+    pub enumerations: Vec<Enumeration>,
+    pub functions: Vec<Function>,
+    pub namespaces: Vec<Namespace>,
+}
+
+impl From<Namespace> for CommandSpec {
+    fn from(value: Namespace) -> Self {
+        Self {
+            structures: value.structures,
+            enumerations: value.enumerations,
+            functions: value.functions,
+            namespaces: value.namespaces,
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Structure {
+    pub identifier: Identifier,
+    pub contents: Vec<DocNamedType>,
+    pub attributes: Vec<Attribute>,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Enumeration {
+    pub identifier: Identifier,
+    pub states: Vec<DocIdentifier>,
+    pub attributes: Vec<Attribute>,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Function {
+    pub identifier: Identifier,
+    pub inputs: Vec<NamedType>,
+    pub output: Option<Type>,
+    pub attributes: Vec<Attribute>,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Type {
+    pub identifier: Identifier,
+    pub generic_args: Vec<Type>,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct NamedType {
+    pub name: Identifier,
+    pub r#type: Type,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+pub struct DocNamedType {
+    pub name: Identifier,
+    pub r#type: Type,
+    pub attributes: Vec<Attribute>,
+}
+
+impl From<TokenKind> for Identifier {
+    fn from(value: TokenKind) -> Self {
+        match value {
+            TokenKind::Identifier(ident) => Identifier { name: ident },
+            _ => {
+                panic!(
+                    "Tried to convert a non-Identifier TokenKind to an Identifier. This is a bug.
+                    Token was: '{}'
+                    ",
+                    value
+                )
+            }
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
+pub enum Attribute {
+    #[allow(non_camel_case_types)]
+    doc(String),
+}
+impl From<unchecked::Attribute> for Attribute {
+    fn from(value: unchecked::Attribute) -> Self {
+        match value {
+            unchecked::Attribute::doc { content: name, .. 
} => Self::doc(name), + } + } +} + +/// An Identifier +/// These include +/// - Variable names +/// - Function names +/// - Namespace names +/// - Type names +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Identifier { + pub name: String, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct DocIdentifier { + pub name: String, + pub attributes: Vec, +} + +/// A const version of [Identifier] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct ConstIdentifier { + pub name: &'static str, +} + +impl Display for Identifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.name) + } +} + +impl Identifier { + const fn from(value: &'static str) -> ConstIdentifier { + ConstIdentifier { name: value } + } +} diff --git a/trixy-lang_parser/src/command_spec/mod.rs b/trixy-lang_parser/src/command_spec/mod.rs new file mode 100644 index 0000000..1bf868c --- /dev/null +++ b/trixy-lang_parser/src/command_spec/mod.rs @@ -0,0 +1,2 @@ +pub mod checked; +pub mod unchecked; diff --git a/trixy-lang_parser/src/command_spec/unchecked.rs b/trixy-lang_parser/src/command_spec/unchecked.rs new file mode 100644 index 0000000..5757ee0 --- /dev/null +++ b/trixy-lang_parser/src/command_spec/unchecked.rs @@ -0,0 +1,127 @@ +//! This module contains the not type checked types. +//! These are generated on the first pass of the parser, to be later converted into the checked +//! ones. + +use std::fmt::{Display, Write}; + +use crate::lexing::{Token, TokenSpan}; + +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord)] +pub struct CommandSpec { + pub structures: Vec, + pub enumerations: Vec, + pub functions: Vec, + pub namespaces: Vec, +} + +impl From for Namespace { + fn from(value: CommandSpec) -> Self { + Self { + name: Token::get_dummy(), + functions: value.functions, + structures: value.structures, + enumerations: value.enumerations, + namespaces: value.namespaces, + attributes: vec![], + } + } +} + +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Namespace { + pub name: Token, // Will later become an Identifier + + pub functions: Vec, + pub structures: Vec, + pub enumerations: Vec, + pub namespaces: Vec, + + pub attributes: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub enum Declaration { + Function(Function), + Structure(Structure), + Enumeration(Enumeration), + Namespace(Namespace), +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub enum Attribute { + #[allow(non_camel_case_types)] + doc { content: String, span: TokenSpan }, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Function { + pub identifier: Token, // Will later become an Identifier + pub inputs: Vec, + pub output: Option, + pub attributes: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Structure { + pub identifier: Token, // Will later become an Identifier + pub contents: Vec, + pub attributes: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Enumeration { + pub identifier: Token, // Will later become an Identifier + pub states: Vec, // Will later become an Identifier + pub attributes: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct DocToken { + pub token: Token, + pub attributes: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct DocNamedType { + pub name: Token, // Will later become an Identifier + pub r#type: Type, + pub attributes: Vec, +} + 
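+// A sketch of the node the parser is expected to build for a struct field
+// such as `timeout: Integer` (from `example/types.tri`); the name and the
+// type identifier are stored as full `Token`s at this stage:
+//
+// DocNamedType {
+//     name: Token(Identifier("timeout")),
+//     r#type: Type { identifier: Token(Identifier("Integer")), generic_args: vec![] },
+//     attributes: vec![],
+// }
+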
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct NamedType { + pub name: Token, // Will later become an Identifier + pub r#type: Type, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Type { + pub identifier: Token, // Will later become an Identifier + pub generic_args: Vec, +} + +impl Display for Type { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let ident = match self.identifier.kind() { + crate::lexing::TokenKind::Identifier(ident) => ident, + _ => panic!("Tried to display a non identifier token in the Type display implementation. This is a bug"), + }; + + f.write_str(ident)?; + if !self.generic_args.is_empty() { + f.write_char('<')?; + let mut first_run = true; + for arg in &self.generic_args { + if !first_run { + f.write_str(", ")?; + } else { + first_run = false; + } + write!(f, "{}", arg)?; + } + f.write_char('>') + } else { + f.write_str("") + } + } +} diff --git a/trixy-lang_parser/src/error.rs b/trixy-lang_parser/src/error.rs new file mode 100644 index 0000000..a98f518 --- /dev/null +++ b/trixy-lang_parser/src/error.rs @@ -0,0 +1,194 @@ +use core::fmt; + +use thiserror::Error; + +use crate::{ + lexing::{error::SpannedLexingError, TokenSpan}, + parsing::checked::error::SpannedParsingError, +}; + +#[derive(Error, Debug)] +pub enum TrixyError { + #[error(transparent)] + Lexing(#[from] SpannedLexingError), + #[error(transparent)] + Parsing(#[from] SpannedParsingError), +} + +/// The context of an Error. +#[derive(Debug, Clone)] +pub struct ErrorContext { + /// The span of the error in the source file + pub span: TokenSpan, + /// The span of the error in the context line relative to the context line + pub contexted_span: TokenSpan, + /// The line above the error + pub line_above: String, + /// The line below the error + pub line_below: String, + /// The line in which the error occurred + pub line: String, + /// The line number of the main error line + pub line_number: usize, +} + +impl ErrorContext { + pub fn from_span(span: TokenSpan, original_file: &str) -> Self { + let line_number = original_file + .chars() + .take(span.start) + .filter(|a| a == &'\n') + .count() + // This is here, as we are missing one newline with the method above + + 1; + + let lines: Vec<_> = original_file.lines().collect(); + + let line = (*lines + .get(line_number - 1) + .expect("This should work, as have *at least* one (index = 0) line")) + .to_owned(); + + let contexted_span = { + let matched_line: Vec<_> = original_file.match_indices(&line).collect(); + let (index, matched_line) = matched_line.first().expect("This first index should always match, as we took the line from the string in the first place"); + debug_assert_eq!(matched_line, &&line); + TokenSpan { + start: span.start - index, + end: span.end - index, + } + }; + + let line_above = if line_number == 1 { + // We only have one line, so no line above + "".to_owned() + } else { + (*lines + .get((line_number - 1) - 1) + .expect("We checked that this should work")) + .to_owned() + }; + + let line_below = if lines.len() - 1 > line_number { + // We have a line after the current line + (*lines + .get((line_number + 1) - 1) + .expect("We checked that this should work")) + .to_owned() + } else { + "".to_owned() + }; + + Self { + span, + contexted_span, + line_above, + line_below, + line, + line_number, + } + } + + pub fn from_index(start: usize, orginal_file: &str) -> Self { + let span = TokenSpan { + start, + end: start + 1, + }; + Self::from_span(span, orginal_file) + } + + pub fn 
get_error_line(&self, source_error: &str) -> String { + // deconstruct the structure + let ErrorContext { + contexted_span, + line_number, + .. + } = self; + + let mut output = String::new(); + output.push_str("\x1b[92;1m"); + + // pad to accommodate the line number printing. + // 32 -> needs two spaces padding to print it + line_number.to_string().chars().for_each(|_| { + output.push(' '); + }); + + // pad to the beginning of the error + for _ in 0..contexted_span.start { + output.push(' '); + } + + // push the error markers + for _ in contexted_span.start..contexted_span.end { + output.push('^'); + } + + // // pad until end of line + // for _ in contexted_span.end..(line.len() - 1) { + // output.push('-'); + // } + // + // additional space to avoid having to end with a '-' + output.push(' '); + + output.push_str("help: "); + + output.push_str(source_error); + output.push_str("\x1b[0m"); + output + } +} + +pub trait AdditionalHelp { + fn additional_help(&self) -> String; +} + +pub trait ErrorContextDisplay: fmt::Display { + type Error; + + fn error_fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result + where + ::Error: std::fmt::Display + AdditionalHelp, + { + let error_line = self + .context() + .get_error_line(&self.source().additional_help()); + + writeln!(f, "\x1b[31;1merror: \x1b[37;1m{}\x1b[0m", self.source())?; + + if !self.line_above().is_empty() { + writeln!( + f, + "\x1b[32;1m{} |\x1b[0m {}", + self.line_number() - 1, + self.line_above() + )?; + } + writeln!( + f, + "\x1b[36;1m{} |\x1b[0m {}", + self.line_number(), + self.line() + )?; + writeln!(f, " {}", error_line)?; + if !self.line_below().is_empty() { + writeln!( + f, + "\x1b[32;1m{} |\x1b[0m {}", + self.line_number() + 1, + self.line_below() + ) + } else { + write!(f, "") + } + } + + // getters + fn context(&self) -> &ErrorContext; + fn source(&self) -> &Self::Error; + fn line_number(&self) -> usize; + fn line_above(&self) -> &str; + fn line_below(&self) -> &str; + fn line(&self) -> &str; +} diff --git a/trixy-lang_parser/src/lexing/error.rs b/trixy-lang_parser/src/lexing/error.rs new file mode 100644 index 0000000..ecaf92e --- /dev/null +++ b/trixy-lang_parser/src/lexing/error.rs @@ -0,0 +1,77 @@ +use std::{error::Error, fmt::Display}; +use thiserror::Error; + +use crate::error::{AdditionalHelp, ErrorContext, ErrorContextDisplay}; + +#[derive(Error, Debug)] +pub enum LexingError { + #[error("No matches were found")] + NoMatchesTaken, + #[error("Expected an token, but reached end of file")] + UnexpectedEOF, + #[error("Char ('{0}') is not a know token!")] + UnknownCharacter(char), + #[error("The Arrow token must be of the form: ->")] + ExpectedArrow, + #[error("The Comment token must start with two slashes")] + ExpectedComment, +} + +impl AdditionalHelp for LexingError { + fn additional_help(&self) -> String { + match self { + LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(), + LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(), + LexingError::ExpectedArrow => "The `-` token is interpretet as a started arrow (`->`), but we could not find the arrow tip (`>`)".to_owned(), + LexingError::UnknownCharacter(char) => { + format!("This char: `{char}`; is not a valid token") + }, + LexingError::ExpectedComment => "The '/' started comment parsing, but I could not find a matching '/'".to_owned(), + } + } +} + +#[derive(Debug)] +pub struct SpannedLexingError { + pub source: LexingError, + pub context: ErrorContext, +} + +impl Error for SpannedLexingError { + 
fn source(&self) -> Option<&(dyn Error + 'static)> {
+        Some(&self.source)
+    }
+}
+
+impl ErrorContextDisplay for SpannedLexingError {
+    type Error = LexingError;
+
+    fn context(&self) -> &crate::error::ErrorContext {
+        &self.context
+    }
+
+    fn line_number(&self) -> usize {
+        self.context.line_number
+    }
+
+    fn line_above(&self) -> &str {
+        &self.context.line_above
+    }
+
+    fn line_below(&self) -> &str {
+        &self.context.line_below
+    }
+
+    fn line(&self) -> &str {
+        &self.context.line
+    }
+
+    fn source(&self) -> &<Self as ErrorContextDisplay>::Error {
+        &self.source
+    }
+}
+impl Display for SpannedLexingError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.error_fmt(f)
+    }
+}
diff --git a/trixy-lang_parser/src/lexing/mod.rs b/trixy-lang_parser/src/lexing/mod.rs
new file mode 100644
index 0000000..77fd918
--- /dev/null
+++ b/trixy-lang_parser/src/lexing/mod.rs
@@ -0,0 +1,307 @@
+use std::fmt::Display;
+
+use self::{error::SpannedLexingError, tokenizer::Tokenizer};
+
+pub mod error;
+mod tokenizer;
+
+#[cfg(test)]
+mod test;
+
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq)]
+pub struct TokenStream {
+    pub original_file: String,
+    tokens: Vec<Token>,
+}
+
+impl TokenStream {
+    /// Turn a string of valid Trixy code into a list of tokens, including the
+    /// location of that token's start and end point in the original source code.
+    ///
+    /// Note the token indices represent the half-open interval `[start, end)`,
+    /// equivalent to `start .. end` in Rust.
+    pub fn lex(src: &str) -> Result<Self, SpannedLexingError> {
+        let mut tokenizer = Tokenizer::new(src);
+        let mut tokens = Vec::new();
+
+        while let Some(tok) = tokenizer.next_token()? {
+            tokens.push(tok);
+        }
+
+        // filter out comments
+        let tokens = tokens
+            .into_iter()
+            .filter(|token| !matches!(token.kind, TokenKind::Comment(_)))
+            .collect();
+
+        Ok(Self {
+            tokens,
+            original_file: src.to_owned(),
+        })
+    }
+
+    /// Get a token by index
+    pub fn get(&self, index: usize) -> Option<&Token> {
+        self.tokens.get(index)
+    }
+
+    /// Get a reference to the uppermost token, without modifying the token list
+    pub fn peek(&self) -> Option<&Token> {
+        self.tokens.last()
+    }
+
+    /// Remove the uppermost token
+    pub fn pop(&mut self) -> Token {
+        self.tokens.pop().expect("This should not be empty")
+    }
+
+    /// Reverses the underlying token vector.
+    /// This facilitates using the pop and peek methods to parse the tokens from the beginning,
+    /// not the end.
+    pub fn reverse(&mut self) {
+        self.tokens.reverse()
+    }
+
+    /// Check if the TokenStream is empty.
+    pub fn is_empty(&self) -> bool {
+        self.tokens.is_empty()
+    }
+}
+
+/// A token span is recorded in chars, counted from the beginning of the file.
+/// A token span like this, for example:
+/// ```dont_run
+///# use trixy_lang_parser::lexing::TokenSpan;
+/// TokenSpan {
+///     start: 20,
+///     end: 23,
+/// }
+/// ```
+/// signals that the token starts at the 20th char in the source file and ends on the 23rd.
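+///
+/// For example, lexing `nasp commands {` gives the `commands` identifier the
+/// span `TokenSpan { start: 5, end: 13 }` (half-open, as noted on
+/// [`TokenStream::lex`]).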
+#[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
+pub struct TokenSpan {
+    /// The start of the token span
+    pub start: usize,
+    /// The end of the token span
+    pub end: usize,
+}
+
+impl TokenSpan {
+    pub fn from_range(start: TokenSpan, end: TokenSpan) -> Self {
+        Self {
+            start: start.start,
+            end: end.end,
+        }
+    }
+}
+
+/// A Token
+#[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone)]
+pub struct Token {
+    /// The token's original location in the source file
+    pub span: TokenSpan,
+    pub kind: TokenKind,
+}
+
+impl Token {
+    /// Return the TokenKind of a token
+    pub fn kind(&self) -> &TokenKind {
+        &self.kind
+    }
+
+    /// Return the TokenSpan of a token
+    pub fn span(&self) -> &TokenSpan {
+        &self.span
+    }
+
+    /// Get a dummy token; this is intended for error handling
+    pub fn get_dummy() -> Token {
+        Self {
+            span: TokenSpan { start: 0, end: 0 },
+            kind: TokenKind::Dummy,
+        }
+    }
+}
+
+/// Possible kinds of tokens
+#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone)]
+pub enum TokenKind {
+    Keyword(Keyword),
+    Identifier(String),
+    Colon,
+    Semicolon,
+    Comma,
+    Arrow,
+    BraceOpen,
+    BraceClose,
+    ParenOpen,
+    ParenClose,
+    SquareOpen,
+    SquareClose,
+
+    DocComment(String),
+    Comment(String),
+
+    /// This is not a real TokenKind, but only used for error handling
+    #[default]
+    Dummy,
+}
+
+impl TokenKind {
+    pub fn same_kind(&self, other: &TokenKind) -> bool {
+        if let TokenKind::Identifier(_) = self {
+            if let TokenKind::Identifier(_) = other {
+                return true;
+            }
+        }
+        if let TokenKind::Comment(_) = self {
+            if let TokenKind::Comment(_) = other {
+                return true;
+            }
+        }
+        if let TokenKind::DocComment(_) = self {
+            if let TokenKind::DocComment(_) = other {
+                return true;
+            }
+        }
+        self == other
+    }
+}
+
+impl Display for TokenKind {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            TokenKind::Keyword(word) => write!(f, "KEYWORD({})", word),
+            TokenKind::Identifier(ident) => {
+                if ident.is_empty() {
+                    write!(f, "IDENTIFIER")
+                } else {
+                    write!(f, "IDENTIFIER({})", ident)
+                }
+            }
+            TokenKind::Colon => f.write_str("COLON"),
+            TokenKind::Semicolon => f.write_str("SEMICOLON"),
+            TokenKind::Comma => f.write_str("COMMA"),
+            TokenKind::Arrow => f.write_str("ARROW"),
+            TokenKind::BraceOpen => f.write_str("BRACEOPEN"),
+            TokenKind::BraceClose => f.write_str("BRACECLOSE"),
+            TokenKind::ParenOpen => f.write_str("PARENOPEN"),
+            TokenKind::ParenClose => f.write_str("PARENCLOSE"),
+            TokenKind::Dummy => f.write_str("DUMMY"),
+            TokenKind::SquareOpen => f.write_str("SQUAREOPEN"),
+            TokenKind::SquareClose => f.write_str("SQUARECLOSE"),
+            TokenKind::DocComment(text) => write!(f, "DOCCOMMENT({})", text),
+            TokenKind::Comment(text) => write!(f, "COMMENT({})", text),
+        }
+    }
+}
+
+/// Keywords used in the language
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
+pub enum Keyword {
+    /// Start a namespace declaration
+    #[allow(non_camel_case_types)]
+    nasp,
+    /// Start a function declaration
+    #[allow(non_camel_case_types)]
+    r#fn,
+    /// Start a structure declaration
+    #[allow(non_camel_case_types)]
+    r#struct,
+    /// Start an enum declaration
+    #[allow(non_camel_case_types)]
+    r#enum,
+}
+
+impl Display for Keyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Keyword::nasp => f.write_str("nasp"),
+            Keyword::r#fn => f.write_str("fn"),
+            Keyword::r#struct => f.write_str("struct"),
+            Keyword::r#enum => f.write_str("enum"),
+        }
+    }
+}
+
+/// Shorthand macro for generating a token from *anything* which can be
+/// converted into a `TokenKind`, or any of the `TokenKind` variants.
+///
+/// # Examples
+///
+/// ```
+/// use trixy_lang_parser::token;
+/// # fn main() {
+/// token![nasp];
+/// token![;];
+/// token![Arrow];
+/// # }
+/// ```
+#[macro_export]
+macro_rules! token {
+    [Semicolon] => { $crate::lexing::TokenKind::Semicolon };
+    [;] => { $crate::lexing::TokenKind::Semicolon };
+    [Colon] => { $crate::lexing::TokenKind::Colon };
+    [:] => { $crate::lexing::TokenKind::Colon };
+    [Comma] => { $crate::lexing::TokenKind::Comma };
+    [,] => { $crate::lexing::TokenKind::Comma };
+    [Arrow] => { $crate::lexing::TokenKind::Arrow };
+    [->] => { $crate::lexing::TokenKind::Arrow };
+    [SquareOpen] => { $crate::lexing::TokenKind::SquareOpen };
+    [<] => { $crate::lexing::TokenKind::SquareOpen };
+    [SquareClose] => { $crate::lexing::TokenKind::SquareClose };
+    [>] => { $crate::lexing::TokenKind::SquareClose };
+    [BraceOpen] => { $crate::lexing::TokenKind::BraceOpen };
+    // [{] => { $crate::lexing::TokenKind::BraceOpen };
+    [BraceClose] => { $crate::lexing::TokenKind::BraceClose };
+    // [}] => { $crate::lexing::TokenKind::BraceClose };
+    [ParenOpen] => { $crate::lexing::TokenKind::ParenOpen };
+    // [(] => { $crate::lexing::TokenKind::ParenOpen };
+    [ParenClose] => { $crate::lexing::TokenKind::ParenClose };
+    // [)] => { $crate::lexing::TokenKind::ParenClose };
+
+    [nasp] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::nasp) };
+    [fn] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#fn) };
+    [struct] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#struct) };
+    [enum] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#enum) };
+
+    // This only works for checking for an identifier or comment,
+    // see the `same_kind` method on TokenKind
+    [Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
+    [Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
+    [DocComment] => { $crate::lexing::TokenKind::DocComment("".to_owned()) };
+    [DocCommentMatch] => { $crate::lexing::TokenKind::DocComment(_doc_comment) };
+    [Comment] => { $crate::lexing::TokenKind::Comment("".to_owned()) };
+}
+
+#[cfg(test)]
+mod tests {
+    use super::TokenKind;
+    use crate::token;
+
+    macro_rules! token_macro_test {
+        ($name:ident, $from:tt, => $to:expr) => {
+            #[test]
+            fn $name() {
+                let got: TokenKind = token![$from];
+                let should_be = $to;
+
+                assert_eq!(got, should_be);
+            }
+        };
+    }
+
+    token_macro_test!(tok_expands_to_arrow, ->, => TokenKind::Arrow);
+    token_macro_test!(tok_expands_to_semicolon, Semicolon, => TokenKind::Semicolon);
+    token_macro_test!(tok_expands_to_nasp, nasp, => TokenKind::Keyword(crate::lexing::Keyword::nasp));
+    token_macro_test!(tok_expands_to_fn, fn, => TokenKind::Keyword(crate::lexing::Keyword::r#fn));
+}
diff --git a/trixy-lang_parser/src/lexing/test.rs b/trixy-lang_parser/src/lexing/test.rs
new file mode 100644
index 0000000..396f1cb
--- /dev/null
+++ b/trixy-lang_parser/src/lexing/test.rs
@@ -0,0 +1,194 @@
+use crate::lexing::{Keyword, Token, TokenKind, TokenSpan};
+
+use super::TokenStream;
+
+use pretty_assertions::assert_eq;
+
+#[test]
+fn test_lexing_trixy() {
+    let input = "
+nasp commands {
+    fn expect(event: String) -> String;
+}
+";
+    let token_stream = TokenStream::lex(input).unwrap();
+    let expected_token_stream = {
+        let tokens = vec![
+            Token {
+                span: TokenSpan { start: 1, end: 5 },
+                kind: TokenKind::Keyword(Keyword::nasp),
+            },
+            Token {
+                span: TokenSpan { start: 6, end: 14 },
+                kind: TokenKind::Identifier("commands".to_owned()),
+            },
+            Token {
+                span: TokenSpan { start: 15, end: 16 },
+                kind: TokenKind::BraceOpen,
+            },
+            Token {
+                span: TokenSpan { start: 21, end: 23 },
+                kind: TokenKind::Keyword(Keyword::r#fn),
+            },
+            Token {
+                span: TokenSpan { start: 24, end: 30 },
+                kind: TokenKind::Identifier("expect".to_owned()),
+            },
+            Token {
+                span: TokenSpan { start: 30, end: 31 },
+                kind: TokenKind::ParenOpen,
+            },
+            Token {
+                span: TokenSpan { start: 31, end: 36 },
+                kind: TokenKind::Identifier("event".to_owned()),
+            },
+            Token {
+                span: TokenSpan { start: 36, end: 37 },
+                kind: TokenKind::Colon,
+            },
+            Token {
+                span: TokenSpan { start: 38, end: 44 },
+                kind: TokenKind::Identifier("String".to_owned()),
+            },
+            Token {
+                span: TokenSpan { start: 44, end: 45 },
+                kind: TokenKind::ParenClose,
+            },
+            Token {
+                span: TokenSpan { start: 46, end: 48 },
+                kind: TokenKind::Arrow,
+            },
+            Token {
+                span: TokenSpan { start: 49, end: 55 },
+                kind: TokenKind::Identifier("String".to_owned()),
+            },
+            Token {
+                span: TokenSpan { start: 55, end: 56 },
+                kind: TokenKind::Semicolon,
+            },
+            Token {
+                span: TokenSpan { start: 57, end: 58 },
+                kind: TokenKind::BraceClose,
+            },
+        ];
+        TokenStream {
+            tokens,
+            original_file: input.to_owned(),
+        }
+    };
+    assert_eq!(token_stream, expected_token_stream)
+}
+
+#[test]
+fn test_failing_lexing() {
+    let input = "
+nasp trinitrix {
+    nasp - commands {
+        fn hi(strings: String) -> String;
+    }
+}
+";
+    let token_stream = TokenStream::lex(input);
+    eprintln!("{}", token_stream.as_ref().unwrap_err());
+
+    // uncomment the next line to see the error message, without having to remove cargo's output filter
+    // assert!(!token_stream.is_err());
+    assert!(token_stream.is_err());
+}
+
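(Aside, not part of the commit: the interplay between the `token!` macro and `TokenKind::same_kind` is easiest to see in a tiny sketch. It uses only the public items declared in this diff.)

```rust
use trixy_lang_parser::{lexing::TokenKind, token};

fn main() {
    // `token![Ident]` expands to an Identifier with an empty payload,
    // so a plain `==` comparison against a concrete identifier fails...
    let pattern = token![Ident];
    let concrete = TokenKind::Identifier("commands".to_owned());
    assert_ne!(pattern, concrete);
    // ...while `same_kind` compares only the variant and ignores the payload.
    assert!(pattern.same_kind(&concrete));
}
```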
+#[test]
+fn test_multiple_tokens() {
+    let input = "
+nasp nasp {{
+}}
+";
+    let token_stream = TokenStream::lex(input).unwrap();
+    let expected_token_stream = {
+        let tokens = vec![
+            Token {
+                span: TokenSpan { start: 1, end: 5 },
+                kind: TokenKind::Keyword(Keyword::nasp),
+            },
+            Token {
+                span: TokenSpan { start: 6, end: 10 },
+                kind: TokenKind::Keyword(Keyword::nasp),
+            },
+            Token {
+                span: TokenSpan { start: 11, end: 12 },
+                kind: TokenKind::BraceOpen,
+            },
+            Token {
+                span: TokenSpan { start: 12, end: 13 },
+                kind: TokenKind::BraceOpen,
+            },
+            Token {
+                span: TokenSpan { start: 14, end: 15 },
+                kind: TokenKind::BraceClose,
+            },
+            Token {
+                span: TokenSpan { start: 15, end: 16 },
+                kind: TokenKind::BraceClose,
+            },
+        ];
+        TokenStream {
+            tokens,
+            original_file: input.to_owned(),
+        }
+    };
+    assert_eq!(token_stream, expected_token_stream)
+}
+
+#[test]
+fn test_comments() {
+    let input = "
+        // Some comment
+        nasp nasp {{
+
+        }}
+        // NOTE(@soispha): We do not support nested multi line comments <2023-12-16>
+        /* Some
+         * multi
+         * line
+         * comment
+         */
+";
+    let token_stream = TokenStream::lex(input)
+        .map_err(|e| {
+            eprintln!("{}", e);
+            panic!();
+        })
+        .unwrap();
+    let expected_token_stream = {
+        let tokens = vec![
+            Token {
+                span: TokenSpan { start: 33, end: 37 },
+                kind: TokenKind::Keyword(Keyword::nasp),
+            },
+            Token {
+                span: TokenSpan { start: 38, end: 42 },
+                kind: TokenKind::Keyword(Keyword::nasp),
+            },
+            Token {
+                span: TokenSpan { start: 43, end: 44 },
+                kind: TokenKind::BraceOpen,
+            },
+            Token {
+                span: TokenSpan { start: 44, end: 45 },
+                kind: TokenKind::BraceOpen,
+            },
+            Token {
+                span: TokenSpan { start: 55, end: 56 },
+                kind: TokenKind::BraceClose,
+            },
+            Token {
+                span: TokenSpan { start: 56, end: 57 },
+                kind: TokenKind::BraceClose,
+            },
+        ];
+        TokenStream {
+            tokens,
+            original_file: input.to_owned(),
+        }
+    };
+    assert_eq!(token_stream, expected_token_stream)
+}
diff --git a/trixy-lang_parser/src/lexing/tokenizer.rs b/trixy-lang_parser/src/lexing/tokenizer.rs
new file mode 100644
index 0000000..6662f07
--- /dev/null
+++ b/trixy-lang_parser/src/lexing/tokenizer.rs
@@ -0,0 +1,226 @@
+// This code is heavily inspired by: https://michael-f-bryan.github.io/static-analyser-in-rust/book/lex.html
+
+use crate::{
+    error::ErrorContext,
+    lexing::{Keyword, TokenSpan},
+};
+
+use super::{
+    error::{LexingError, SpannedLexingError},
+    Token, TokenKind,
+};
+
+pub(super) struct Tokenizer<'a> {
+    current_index: usize,
+    remaining_text: &'a str,
+    original_text: &'a str,
+}
+
+impl<'a> Tokenizer<'a> {
+    pub(super) fn new(input: &'a str) -> Self {
+        Self {
+            current_index: 0,
+            remaining_text: input,
+            original_text: input,
+        }
+    }
+    pub(super) fn next_token(&mut self) -> Result<Option<Token>, SpannedLexingError> {
+        self.skip_ignored_tokens();
+        if self.remaining_text.is_empty() {
+            Ok(None)
+        } else {
+            let start = self.current_index;
+
+            let (token_kind, index) = self.get_next_tokenkind().map_err(|e| {
+                let context = ErrorContext::from_index(start, self.original_text);
+
+                SpannedLexingError { source: e, context }
+            })?;
+
+            self.chomp(index);
+            let end = self.current_index;
+            Ok(Some(Token {
+                span: TokenSpan { start, end },
+                kind: token_kind,
+            }))
+        }
+    }
+
+    fn get_next_tokenkind(&mut self) -> Result<(TokenKind, usize), LexingError> {
+        let next = match self.remaining_text.chars().next() {
+            Some(c) => c,
+            None => return Err(LexingError::UnexpectedEOF),
+        };
+
+        let (tok, length) = match next {
+            '(' => (TokenKind::ParenOpen, 1),
+            ')' => (TokenKind::ParenClose, 1),
+            '{' => (TokenKind::BraceOpen, 1),
+            '}' => (TokenKind::BraceClose, 1),
+            ':' => (TokenKind::Colon, 1),
+            ';' => (TokenKind::Semicolon, 1),
+            ',' => (TokenKind::Comma, 1),
+            '<' => (TokenKind::SquareOpen, 1),
+            '>' => (TokenKind::SquareClose, 1),
+
+            '-' => tokenize_arrow(self.remaining_text)?,
+            '/' => tokenize_comment(self.remaining_text)?,
+
+            // can't use an OR (`|`) here, as the guard takes precedence
+            c if c.is_alphabetic() => tokenize_ident(self.remaining_text)?,
+            '_' => tokenize_ident(self.remaining_text)?,
+
+            other => return Err(LexingError::UnknownCharacter(other)),
+        };
+
+        Ok((tok, length))
+    }
+
+    fn skip_ignored_tokens(&mut self) {
+        loop {
+            let ws = self.skip_whitespace();
+            let comments = self.skip_block_comment();
+            if ws + comments == 0 {
+                return;
+            }
+        }
+    }
+
+    /// Skip past any whitespace characters
+    fn skip_whitespace(&mut self) -> usize {
+        let mut remaining = self.remaining_text;
+
+        // Filter out whitespace
+        let _ws = {
+            let ws = match take_while(remaining, |ch| ch.is_whitespace()) {
+                Ok((_, bytes_skipped)) => bytes_skipped,
+                _ => 0,
+            };
+            remaining = &remaining[ws..];
+            ws
+        };
+
+        let skip = self.remaining_text.len() - remaining.len();
+        self.chomp(skip);
+        skip
+    }
+    fn skip_block_comment(&mut self) -> usize {
+        let pairs = [("/*", "*/")];
+
+        let src = self.remaining_text;
+
+        for &(pattern, matcher) in &pairs {
+            if src.starts_with(pattern) {
+                let leftovers = skip_until(src, matcher);
+                let skip = src.len() - leftovers.len();
+                self.chomp(skip);
+                return skip;
+            }
+        }
+
+        0
+    }
+
+    fn chomp(&mut self, chars_to_chomp: usize) {
+        self.remaining_text = &self.remaining_text[chars_to_chomp..];
+        self.current_index += chars_to_chomp;
+    }
+}
+
+fn tokenize_comment(text: &str) -> Result<(TokenKind, usize), LexingError> {
+    // every comment starts with two slashes
+    let slashes: &str = &text[..2];
+    if slashes != "//" {
+        Err(LexingError::ExpectedComment)
+    } else {
+        let text: &str = &text[2..];
+        if let Some('/') = text.chars().next() {
+            let text = &text[1..];
+            let (doc_comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
+
+            // trim whitespace
+            let doc_comment = doc_comment.trim_start();
+            let doc_comment = doc_comment.trim_end();
+
+            return Ok((
+                TokenKind::DocComment(doc_comment.to_owned()),
+                chars_read + 3,
+            ));
+        }
+        let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
+
+        // trim whitespace
+        let comment = comment.trim_start();
+        let comment = comment.trim_end();
+
+        Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2))
+    }
+}
+
+fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
+    let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?;
+
+    // Filter out keywords
+    let tokenkind = match got {
+        "nasp" => TokenKind::Keyword(Keyword::nasp),
+        "fn" => TokenKind::Keyword(Keyword::r#fn),
+        "struct" => TokenKind::Keyword(Keyword::r#struct),
+        "enum" => TokenKind::Keyword(Keyword::r#enum),
+        other => TokenKind::Identifier(other.to_string()),
+    };
+
+    Ok((tokenkind, chars_read))
+}
+
+fn tokenize_arrow(text: &str) -> Result<(TokenKind, usize), LexingError> {
+    let mut chars = text.chars();
+    if let Some(char) = chars.next() {
+        if char == '-' {
+            if let Some(char) = chars.next() {
+                if char == '>' {
+                    return Ok((TokenKind::Arrow, 2));
+                }
+            }
+        }
+    }
+    // This is an implicit else, as the other if clauses return
+    Err(LexingError::ExpectedArrow)
+}
+
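(Illustrative only, not part of the diff: the comment handling above means `///` yields a `DocComment` token, `//` yields a `Comment` token, and `/* … */` blocks are consumed by `skip_block_comment` before the next token is read. A minimal sketch, assuming the crate builds as laid out in this commit.)

```rust
use trixy_lang_parser::lexing::TokenStream;

fn main() {
    // Doc comments and line comments become tokens; the block comment
    // is skipped entirely, so only the `nasp demo {}` tokens follow.
    let stream = TokenStream::lex("/// doc\n// plain\n/* skipped */ nasp demo {}");
    assert!(stream.is_ok());
}
```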
+/// Consumes bytes while a predicate evaluates to true.
+fn take_while<F>(data: &str, mut pred: F) -> Result<(&str, usize), LexingError>
+where
+    F: FnMut(char) -> bool,
+{
+    let mut current_index = 0;
+
+    for ch in data.chars() {
+        let should_continue = pred(ch);
+
+        if !should_continue {
+            break;
+        }
+
+        current_index += ch.len_utf8();
+    }
+
+    if current_index == 0 {
+        Err(LexingError::NoMatchesTaken)
+    } else {
+        Ok((&data[..current_index], current_index))
+    }
+}
+
+/// Skips input until the remaining string starts with the pattern
+fn skip_until<'a>(mut src: &'a str, pattern: &str) -> &'a str {
+    while !src.is_empty() && !src.starts_with(pattern) {
+        let next_char_size = src
+            .chars()
+            .next()
+            .expect("The string isn't empty")
+            .len_utf8();
+        src = &src[next_char_size..];
+    }
+
+    if src.is_empty() {
+        // The pattern was never found, so there is nothing left to skip
+        src
+    } else {
+        &src[pattern.len()..]
+    }
+}
diff --git a/trixy-lang_parser/src/lib.rs b/trixy-lang_parser/src/lib.rs
new file mode 100644
index 0000000..65f69b3
--- /dev/null
+++ b/trixy-lang_parser/src/lib.rs
@@ -0,0 +1,18 @@
+use error::TrixyError;
+
+use crate::lexing::TokenStream;
+
+use self::command_spec::checked::CommandSpec;
+
+mod command_spec;
+pub mod error;
+pub mod lexing;
+pub mod parsing;
+
+pub fn parse_trixy_lang(input: &str) -> Result<CommandSpec, Box<TrixyError>> {
+    let input_tokens = TokenStream::lex(input)
+        .map_err(|err| Box::new(err.into()))?
+        .parse()
+        .map_err(Into::<Box<TrixyError>>::into)?;
+    Ok(input_tokens)
+}
diff --git a/trixy-lang_parser/src/main.rs b/trixy-lang_parser/src/main.rs
new file mode 100644
index 0000000..aefc806
--- /dev/null
+++ b/trixy-lang_parser/src/main.rs
@@ -0,0 +1,110 @@
+use std::{fs, process::exit};
+
+use trixy_lang_parser::lexing::TokenStream;
+
+use std::path::PathBuf;
+
+use clap::{Parser, Subcommand};
+
+/// A helper command for the trixy-lang_parser crate
+#[derive(Parser, Debug)]
+#[clap(author, version, about, long_about = None)]
+pub struct Args {
+    #[command(subcommand)]
+    /// The subcommand to execute
+    pub subcommand: Command,
+}
+#[derive(Subcommand, Debug)]
+pub enum Command {
+    #[clap(value_parser)]
+    /// Only try to tokenize the file
+    Tokenize {
+        #[clap(value_parser)]
+        /// The file containing the trixy code to tokenize
+        file: PathBuf,
+    },
+    /// Check syntax, without type checking
+    Parse {
+        #[clap(value_parser)]
+        /// The file containing the trixy code to parse
+        file: PathBuf,
+    },
+    /// Type check
+    Process {
+        #[clap(value_parser)]
+        /// The file containing the trixy code to process
+        file: PathBuf,
+    },
+}
+
+pub fn main() {
+    let args = Args::parse();
+    match args.subcommand {
+        Command::Tokenize { file } => {
+            let input = fs::read_to_string(file).unwrap();
+
+            let input_tokens = match TokenStream::lex(&input) {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("{}", err);
+                    exit(1);
+                }
+            };
+
+            println!("{:#?}", input_tokens);
+        }
+        Command::Parse { file } => {
+            let input = fs::read_to_string(file).unwrap();
+
+            let input_tokens = match TokenStream::lex(&input) {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while tokenizing:");
+                    eprintln!("{}", err);
+                    exit(1);
+                }
+            };
+
+            let parsed = match input_tokens.parse_unchecked() {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while doing the first (unchecked) parsing run:");
+                    eprintln!("{}", err);
+                    exit(1)
+                }
+            };
+            println!("{:#?}", parsed);
+        }
+        Command::Process { file } => {
+            let input = fs::read_to_string(file).unwrap();
+
+            let input_tokens = match TokenStream::lex(&input) {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while tokenizing:");
+                    eprintln!("{}", err);
+                    exit(1);
+                }
+            };
+
+            let parsed = match input_tokens.parse_unchecked() {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while doing the first (unchecked) parsing run:");
+                    eprintln!("{}", err);
+                    exit(1)
+                }
+            };
+
+            let processed = match parsed.process(input) {
+                Ok(ok) => ok,
+                Err(err) => {
+                    eprintln!("Error while doing the second (checked) parsing run:");
+                    eprintln!("{}", err);
+                    exit(1)
+                }
+            };
+            println!("{:#?}", processed);
+        }
+    }
+}
diff --git a/trixy-lang_parser/src/parsing/checked/error.rs b/trixy-lang_parser/src/parsing/checked/error.rs
new file mode 100644
index 0000000..e088199
--- /dev/null
+++ b/trixy-lang_parser/src/parsing/checked/error.rs
@@ -0,0 +1,82 @@
+use thiserror::Error;
+
+use std::{error::Error, fmt::Display};
+
+use crate::{
+    command_spec::checked::Identifier,
+    error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
+    lexing::TokenSpan,
+    parsing::unchecked::error::SpannedParsingError as OldSpannedParsingError,
+};
+
+#[derive(Error, Debug)]
+pub enum ParsingError {
+    #[error("The type ('{r#type}') was not declared before!")]
+    TypeNotDeclared { r#type: Identifier, span: TokenSpan },
+    #[error(transparent)]
+    PreParseError(#[from] OldSpannedParsingError),
+}
+
+impl ParsingError {
+    pub fn span(&self) -> &TokenSpan {
+        match self {
+            ParsingError::TypeNotDeclared { span, .. } => span,
+            ParsingError::PreParseError(err) => err.source.span(),
+        }
+    }
+}
+
+impl AdditionalHelp for ParsingError {
+    fn additional_help(&self) -> String {
+        match self {
+            ParsingError::TypeNotDeclared { .. } => "This type should have been mentioned in the namespaces above, or in the namespace of this type usage".to_owned(),
+            ParsingError::PreParseError(err) => ErrorContextDisplay::source(err).additional_help(),
+        }
+    }
+}
+
+#[derive(Debug)]
+pub struct SpannedParsingError {
+    pub source: Box<ParsingError>,
+    pub context: ErrorContext,
+}
+
+impl Error for SpannedParsingError {
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        Some(&self.source)
+    }
+}
+
+impl Display for SpannedParsingError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.error_fmt(f)
+    }
+}
+
+impl ErrorContextDisplay for SpannedParsingError {
+    type Error = ParsingError;
+
+    fn context(&self) -> &crate::error::ErrorContext {
+        &self.context
+    }
+
+    fn line_number(&self) -> usize {
+        self.context.line_number
+    }
+
+    fn line_above(&self) -> &str {
+        &self.context.line_above
+    }
+
+    fn line_below(&self) -> &str {
+        &self.context.line_below
+    }
+
+    fn line(&self) -> &str {
+        &self.context.line
+    }
+
+    fn source(&self) -> &<Self as ErrorContextDisplay>::Error {
+        &self.source
+    }
+}
diff --git a/trixy-lang_parser/src/parsing/checked/mod.rs b/trixy-lang_parser/src/parsing/checked/mod.rs
new file mode 100644
index 0000000..669fd1b
--- /dev/null
+++ b/trixy-lang_parser/src/parsing/checked/mod.rs
@@ -0,0 +1,261 @@
+use std::mem;
+
+use crate::{
+    command_spec::{
+        checked::{
+            CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier, NamedType,
+            Namespace, Structure, Type, BASE_TYPES,
+        },
+        unchecked::{
+            CommandSpec as UncheckedCommandSpec, DocNamedType as UncheckedDocNamedType,
+            Enumeration as UncheckedEnumeration, Function as UncheckedFunction,
+            NamedType as UncheckedNamedType, Namespace as UncheckedNamespace,
+            Structure as UncheckedStructure, Type as UncheckedType,
+        },
+    },
+    error::ErrorContext,
+    lexing::{TokenKind, TokenStream},
+};
+
+use self::error::{ParsingError, SpannedParsingError};
+
+pub mod error;
+#[cfg(test)]
+mod test;
+
+struct Parser {
+    command_spec: UncheckedCommandSpec,
+    structures: Vec<UncheckedStructure>,
+    enumerations: Vec<UncheckedEnumeration>,
+    original_file: String,
+}
+
+impl TokenStream {
+    pub fn parse(mut self) -> Result<CommandSpec, SpannedParsingError> {
+        let original_file = mem::take(&mut self.original_file);
+
+        let unchecked = self.parse_unchecked().map_err(|err| {
+            let span = *err.source.span();
+            SpannedParsingError {
+                source: Box::new(ParsingError::from(err)),
+                context: ErrorContext::from_span(span, &original_file),
+            }
+        })?;
+
+        let checked = Parser {
+            command_spec: unchecked,
+            structures: vec![],
+            enumerations: vec![],
+            original_file,
+        }
+        .parse()?;
+        Ok(checked)
+    }
+}
+
+impl UncheckedCommandSpec {
+    pub fn process(self, original_file: String) -> Result<CommandSpec, SpannedParsingError> {
+        let checked = Parser {
+            command_spec: self,
+            structures: vec![],
+            enumerations: vec![],
+            original_file,
+        }
+        .parse()?;
+        Ok(checked)
+    }
+}
+
+macro_rules! pass_attrs_along {
+    ($name:ident) => {
+        $name.attributes.into_iter().map(|a| a.into()).collect()
+    };
+}
+
+impl Parser {
+    fn parse(mut self) -> Result<CommandSpec, SpannedParsingError> {
+        let namespace: UncheckedNamespace =
+            UncheckedNamespace::from(mem::take(&mut self.command_spec));
+        let namespace = self.process_namespace(namespace).map_err(|err| {
+            let span = *err.span();
+            SpannedParsingError {
+                source: Box::new(err),
+                context: ErrorContext::from_span(span, &self.original_file),
+            }
+        })?;
+        Ok(namespace.into())
+    }
+
+    fn process_namespace(
+        &mut self,
+        namespace: UncheckedNamespace,
+    ) -> Result<Namespace, ParsingError> {
+        let name = match namespace.name.kind {
+            TokenKind::Identifier(ident) => Identifier { name: ident },
+            // This is not really used, so the value put here does not matter
+            TokenKind::Dummy => Identifier {
+                name: "".to_owned(),
+            },
+            _ => unreachable!("This should never be more than these two enum variants"),
+        };
+
+        let mut enumerations = vec![];
+        let mut enumerations_counter = 0;
+        for enumeration in namespace.enumerations {
+            enumerations.push(self.process_enumeration(enumeration)?);
+            enumerations_counter += 1;
+        }
+        let mut structures = vec![];
+        let mut structures_counter = 0;
+        for structure in namespace.structures {
+            structures.push(self.process_structure(structure)?);
+            structures_counter += 1;
+        }
+
+        let mut functions = vec![];
+        for function in namespace.functions {
+            functions.push(self.process_function(function)?);
+        }
+        let mut namespaces = vec![];
+        for namespace in namespace.namespaces {
+            namespaces.push(self.process_namespace(namespace)?);
+        }
+
+        // Remove added enums and structs again
+        (0..structures_counter).for_each(|_| {
+            self.structures.pop();
+        });
+        (0..enumerations_counter).for_each(|_| {
+            self.enumerations.pop();
+        });
+
+        Ok(Namespace {
+            name,
+            functions,
+            structures,
+            enumerations,
+            namespaces,
+            attributes: pass_attrs_along!(namespace),
+        })
+    }
+
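(Illustrative sketch, not part of the diff: because `process_namespace` pops its structures and enumerations again after descending, a type declared in one namespace is invisible to its siblings. This assumes `Integer` is among `BASE_TYPES`, as the tests in this commit suggest.)

```rust
use trixy_lang_parser::lexing::TokenStream;

fn main() {
    // `S` is declared inside namespace `a`, so namespace `b` cannot see
    // it: the checked pass reports a TypeNotDeclared error.
    let src = "
nasp a { struct S { x: Integer, }; }
nasp b { fn f(s: S); }
";
    let result = TokenStream::lex(src).unwrap().parse();
    assert!(result.is_err());
}
```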
+    fn process_function(
+        &mut self,
+        mut function: UncheckedFunction,
+    ) -> Result<Function, ParsingError> {
+        let identifier = mem::take(&mut function.identifier.kind).into();
+        let mut inputs = vec![];
+        for input in function.inputs {
+            inputs.push(self.process_named_type(input)?);
+        }
+        let output = if let Some(r#type) = function.output {
+            Some(self.process_type(r#type)?)
+        } else {
+            None
+        };
+
+        Ok(Function {
+            identifier,
+            inputs,
+            output,
+            attributes: pass_attrs_along!(function),
+        })
+    }
+
+    fn process_enumeration(
+        &mut self,
+        mut enumeration: UncheckedEnumeration,
+    ) -> Result<Enumeration, ParsingError> {
+        self.enumerations.push(enumeration.clone());
+
+        let identifier = mem::take(&mut enumeration.identifier.kind).into();
+
+        let mut states = vec![];
+        for mut state in enumeration.states {
+            states.push({
+                let ident: Identifier = mem::take(&mut state.token.kind).into();
+                DocIdentifier {
+                    name: ident.name,
+                    attributes: pass_attrs_along!(state),
+                }
+            })
+        }
+
+        Ok(Enumeration {
+            identifier,
+            states,
+            attributes: pass_attrs_along!(enumeration),
+        })
+    }
+
+    fn process_structure(
+        &mut self,
+        mut structure: UncheckedStructure,
+    ) -> Result<Structure, ParsingError> {
+        self.structures.push(structure.clone());
+
+        let identifier: Identifier = mem::take(&mut structure.identifier.kind).into();
+        let mut contents = vec![];
+        for named_type in structure.contents {
+            contents.push(self.process_doc_named_type(named_type)?);
+        }
+
+        Ok(Structure {
+            identifier,
+            contents,
+            attributes: pass_attrs_along!(structure),
+        })
+    }
+
+    fn process_named_type(
+        &mut self,
+        mut named_type: UncheckedNamedType,
+    ) -> Result<NamedType, ParsingError> {
+        let name: Identifier = mem::take(&mut named_type.name.kind).into();
+        let r#type: Type = self.process_type(named_type.r#type)?;
+        Ok(NamedType { name, r#type })
+    }
+    fn process_doc_named_type(
+        &mut self,
+        mut doc_named_type: UncheckedDocNamedType,
+    ) -> Result<DocNamedType, ParsingError> {
+        let name: Identifier = mem::take(&mut doc_named_type.name.kind).into();
+        let r#type: Type = self.process_type(doc_named_type.r#type)?;
+        Ok(DocNamedType {
+            name,
+            r#type,
+            attributes: pass_attrs_along!(doc_named_type),
+        })
+    }
+
+    fn process_type(&mut self, mut r#type: UncheckedType) -> Result<Type, ParsingError> {
+        let identifier: Identifier = mem::take(&mut r#type.identifier.kind).into();
+
+        if !self
+            .structures
+            .iter()
+            .map(|r#struct| Into::<Identifier>::into(r#struct.identifier.kind.clone()))
+            .any(|ident| ident == identifier)
+            && !self
+                .enumerations
+                .iter()
+                .map(|r#enum| Into::<Identifier>::into(r#enum.identifier.kind.clone()))
+                .any(|ident| ident == identifier)
+            && !BASE_TYPES.iter().any(|ident| ident.name == identifier.name)
+        {
+            return Err(ParsingError::TypeNotDeclared {
+                r#type: identifier,
+                span: r#type.identifier.span,
+            });
+        }
+
+        let mut generic_args = vec![];
+        for generic_arg in r#type.generic_args {
+            generic_args.push(self.process_type(generic_arg)?);
+        }
+        Ok(Type {
+            identifier,
+            generic_args,
+        })
+    }
+}
diff --git a/trixy-lang_parser/src/parsing/checked/test.rs b/trixy-lang_parser/src/parsing/checked/test.rs
new file mode 100644
index 0000000..adf7a85
--- /dev/null
+++ b/trixy-lang_parser/src/parsing/checked/test.rs
@@ -0,0 +1,215 @@
+use crate::command_spec::checked::{
+    Attribute, CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier,
+    NamedType, Namespace, Structure, Type,
+};
+use crate::lexing::TokenStream;
+
+use pretty_assertions::assert_eq;
+
+#[test]
+fn test_full() {
+    let input = "nasp trinitrix {
+    struct Callback {
+        func: Function,
+        timeout: Integer,
+    };
+
+    enum CallbackPriority {
+        High,
+        Medium,
+        Low,
+    };
+
+    fn execute_callback(callback: Callback, priority: CallbackPriority);
+}";
+    let output = TokenStream::lex(&input).unwrap().parse().unwrap();
+    let expected = CommandSpec {
+        structures: vec![],
+        enumerations: vec![],
+        functions: vec![],
+        namespaces: vec![Namespace {
+            name: Identifier {
+                name: "trinitrix".to_owned(),
+            },
+            functions: vec![Function {
+                identifier: Identifier {
+                    name: "execute_callback".to_owned(),
+                },
+                inputs: vec![
+                    NamedType {
+                        name: Identifier {
+                            name: "callback".to_owned(),
+                        },
+                        r#type: Type {
+                            identifier: Identifier {
+                                name: "Callback".to_owned(),
+                            },
+                            generic_args: vec![],
+                        },
+                    },
+                    NamedType {
+                        name: Identifier {
+                            name: "priority".to_owned(),
+                        },
+                        r#type: Type {
+                            identifier: Identifier {
+                                name: "CallbackPriority".to_owned(),
+                            },
+                            generic_args: vec![],
+                        },
+                    },
+                ],
+                output: None,
+                attributes: vec![],
+            }],
+            structures: vec![Structure {
+                identifier: Identifier {
+                    name: "Callback".to_owned(),
+                },
+                contents: vec![
+                    DocNamedType {
+                        name: Identifier {
+                            name: "func".to_owned(),
+                        },
+                        r#type: Type {
+                            identifier: Identifier {
+                                name: "Function".to_owned(),
+                            },
+                            generic_args: vec![],
+                        },
+                        attributes: vec![],
+                    },
+                    DocNamedType {
+                        name: Identifier {
+                            name: "timeout".to_owned(),
+                        },
+                        r#type: Type {
+                            identifier: Identifier {
+                                name: "Integer".to_owned(),
+                            },
+                            generic_args: vec![],
+                        },
+                        attributes: vec![],
+                    },
+                ],
+                attributes: vec![],
+            }],
+            enumerations: vec![Enumeration {
+                identifier: Identifier {
+                    name: "CallbackPriority".to_owned(),
+                },
+                states: vec![
+                    DocIdentifier {
+                        name: "High".to_owned(),
+                        attributes: vec![],
+                    },
+                    DocIdentifier {
+                        name: "Medium".to_owned(),
+                        attributes: vec![],
+                    },
+                    DocIdentifier {
+                        name: "Low".to_owned(),
+                        attributes: vec![],
+                    },
+                ],
+                attributes: vec![],
+            }],
+            namespaces: vec![],
+            attributes: vec![],
+        }],
+    };
+    assert_eq!(output, expected);
+}
+
+#[test]
+fn test_failing() {
+    let input = "struct Callback {
+    func: Function,
+    timeout: Integer,
+};
+
+// The type \"Name\" should not be defined
+fn execute_callback(callback: Name);
+";
+    let output = TokenStream::lex(&input).unwrap().parse();
+    match *(output.unwrap_err().source) {
+        super::error::ParsingError::TypeNotDeclared { r#type, .. } => {
+            assert_eq!(
+                r#type,
+                Identifier {
+                    name: "Name".to_owned()
+                }
+            )
+        }
+        _ => panic!("Wrong error in test!"),
+    };
+}
+
+#[test]
+fn test_comments() {
+    let input = "fn print(message: String);
+
+/// First doc comment
+// Some more text
+nasp trinitrix {
+    /// Second doc comment
+    fn hi(name: String) -> String;
+}
+";
+    let output = TokenStream::lex(&input).unwrap().parse().unwrap();
+    let expected = CommandSpec {
+        structures: vec![],
+        enumerations: vec![],
+        functions: vec![Function {
+            identifier: Identifier {
+                name: "print".to_owned(),
+            },
+            inputs: vec![NamedType {
+                name: Identifier {
+                    name: "message".to_owned(),
+                },
+                r#type: Type {
+                    identifier: Identifier {
+                        name: "String".to_owned(),
+                    },
+                    generic_args: vec![],
+                },
+            }],
+            output: None,
+            attributes: vec![],
+        }],
+        namespaces: vec![Namespace {
+            name: Identifier {
+                name: "trinitrix".to_owned(),
+            },
+            functions: vec![Function {
+                identifier: Identifier {
+                    name: "hi".to_owned(),
+                },
+                inputs: vec![NamedType {
+                    name: Identifier {
+                        name: "name".to_owned(),
+                    },
+                    r#type: Type {
+                        identifier: Identifier {
+                            name: "String".to_owned(),
+                        },
+                        generic_args: vec![],
+                    },
+                }],
+                output: Some(Type {
+                    identifier: Identifier {
+                        name: "String".to_owned(),
+                    },
+                    generic_args: vec![],
+                }),
+                attributes: vec![Attribute::doc("Second doc comment".to_owned())],
+            }],
+            structures: vec![],
+            enumerations: vec![],
+            namespaces: vec![],
+            attributes: vec![Attribute::doc("First doc comment".to_owned())],
+        }],
+    };
+    assert_eq!(output, expected);
+}
diff --git a/trixy-lang_parser/src/parsing/mod.rs b/trixy-lang_parser/src/parsing/mod.rs
new file mode 100644
index 0000000..f1506dc
--- /dev/null
+++ b/trixy-lang_parser/src/parsing/mod.rs
@@ -0,0 +1,2 @@
+pub mod checked;
+mod unchecked;
diff --git a/trixy-lang_parser/src/parsing/unchecked/error.rs b/trixy-lang_parser/src/parsing/unchecked/error.rs
new file mode 100644
index 0000000..f15c5d5
--- /dev/null
+++ b/trixy-lang_parser/src/parsing/unchecked/error.rs
@@ -0,0 +1,113 @@
+use std::{error::Error, fmt::Display};
+use thiserror::Error;
+
+use crate::{
+    command_spec::unchecked::Attribute,
+    error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
+    lexing::{TokenKind, TokenSpan},
+};
+
+#[derive(Error, Debug, Clone)]
+pub enum ParsingError {
+    #[error("Expected '{expected}', but received: '{actual}'")]
+    ExpectedDifferentToken {
+        expected: TokenKind,
+        actual: TokenKind,
+        span: TokenSpan,
+    },
+
+    #[error("Expected '{expected}', but the token stream stopped")]
+    UnexpectedEOF {
+        expected: TokenKind,
+        span: TokenSpan,
+    },
+
+    #[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
+    ExpectedKeyword { actual: TokenKind, span: TokenSpan },
+
+    #[error("DocComment does not have a target")]
+    TrailingDocComment {
+        comments: Vec<Attribute>,
+        span: TokenSpan,
+    },
+}
+impl ParsingError {
+    pub fn span(&self) -> &TokenSpan {
+        match self {
+            ParsingError::ExpectedDifferentToken { span, .. } => span,
+            ParsingError::ExpectedKeyword { span, .. } => span,
+            ParsingError::TrailingDocComment { span, .. } => span,
+            ParsingError::UnexpectedEOF { span, .. } => span,
+        }
+    }
+
+    pub fn get_span(&self) -> TokenSpan {
+        *self.span()
+    }
+}
+
+impl AdditionalHelp for ParsingError {
+    fn additional_help(&self) -> String {
+        match self {
+            ParsingError::ExpectedDifferentToken {
+                expected,
+                actual,
+                ..
+            } => format!(
+                "I expected a '{}' here, but you put a '{}' there!",
+                expected, actual
+            ),
+            ParsingError::ExpectedKeyword { actual, .. } => format!(
+                "I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!",
+                actual),
+            ParsingError::TrailingDocComment { .. } => "I expected some target (a function, namespace, enum, or something like this) which this doc comment annotates, but you put nothing there".to_owned(),
+            ParsingError::UnexpectedEOF { expected, .. } => format!("Put the expected token ('{expected}') here."),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct SpannedParsingError {
+    pub source: Box<ParsingError>,
+    pub context: ErrorContext,
+}
+
+impl Error for SpannedParsingError {
+    fn source(&self) -> Option<&(dyn Error + 'static)> {
+        Some(&self.source)
+    }
+}
+
+impl Display for SpannedParsingError {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        self.error_fmt(f)
+    }
+}
+
+impl ErrorContextDisplay for SpannedParsingError {
+    type Error = ParsingError;
+
+    fn context(&self) -> &crate::error::ErrorContext {
+        &self.context
+    }
+
+    fn line_number(&self) -> usize {
+        self.context.line_number
+    }
+
+    fn line_above(&self) -> &str {
+        &self.context.line_above
+    }
+
+    fn line_below(&self) -> &str {
+        &self.context.line_below
+    }
+
+    fn line(&self) -> &str {
+        &self.context.line
+    }
+
+    fn source(&self) -> &<Self as ErrorContextDisplay>::Error {
+        &self.source
+    }
+}
diff --git a/trixy-lang_parser/src/parsing/unchecked/mod.rs b/trixy-lang_parser/src/parsing/unchecked/mod.rs
new file mode 100644
index 0000000..b1175e0
--- /dev/null
+++ b/trixy-lang_parser/src/parsing/unchecked/mod.rs
@@ -0,0 +1,372 @@
+use std::mem;
+
+use crate::{
+    command_spec::unchecked::{
+        Attribute, CommandSpec, Declaration, DocNamedType, DocToken, Enumeration, Function,
+        NamedType, Namespace, Structure, Type,
+    },
+    error::ErrorContext,
+    lexing::{Token, TokenKind, TokenSpan, TokenStream},
+    token,
+};
+
+use self::error::{ParsingError, SpannedParsingError};
+
+pub mod error;
+#[cfg(test)]
+mod test;
+
+impl TokenStream {
+    pub fn parse_unchecked(self) -> Result<CommandSpec, SpannedParsingError> {
+        let mut parser = Parser::new(self);
+        parser.parse()
+    }
+}
+
+pub(super) struct Parser {
+    token_stream: TokenStream,
+    active_doc_comments: Vec<Attribute>,
+    last_span: TokenSpan,
+}
+
+impl Parser {
+    fn new(mut token_stream: TokenStream) -> Self {
+        token_stream.reverse();
+        Self {
+            token_stream,
+            active_doc_comments: vec![],
+            last_span: TokenSpan::default(),
+        }
+    }
+
+    fn parse(&mut self) -> Result<CommandSpec, SpannedParsingError> {
+        let mut output = CommandSpec::default();
+        while !self.token_stream.is_empty() {
+            let next = self.parse_next().map_err(|err| {
+                let span = err.get_span();
+                SpannedParsingError {
+                    source: Box::new(err),
+                    context: ErrorContext::from_span(span, &self.token_stream.original_file),
+                }
+            })?;
+            match next {
+                Declaration::Function(function) => output.functions.push(function),
+                Declaration::Structure(structure) => output.structures.push(structure),
+                Declaration::Enumeration(enumeration) => output.enumerations.push(enumeration),
+                Declaration::Namespace(namespace) => output.namespaces.push(namespace),
+            }
+        }
+
+        Ok(output)
+    }
+
+    fn parse_next(&mut self) -> Result<Declaration, ParsingError> {
+        // Use of [peek_raw] here is fine, as we know that the function is only called when
+        // something should still be contained in the token stream
+        match self.peek_raw().kind() {
+            token![nasp] => Ok(Declaration::Namespace(self.parse_namespace()?)),
+            token![fn] => Ok(Declaration::Function(self.parse_function()?)),
+            token![struct] => Ok(Declaration::Structure(self.parse_structure()?)),
+            token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)),
+            token![DocCommentMatch] => {
+                while self.expect_peek(token![DocComment]) {
+                    let comment_to_push = {
+                        let doc_comment = self.expect(token![DocComment])?;
+                        let span = *doc_comment.span();
+                        let name = if let TokenKind::DocComment(content) = doc_comment.kind {
+                            content
+                        } else {
+                            unreachable!("The expect should have accounted for that case");
+                        };
+
+                        Attribute::doc {
+                            content: name,
+                            span,
+                        }
+                    };
+                    self.active_doc_comments.push(comment_to_push);
+                }
+
+                if self.token_stream.is_empty() {
+                    fn get_span(attr: Option<&Attribute>) -> TokenSpan {
+                        match attr.expect("Something should be here") {
+                            Attribute::doc { span, .. } => *span,
+                        }
+                    }
+
+                    let span = TokenSpan::from_range(
+                        get_span(self.active_doc_comments.first()),
+                        get_span(self.active_doc_comments.last()),
+                    );
+                    Err(ParsingError::TrailingDocComment {
+                        comments: mem::take(&mut self.active_doc_comments),
+                        span,
+                    })
+                } else {
+                    self.parse_next()
+                }
+            }
+            _ => {
+                let err = ParsingError::ExpectedKeyword {
+                    span: *self.peek_raw().span(),
+                    actual: self.peek_raw().kind().clone(),
+                };
+
+                Err(err)
+            }
+        }
+    }
+
+    fn parse_type(&mut self) -> Result<Type, ParsingError> {
+        let identifier = self.expect(token![Ident])?;
+
+        let mut generic_args = vec![];
+        if self.expect_peek(token![<]) {
+            self.expect(token![<])?;
+            if self.expect_peek(token![Ident]) {
+                generic_args.push(self.parse_type()?);
+            }
+            while self.expect_peek(token![Comma]) {
+                self.expect(token![Comma])?;
+                generic_args.push(self.parse_type()?);
+            }
+            self.expect(token![>])?;
+        }
+        Ok(Type {
+            identifier,
+            generic_args,
+        })
+    }
+
+    fn parse_doc_comments(&mut self) -> Result<Vec<Attribute>, ParsingError> {
+        let mut attrs = mem::take(&mut self.active_doc_comments);
+
+        while self.expect_peek(token![DocComment]) {
+            attrs.push({
+                let doc_comment = self.expect(token![DocComment])?;
+                let span = *doc_comment.span();
+                let name = if let TokenKind::DocComment(content) = doc_comment.kind {
+                    content
+                } else {
+                    unreachable!("The expect should have accounted for that case");
+                };
+                Attribute::doc {
+                    content: name,
+                    span,
+                }
+            });
+        }
+        Ok(attrs)
+    }
+
+    fn parse_namespace(&mut self) -> Result<Namespace, ParsingError> {
+        let attributes = self.parse_doc_comments()?;
+        self.expect(token![nasp])?;
+
+        let mut namespace = Namespace {
+            name: self.expect(token![Ident])?,
+            attributes,
+            ..Default::default()
+        };
+
+        self.expect(token![BraceOpen])?;
+
+        while !self.expect_peek(token![BraceClose]) {
+            let next = self.parse_next()?;
+            match next {
+                Declaration::Function(function) => namespace.functions.push(function),
+                Declaration::Structure(structure) => namespace.structures.push(structure),
+                Declaration::Enumeration(enumeration) => namespace.enumerations.push(enumeration),
+                Declaration::Namespace(input_namespace) => {
+                    namespace.namespaces.push(input_namespace)
+                }
+            }
+        }
+
+        self.expect(token![BraceClose])?;
+        Ok(namespace)
+    }
+
+    fn parse_enumeration(&mut self) -> Result<Enumeration, ParsingError> {
+        let attributes = self.parse_doc_comments()?;
+        self.expect(token![enum])?;
+        let identifier = self.expect(token![Ident])?;
+        self.expect(token![BraceOpen])?;
+
+        let mut states = vec![];
+        if self.expect_peek(token![Ident]) {
+            let attributes = self.parse_doc_comments()?;
+            states.push(DocToken {
+                token: self.expect(token![Ident])?,
+                attributes,
+            });
+        }
+        while self.expect_peek(token![Comma]) {
+            self.expect(token![Comma])?;
+            if self.expect_peek(token![Ident]) {
+                let attributes = self.parse_doc_comments()?;
+                states.push(DocToken {
+                    token: self.expect(token![Ident])?,
+                    attributes,
+                });
+            } else {
+                break;
+            }
+        }
+        self.expect(token![BraceClose])?;
+        self.expect(token![;])?;
+        Ok(Enumeration {
+            identifier,
+            states,
+            attributes,
+        })
+    }
+
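(Quick check, not part of the diff: the generic-argument loop in `parse_type` above consumes the separating comma before recursing. `Vec` and `Map` here are arbitrary identifiers, since the unchecked pass does not resolve types.)

```rust
use trixy_lang_parser::lexing::TokenStream;

fn main() {
    // Angle brackets lex as SquareOpen/SquareClose; any identifier may
    // act as a generic type constructor in the unchecked pass.
    let src = "fn zip(a: Vec<String>, b: Map<String, String>) -> Vec<String>;";
    let spec = TokenStream::lex(src).unwrap().parse_unchecked().unwrap();
    println!("{:#?}", spec);
}
```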
+    fn parse_structure(&mut self) -> Result<Structure, ParsingError> {
+        let attributes = self.parse_doc_comments()?;
+        self.expect(token![struct])?;
+        let name = self.expect(token![Ident])?;
+        self.expect(token![BraceOpen])?;
+
+        let mut contents = vec![];
+        if self.expect_peek(token![Ident]) {
+            contents.push(self.parse_doc_named_type()?);
+        }
+        while self.expect_peek(token![Comma]) {
+            self.expect(token![Comma])?;
+            if self.expect_peek(token![Ident]) {
+                contents.push(self.parse_doc_named_type()?);
+            } else {
+                break;
+            }
+        }
+        self.expect(token![BraceClose])?;
+        self.expect(token![;])?;
+
+        Ok(Structure {
+            identifier: name,
+            contents,
+            attributes,
+        })
+    }
+
+    fn parse_named_type(&mut self) -> Result<NamedType, ParsingError> {
+        let name = self.expect(token![Ident])?;
+        self.expect(token![Colon])?;
+        let r#type = self.parse_type()?;
+        Ok(NamedType { name, r#type })
+    }
+
+    fn parse_doc_named_type(&mut self) -> Result<DocNamedType, ParsingError> {
+        let attributes = self.parse_doc_comments()?;
+        let name = self.expect(token![Ident])?;
+        self.expect(token![Colon])?;
+        let r#type = self.parse_type()?;
+        Ok(DocNamedType {
+            name,
+            r#type,
+            attributes,
+        })
+    }
+
+    fn parse_function(&mut self) -> Result<Function, ParsingError> {
+        let attributes = self.parse_doc_comments()?;
+        self.expect(token![fn])?;
+        let name = self.expect(token![Ident])?;
+        self.expect(token![ParenOpen])?;
+        let mut inputs = vec![];
+
+        if self.expect_peek(token![Ident]) {
+            inputs.push(self.parse_named_type()?);
+        }
+        while self.expect_peek(token![Comma]) {
+            self.expect(token![Comma])?;
+            inputs.push(self.parse_named_type()?);
+        }
+
+        self.expect(token![ParenClose])?;
+        let mut output_type = None;
+        if self.expect_peek(token![->]) {
+            self.expect(token![->])?;
+            output_type = Some(self.parse_type()?);
+        }
+        self.expect(token![;])?;
+        Ok(Function {
+            identifier: name,
+            inputs,
+            output: output_type,
+            attributes,
+        })
+    }
+
+    /// Expect a token in the next input position:
+    /// For example:
+    ///
+    /// ```ignore
+    /// use trixy_lang_parser::{
+    ///     lexing::{Keyword, TokenKind, TokenStream},
+    ///     parsing::unchecked::Parser,
+    ///     token,
+    /// };
+    ///
+    /// # fn main() {
+    /// let token_stream = TokenStream::lex("nasp {}").unwrap();
+    /// let mut parser = Parser::new(token_stream);
+    /// assert_eq!(parser.expect(token![nasp]).unwrap().kind, TokenKind::Keyword(Keyword::nasp));
+    /// assert_eq!(parser.expect(token![BraceOpen]).unwrap().kind, TokenKind::BraceOpen);
+    /// assert_eq!(parser.expect(token![BraceClose]).unwrap().kind, TokenKind::BraceClose);
+    /// assert!(parser.expect(token![BraceClose]).is_err());
+    /// # }
+    /// ```
+    ///
+    pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
+        let actual_token = if let Some(token) = self.peek() {
+            token
+        } else {
+            return Err(ParsingError::UnexpectedEOF {
+                expected: token,
+                span: self.last_span,
+            });
+        };
+        if actual_token.kind().same_kind(&token) {
+            Ok(self.pop())
+        } else {
+            let err = ParsingError::ExpectedDifferentToken {
+                expected: token,
+                actual: actual_token.kind().clone(),
+                span: *actual_token.span(),
+            };
+
+            Err(err)
+        }
+    }
+
+    /// Check if the next token is of the specified TokenKind.
+    /// Does not alter the token_stream
+    fn expect_peek(&self, token: TokenKind) -> bool {
+        let actual_token = match self.peek() {
+            Some(ok) => ok,
+            None => return false,
+        };
+        actual_token.kind().same_kind(&token)
+    }
+
+    /// Looks at the next token without removing it
+    fn peek(&self) -> Option<&Token> {
+        self.token_stream.peek()
+    }
+
+    /// Looks at the next token without removing it.
+    /// Unwraps the option returned from [peek]; only use it if you know that a token must exist
+    fn peek_raw(&self) -> &Token {
+        self.token_stream.peek().expect("The token should exist")
+    }
+
+    /// Removes the next token
+    fn pop(&mut self) -> Token {
+        self.last_span = *self
+            .peek()
+            .expect("Calling pop should mean that a token was first peeked for")
+            .span();
+        self.token_stream.pop()
+    }
+}
diff --git a/trixy-lang_parser/src/parsing/unchecked/test.rs b/trixy-lang_parser/src/parsing/unchecked/test.rs
new file mode 100644
index 0000000..b5568fb
--- /dev/null
+++ b/trixy-lang_parser/src/parsing/unchecked/test.rs
@@ -0,0 +1,101 @@
+use pretty_assertions::assert_eq;
+
+use crate::{
+    command_spec::unchecked::{CommandSpec, Function, NamedType, Namespace, Type},
+    lexing::{Token, TokenKind, TokenSpan, TokenStream},
+};
+
+use super::error::ParsingError;
+
+#[test]
+fn test_failing() {
+    let input = "
+fn print(message: CommandTransferValue);
+
+nasp trinitrix { {}
+    fn hi honner(name: String) -> String; ;
+}
+
+";
+    let parsed = TokenStream::lex(input).unwrap().parse_unchecked();
+    let err = parsed.unwrap_err().source;
+    match *err {
+        ParsingError::ExpectedKeyword { .. } => {}
+        _ => panic!("Wrong error"),
+    }
+}
+
+#[test]
+fn test_full() {
+    let input = "fn print(message: CommandTransferValue);
+
+nasp trinitrix {
+    fn hi(name: String) -> String;
+}
+";
+    let parsed = TokenStream::lex(input).unwrap().parse_unchecked().unwrap();
+    let expected = CommandSpec {
+        structures: vec![],
+        enumerations: vec![],
+        functions: vec![Function {
+            identifier: Token {
+                span: TokenSpan { start: 3, end: 8 },
+                kind: TokenKind::Identifier("print".to_owned()),
+            },
+            inputs: vec![NamedType {
+                name: Token {
+                    span: TokenSpan { start: 9, end: 16 },
+                    kind: TokenKind::Identifier("message".to_owned()),
+                },
+                r#type: Type {
+                    identifier: Token {
+                        span: TokenSpan { start: 18, end: 38 },
+                        kind: TokenKind::Identifier("CommandTransferValue".to_owned()),
+                    },
+                    generic_args: vec![],
+                },
+            }],
+            output: None,
+            attributes: vec![],
+        }],
+        namespaces: vec![Namespace {
+            name: Token {
+                span: TokenSpan { start: 47, end: 56 },
+                kind: TokenKind::Identifier("trinitrix".to_owned()),
+            },
+            functions: vec![Function {
+                identifier: Token {
+                    span: TokenSpan { start: 66, end: 68 },
+                    kind: TokenKind::Identifier("hi".to_owned()),
+                },
+                inputs: vec![NamedType {
+                    name: Token {
+                        span: TokenSpan { start: 69, end: 73 },
+                        kind: TokenKind::Identifier("name".to_owned()),
+                    },
+                    r#type: Type {
+                        identifier: Token {
+                            span: TokenSpan { start: 75, end: 81 },
+                            kind: TokenKind::Identifier("String".to_owned()),
+                        },
+                        generic_args: vec![],
+                    },
+                }],
+                output: Some(Type {
+                    identifier: Token {
+                        span: TokenSpan { start: 86, end: 92 },
+                        kind: TokenKind::Identifier("String".to_owned()),
+                    },
+                    generic_args: vec![],
+                }),
+                attributes: vec![],
+            }],
+            structures: vec![],
+            enumerations: vec![],
+            namespaces: vec![],
+            attributes: vec![],
+        }],
+    };
+
+    assert_eq!(parsed, expected);
+}
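(End-to-end sketch, not part of the commit: `parse_trixy_lang` from lib.rs chains the lexer, the unchecked pass, and the checked pass shown in this diff into a single call.)

```rust
use trixy_lang_parser::parse_trixy_lang;

fn main() {
    // A well-formed spec passes both parsing runs and type checking.
    let spec = parse_trixy_lang(
        "nasp trinitrix {
    fn hi(name: String) -> String;
}",
    )
    .expect("this spec is well-formed");
    println!("{:#?}", spec);
}
```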