From 3503e5250c0ae74c6c06ad19ff10a265a0539711 Mon Sep 17 00:00:00 2001 From: Soispha Date: Fri, 22 Dec 2023 14:57:39 +0100 Subject: [PATCH] feat(trixy-lang_parser): Add type checking (2nd stage parsing) --- .../example/failing_types.tri | 10 + trixy/trixy-lang_parser/example/full.tri | 14 +- .../src/command_spec/checked.rs | 140 ++++++++--- .../trixy-lang_parser/src/command_spec/mod.rs | 2 +- .../src/command_spec/unchecked.rs | 93 ++++--- trixy/trixy-lang_parser/src/error.rs | 2 +- trixy/trixy-lang_parser/src/lexing/mod.rs | 7 +- trixy/trixy-lang_parser/src/main.rs | 38 +++ .../src/parsing/checked/error.rs | 82 +++++++ .../src/parsing/checked/mod.rs | 230 ++++++++++++++++++ .../src/parsing/checked/test.rs | 134 ++++++++++ trixy/trixy-lang_parser/src/parsing/mod.rs | 4 +- trixy/trixy-lang_parser/src/parsing/test.rs | 88 ------- .../src/parsing/{ => unchecked}/error.rs | 12 +- .../{unchecked.rs => unchecked/mod.rs} | 69 +++--- .../src/parsing/unchecked/test.rs | 98 ++++++++ 16 files changed, 815 insertions(+), 208 deletions(-) create mode 100644 trixy/trixy-lang_parser/example/failing_types.tri create mode 100644 trixy/trixy-lang_parser/src/parsing/checked/error.rs create mode 100644 trixy/trixy-lang_parser/src/parsing/checked/mod.rs create mode 100644 trixy/trixy-lang_parser/src/parsing/checked/test.rs delete mode 100644 trixy/trixy-lang_parser/src/parsing/test.rs rename trixy/trixy-lang_parser/src/parsing/{ => unchecked}/error.rs (88%) rename trixy/trixy-lang_parser/src/parsing/{unchecked.rs => unchecked/mod.rs} (78%) create mode 100644 trixy/trixy-lang_parser/src/parsing/unchecked/test.rs diff --git a/trixy/trixy-lang_parser/example/failing_types.tri b/trixy/trixy-lang_parser/example/failing_types.tri new file mode 100644 index 0000000..8e5ed74 --- /dev/null +++ b/trixy/trixy-lang_parser/example/failing_types.tri @@ -0,0 +1,10 @@ +struct Callback { + func: Function, + timeout: Integer, +}; + +fn execute_callback(callback: Name); + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy/trixy-lang_parser/example/full.tri b/trixy/trixy-lang_parser/example/full.tri index 9dbebfb..9b2f065 100644 --- a/trixy/trixy-lang_parser/example/full.tri +++ b/trixy/trixy-lang_parser/example/full.tri @@ -2,7 +2,7 @@ // HACK(@soispha): The stdlib Lua `print()` function has stdout as output hardcoded, // redirecting stdout seems too much like a hack thus we are just redefining the print function // to output to a controlled output. <2023-09-09> -fn print(input: CommandTransferValue); +//fn print(input: CommandTransferValue); nasp trinitrix { /// Language specific functions, which mirror the `trinitrix.api` namespace. @@ -14,11 +14,21 @@ nasp trinitrix { /// Debug only functions, these are effectively useless nasp debug { + enum UserGreet { + Friendly, + Angrily, + Hastly + }; + struct GreetedUser { + names: Vec, + new: GreetedUser, + state: UserGreet + }; /// Greets the user fn greet(input: String) -> String; /// Returns a table of greeted users - fn greet_multiple() -> Table; + fn greet_multiple() -> GreetedUser; } /// General API to change stuff in Trinitrix diff --git a/trixy/trixy-lang_parser/src/command_spec/checked.rs b/trixy/trixy-lang_parser/src/command_spec/checked.rs index c47bf73..3da9a5c 100644 --- a/trixy/trixy-lang_parser/src/command_spec/checked.rs +++ b/trixy/trixy-lang_parser/src/command_spec/checked.rs @@ -1,29 +1,92 @@ //! This module contains the already type checked types. -//! -//! -use crate::lexing::{Keyword, TokenKind}; -pub enum PrimitiveTypes { - String, - /// Nothing - Void, +use std::fmt::Display; + +use crate::lexing::TokenKind; + +/// These are the "primitive" types used in trixy, you can use any of them to create new structures +pub const BASE_TYPES: [ConstIdentifier; 8] = [ + Identifier::from("Integer"), + Identifier::from("Float"), + Identifier::from("Decimal"), + Identifier::from("String"), + Identifier::from("Function"), + Identifier::from("Option"), + Identifier::from("Result"), + Identifier::from("Vec"), +]; + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Namespace { + pub name: Identifier, + + pub functions: Vec, + pub structures: Vec, + pub enumerations: Vec, + pub namespaces: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct CommandSpec { + pub structures: Vec, + pub enumerations: Vec, + pub functions: Vec, + pub namespaces: Vec, +} + +impl From for CommandSpec { + fn from(value: Namespace) -> Self { + Self { + structures: value.structures, + enumerations: value.enumerations, + functions: value.functions, + namespaces: value.namespaces, + } + } +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Structure { + pub identifier: Identifier, + pub contents: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Enumeration { + pub identifier: Identifier, + pub states: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Function { + pub identifier: Identifier, + pub inputs: Vec, + pub output: Option, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct Type { + pub identifier: Identifier, + pub generic_args: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct NamedType { + pub name: Identifier, + pub r#type: Type, } impl From for Identifier { fn from(value: TokenKind) -> Self { match value { - TokenKind::Identifier(ident) => Identifier(ident), - TokenKind::Keyword(_) - | TokenKind::Colon - | TokenKind::Semicolon - | TokenKind::Comma - | TokenKind::Arrow - | TokenKind::BraceOpen - | TokenKind::BraceClose - | TokenKind::ParenOpen - | TokenKind::Dummy - | TokenKind::ParenClose => { - panic!("Tried to convert a non Identifier TokenKind to a Identefier. This is a bug") + TokenKind::Identifier(ident) => Identifier { name: ident }, + _ => { + panic!( + "Tried to convert a non Identifier TokenKind to a Identefier. This is a bug + Token was: '{}' + ", + value + ) } } } @@ -34,25 +97,26 @@ impl From for Identifier { /// - Variable names /// - Function names /// - Namespace names -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct Identifier(String); +/// - Type names +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct Identifier { + pub name: String, +} -impl From for Keyword { - fn from(value: TokenKind) -> Self { - match value { - TokenKind::Keyword(keyword) => keyword, - TokenKind::Identifier(_) - | TokenKind::Colon - | TokenKind::Semicolon - | TokenKind::Comma - | TokenKind::Arrow - | TokenKind::BraceOpen - | TokenKind::BraceClose - | TokenKind::ParenOpen - | TokenKind::Dummy - | TokenKind::ParenClose => { - panic!("Tried to convert a non Keyword TokenKind to a Keyword. This is a bug") - } - } +/// A const version of [Identifier] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct ConstIdentifier { + pub name: &'static str, +} + +impl Display for Identifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.name) + } +} + +impl Identifier { + const fn from(value: &'static str) -> ConstIdentifier { + ConstIdentifier { name: value } } } diff --git a/trixy/trixy-lang_parser/src/command_spec/mod.rs b/trixy/trixy-lang_parser/src/command_spec/mod.rs index 4b35be6..1bf868c 100644 --- a/trixy/trixy-lang_parser/src/command_spec/mod.rs +++ b/trixy/trixy-lang_parser/src/command_spec/mod.rs @@ -1,2 +1,2 @@ -// pub mod checked; +pub mod checked; pub mod unchecked; diff --git a/trixy/trixy-lang_parser/src/command_spec/unchecked.rs b/trixy/trixy-lang_parser/src/command_spec/unchecked.rs index 8c73d50..7619f96 100644 --- a/trixy/trixy-lang_parser/src/command_spec/unchecked.rs +++ b/trixy/trixy-lang_parser/src/command_spec/unchecked.rs @@ -2,82 +2,101 @@ //! These are generated on the first pass of the parser, to be later converted into the checked //! ones. +use std::fmt::{Display, Write}; + use crate::lexing::Token; -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord)] pub struct CommandSpec { - pub declarations: Vec, + pub structures: Vec, + pub enumerations: Vec, + pub functions: Vec, + pub namespaces: Vec, } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct Declaration { - pub namespace: Vec, // Will later be turned into Namespace - pub genus: Genus, -} - -impl Declaration { - pub fn new_function(function: Function, namespace: Vec) -> Self { - Declaration { - namespace, - genus: Genus::Function(function), - } - } - pub fn new_structure(structure: Structure, namespace: Vec) -> Self { - Declaration { - namespace, - genus: Genus::Structure(structure), - } - } - pub fn new_enumeration(r#enum: Enumeration, namespace: Vec) -> Self { - Declaration { - namespace, - genus: Genus::Enumeration(r#enum), +impl From for Namespace { + fn from(value: CommandSpec) -> Self { + Self { + name: Token::get_dummy(), + functions: value.functions, + structures: value.structures, + enumerations: value.enumerations, + namespaces: value.namespaces, } } } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Namespace { pub name: Token, // Will later become an Identifier + + pub functions: Vec, + pub structures: Vec, + pub enumerations: Vec, + pub namespaces: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum Genus { - /// Not actually a genus, but used in parsing to accommodate multiple errors - Dummy, - /// A function +pub enum Declaration { Function(Function), Structure(Structure), Enumeration(Enumeration), + Namespace(Namespace), } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Function { pub identifier: Token, // Will later become an Identifier pub inputs: Vec, pub output: Option, } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Structure { pub identifier: Token, // Will later become an Identifier pub contents: Vec, } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Enumeration { - pub identifier: Token, // Will later become an Identifier + pub identifier: Token, // Will later become an Identifier pub states: Vec, // Will later become an Identifier } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct NamedType { - pub name: Token, // Will later become an Identifier + pub name: Token, // Will later become an Identifier pub r#type: Type, } -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Type { pub identifier: Token, // Will later become an Identifier pub generic_args: Vec, } + +impl Display for Type { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let ident = match self.identifier.kind() { + crate::lexing::TokenKind::Identifier(ident) => ident, + _ => panic!("Tried to display a non identifier token in the Type display implementation. This is a bug"), + }; + + f.write_str(ident)?; + if !self.generic_args.is_empty() { + f.write_char('<')?; + let mut first_run = true; + for arg in &self.generic_args { + if !first_run { + f.write_str(", ")?; + } else { + first_run = false; + } + write!(f, "{}", arg)?; + } + f.write_char('>') + } else { + f.write_str("") + } + } +} diff --git a/trixy/trixy-lang_parser/src/error.rs b/trixy/trixy-lang_parser/src/error.rs index 3211a78..ccbc4fc 100644 --- a/trixy/trixy-lang_parser/src/error.rs +++ b/trixy/trixy-lang_parser/src/error.rs @@ -11,7 +11,7 @@ pub enum TrixyError { } /// The context of an Error. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct ErrorContext { /// The span of the error in the source file pub span: TokenSpan, diff --git a/trixy/trixy-lang_parser/src/lexing/mod.rs b/trixy/trixy-lang_parser/src/lexing/mod.rs index d77962a..58f77d6 100644 --- a/trixy/trixy-lang_parser/src/lexing/mod.rs +++ b/trixy/trixy-lang_parser/src/lexing/mod.rs @@ -72,7 +72,7 @@ impl TokenStream { /// } /// ``` /// signals, that the token starts at the 20th char in the source file and ends on the 23rd. -#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)] +#[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)] pub struct TokenSpan { /// The start of the token span pub start: usize, @@ -81,7 +81,7 @@ pub struct TokenSpan { } /// A Token -#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone)] +#[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone)] pub struct Token { /// The token's original location in the source file pub span: TokenSpan, @@ -109,7 +109,7 @@ impl Token { } /// Possibly kinds of tokens -#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone)] pub enum TokenKind { Keyword(Keyword), Identifier(String), @@ -124,6 +124,7 @@ pub enum TokenKind { SquareOpen, SquareClose, /// This is not a real TokenKind, but only used for error handling + #[default] Dummy, } diff --git a/trixy/trixy-lang_parser/src/main.rs b/trixy/trixy-lang_parser/src/main.rs index 277fb76..2d80a87 100644 --- a/trixy/trixy-lang_parser/src/main.rs +++ b/trixy/trixy-lang_parser/src/main.rs @@ -23,11 +23,18 @@ pub enum Command { /// The file containing the trixy code to tokenize file: PathBuf, }, + /// Check syntax, without type checking Parse { #[clap(value_parser)] /// The file containing the trixy code to parse file: PathBuf, }, + /// Type check + Process { + #[clap(value_parser)] + /// The file containing the trixy code to process + file: PathBuf, + }, } pub fn main() { @@ -68,5 +75,36 @@ pub fn main() { }; println!("{:#?}", parsed); } + Command::Process { file } => { + let input = fs::read_to_string(file).unwrap(); + + let input_tokens = match TokenStream::lex(&input) { + Ok(ok) => ok, + Err(err) => { + eprintln!("Error while tokenizing:"); + eprintln!("{}", err); + exit(1); + } + }; + + let parsed = match input_tokens.parse_unchecked() { + Ok(ok) => ok, + Err(err) => { + eprintln!("Error while doing the first (unchecked) parsing run:"); + eprintln!("{}", err); + exit(1) + } + }; + + let processed = match parsed.process(input) { + Ok(ok) => ok, + Err(err) => { + eprintln!("Error while doing the seconde (checked) parsing run:"); + eprintln!("{}", err); + exit(1) + } + }; + println!("{:#?}", processed); + }, } } diff --git a/trixy/trixy-lang_parser/src/parsing/checked/error.rs b/trixy/trixy-lang_parser/src/parsing/checked/error.rs new file mode 100644 index 0000000..51a5434 --- /dev/null +++ b/trixy/trixy-lang_parser/src/parsing/checked/error.rs @@ -0,0 +1,82 @@ +use thiserror::Error; + +use std::{error::Error, fmt::Display}; + +use crate::{ + command_spec::checked::Identifier, + error::{AdditionalHelp, ErrorContext, ErrorContextDisplay}, + lexing::TokenSpan, + parsing::unchecked::error::SpannedParsingError as OldSpannedParsingError, +}; + +#[derive(Error, Debug)] +pub enum ParsingError { + #[error("The type ('{r#type}') was not declared before!")] + TypeNotDeclared { r#type: Identifier, span: TokenSpan }, + #[error(transparent)] + PreParseError(#[from] OldSpannedParsingError), +} + +impl ParsingError { + pub fn span(&self) -> &TokenSpan { + match self { + ParsingError::TypeNotDeclared { span, .. } => span, + ParsingError::PreParseError(err) => err.source.span(), + } + } +} + +impl AdditionalHelp for ParsingError { + fn additional_help(&self) -> String { + match self { + ParsingError::TypeNotDeclared { .. } => "This type should have been mentioned in the namespaces above, or in the namespace of this type usage".to_owned(), + ParsingError::PreParseError(err) => ErrorContextDisplay::source(err).additional_help(), + } + } +} + +#[derive(Debug)] +pub struct SpannedParsingError { + pub source: ParsingError, + pub context: ErrorContext, +} + +impl Error for SpannedParsingError { + fn source(&self) -> Option<&(dyn Error + 'static)> { + Some(&self.source) + } +} + +impl Display for SpannedParsingError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.error_fmt(f) + } +} + +impl ErrorContextDisplay for SpannedParsingError { + type Error = ParsingError; + + fn context(&self) -> &crate::error::ErrorContext { + &self.context + } + + fn line_number(&self) -> usize { + self.context.line_number + } + + fn line_above(&self) -> &str { + &self.context.line_above + } + + fn line_below(&self) -> &str { + &self.context.line_below + } + + fn line(&self) -> &str { + &self.context.line + } + + fn source(&self) -> &::Error { + &self.source + } +} diff --git a/trixy/trixy-lang_parser/src/parsing/checked/mod.rs b/trixy/trixy-lang_parser/src/parsing/checked/mod.rs new file mode 100644 index 0000000..65b1e91 --- /dev/null +++ b/trixy/trixy-lang_parser/src/parsing/checked/mod.rs @@ -0,0 +1,230 @@ +use std::mem; + +use crate::{ + command_spec::{ + checked::{ + CommandSpec, Enumeration, Function, Identifier, NamedType, Namespace, Structure, Type, + BASE_TYPES, + }, + unchecked::{ + CommandSpec as UncheckedCommandSpec, Enumeration as UncheckedEnumeration, + Function as UncheckedFunction, NamedType as UncheckedNamedType, + Namespace as UncheckedNamespace, Structure as UncheckedStructure, + Type as UncheckedType, + }, + }, + error::ErrorContext, + lexing::{TokenKind, TokenStream}, +}; + +use self::error::{ParsingError, SpannedParsingError}; + +mod error; +#[cfg(test)] +mod test; + +struct Parser { + command_spec: UncheckedCommandSpec, + structures: Vec, + enumerations: Vec, + original_file: String, +} + +impl TokenStream { + pub fn parse(mut self) -> Result { + let original_file = mem::take(&mut self.original_file); + + let unchecked = self.parse_unchecked().map_err(|err| { + let span = *err.source.span(); + SpannedParsingError { + source: ParsingError::from(err), + context: ErrorContext::from_span(span, &original_file), + } + })?; + + let checked = Parser { + command_spec: unchecked, + structures: vec![], + enumerations: vec![], + original_file, + } + .parse()?; + Ok(checked) + } +} + +impl UncheckedCommandSpec { + pub fn process(self, original_file: String) -> Result { + let checked = Parser { + command_spec: self, + structures: vec![], + enumerations: vec![], + original_file, + } + .parse()?; + Ok(checked) + } +} + +impl Parser { + fn parse(mut self) -> Result { + let namespace: UncheckedNamespace = + UncheckedNamespace::from(mem::take(&mut self.command_spec)); + let namespace = self.process_namespace(namespace).map_err(|err| { + let span = *err.span(); + SpannedParsingError { + source: err, + context: ErrorContext::from_span(span, &self.original_file), + } + })?; + Ok(namespace.into()) + } + + fn process_namespace( + &mut self, + namespace: UncheckedNamespace, + ) -> Result { + let name = match namespace.name.kind { + TokenKind::Identifier(ident) => Identifier { name: ident }, + // This is not really used, so the value put here does not matter + TokenKind::Dummy => Identifier { + name: "".to_owned(), + }, + _ => unreachable!("This should never be more than these two enum veriants"), + }; + + let mut enumerations = vec![]; + let mut enumerations_counter = 0; + for enumeration in namespace.enumerations { + enumerations.push(self.process_enumeration(enumeration)?); + enumerations_counter += 1; + } + let mut structures = vec![]; + let mut structures_counter = 0; + for structure in namespace.structures { + structures.push(self.process_structure(structure)?); + structures_counter += 1; + } + + let mut functions = vec![]; + for function in namespace.functions { + functions.push(self.process_function(function)?); + } + let mut namespaces = vec![]; + for namespace in namespace.namespaces { + namespaces.push(self.process_namespace(namespace)?); + } + + // Remove added enums and structs again + (0..structures_counter).for_each(|_| { + self.structures.pop(); + }); + (0..enumerations_counter).for_each(|_| { + self.enumerations.pop(); + }); + + Ok(Namespace { + name, + functions, + structures, + enumerations, + namespaces, + }) + } + + fn process_function( + &mut self, + mut function: UncheckedFunction, + ) -> Result { + let identifier = mem::take(&mut function.identifier.kind).into(); + let mut inputs = vec![]; + for input in function.inputs { + inputs.push(self.process_named_type(input)?); + } + let output = if let Some(r#type) = function.output { + Some(self.process_type(r#type)?) + } else { + None + }; + + Ok(Function { + identifier, + inputs, + output, + }) + } + + fn process_enumeration( + &mut self, + mut enumeration: UncheckedEnumeration, + ) -> Result { + self.enumerations.push(enumeration.clone()); + + let identifier = mem::take(&mut enumeration.identifier.kind).into(); + + let mut states = vec![]; + for mut state in enumeration.states { + states.push(mem::take(&mut state.kind).into()) + } + + Ok(Enumeration { identifier, states }) + } + + fn process_structure( + &mut self, + mut structure: UncheckedStructure, + ) -> Result { + self.structures.push(structure.clone()); + + let identifier: Identifier = mem::take(&mut structure.identifier.kind).into(); + let mut contents = vec![]; + for named_type in structure.contents { + contents.push(self.process_named_type(named_type)?); + } + + Ok(Structure { + identifier, + contents, + }) + } + + fn process_named_type( + &mut self, + mut named_type: UncheckedNamedType, + ) -> Result { + let name: Identifier = mem::take(&mut named_type.name.kind).into(); + let r#type: Type = self.process_type(named_type.r#type)?; + Ok(NamedType { name, r#type }) + } + + fn process_type(&mut self, mut r#type: UncheckedType) -> Result { + let identifier: Identifier = mem::take(&mut r#type.identifier.kind).into(); + + if !self + .structures + .iter() + .map(|r#struct| Into::::into(r#struct.identifier.kind.clone())) + .any(|ident| ident == identifier) + && !self + .enumerations + .iter() + .map(|r#enum| Into::::into(r#enum.identifier.kind.clone())) + .any(|ident| ident == identifier) + && !BASE_TYPES.iter().any(|ident| ident.name == identifier.name) + { + return Err(ParsingError::TypeNotDeclared { + r#type: identifier, + span: r#type.identifier.span, + }); + } + + let mut generic_args = vec![]; + for generic_arg in r#type.generic_args { + generic_args.push(self.process_type(generic_arg)?); + } + Ok(Type { + identifier, + generic_args, + }) + } +} diff --git a/trixy/trixy-lang_parser/src/parsing/checked/test.rs b/trixy/trixy-lang_parser/src/parsing/checked/test.rs new file mode 100644 index 0000000..2326b11 --- /dev/null +++ b/trixy/trixy-lang_parser/src/parsing/checked/test.rs @@ -0,0 +1,134 @@ +use crate::command_spec::checked::{ + CommandSpec, Enumeration, Function, Identifier, NamedType, Namespace, Structure, Type, +}; +use crate::lexing::TokenStream; + +#[test] +fn test_full() { + let input = "nasp trinitrix { + struct Callback { + func: Function, + timeout: Integer, + }; + + enum CallbackPriority { + High, + Medium, + Low, + }; + + fn execute_callback(callback: Callback, priority: CallbackPriority); +}"; + let output = TokenStream::lex(&input).unwrap().parse().unwrap(); + let expected = CommandSpec { + structures: vec![], + enumerations: vec![], + functions: vec![], + namespaces: vec![Namespace { + name: Identifier { + name: "trinitrix".to_owned(), + }, + functions: vec![Function { + identifier: Identifier { + name: "execute_callback".to_owned(), + }, + inputs: vec![ + NamedType { + name: Identifier { + name: "callback".to_owned(), + }, + r#type: Type { + identifier: Identifier { + name: "Callback".to_owned(), + }, + generic_args: vec![], + }, + }, + NamedType { + name: Identifier { + name: "priority".to_owned(), + }, + r#type: Type { + identifier: Identifier { + name: "CallbackPriority".to_owned(), + }, + generic_args: vec![], + }, + }, + ], + output: None, + }], + structures: vec![Structure { + identifier: Identifier { + name: "Callback".to_owned(), + }, + contents: vec![ + NamedType { + name: Identifier { + name: "func".to_owned(), + }, + r#type: Type { + identifier: Identifier { + name: "Function".to_owned(), + }, + generic_args: vec![], + }, + }, + NamedType { + name: Identifier { + name: "timeout".to_owned(), + }, + r#type: Type { + identifier: Identifier { + name: "Integer".to_owned(), + }, + generic_args: vec![], + }, + }, + ], + }], + enumerations: vec![Enumeration { + identifier: Identifier { + name: "CallbackPriority".to_owned(), + }, + states: vec![ + Identifier { + name: "High".to_owned(), + }, + Identifier { + name: "Medium".to_owned(), + }, + Identifier { + name: "Low".to_owned(), + }, + ], + }], + namespaces: vec![], + }], + }; + assert_eq!(output, expected); +} + +#[test] +fn test_failing() { + let input = "struct Callback { + func: Function, + timeout: Integer, +}; + +// The type \"Name\" should not be defined +fn execute_callback(callback: Name); +"; + let output = TokenStream::lex(&input).unwrap().parse(); + match output.unwrap_err().source { + super::error::ParsingError::TypeNotDeclared { r#type, .. } => { + assert_eq!( + r#type, + Identifier { + name: "Name".to_owned() + } + ) + } + _ => panic!("Wrong error in test!"), + }; +} diff --git a/trixy/trixy-lang_parser/src/parsing/mod.rs b/trixy/trixy-lang_parser/src/parsing/mod.rs index 435b2bc..d6a8fef 100644 --- a/trixy/trixy-lang_parser/src/parsing/mod.rs +++ b/trixy/trixy-lang_parser/src/parsing/mod.rs @@ -1,4 +1,2 @@ -mod error; mod unchecked; -#[cfg(test)] -mod test; +mod checked; diff --git a/trixy/trixy-lang_parser/src/parsing/test.rs b/trixy/trixy-lang_parser/src/parsing/test.rs deleted file mode 100644 index 2f73978..0000000 --- a/trixy/trixy-lang_parser/src/parsing/test.rs +++ /dev/null @@ -1,88 +0,0 @@ -use crate::{ - command_spec::unchecked::{CommandSpec, Declaration, Function, FunctionInput, Genus}, - lexing::{Token, TokenKind, TokenSpan, TokenStream}, -}; - -use super::error::ParsingError; - -use pretty_assertions::assert_eq; - -#[test] -fn test_failing() { - let input = " -fn print(message: CommandTransferValue); - -nasp trinitrix { {} - fn hi honner(name: String) -> String; ; -} - -"; - let parsed = TokenStream::lex(input).unwrap().parse_unchecked(); - let err = parsed.unwrap_err().source; - match err { - ParsingError::ExpectedDifferentToken { .. } => panic!("Wrong error"), - ParsingError::ExpectedKeyword { .. } => {} - } -} - -#[test] -fn test_full() { - let input = "fn print(message: CommandTransferValue); - -nasp trinitrix { - fn hi(name: String) -> String; -} -"; - let parsed = TokenStream::lex(input).unwrap().parse_unchecked().unwrap(); - let expected = CommandSpec { - declarations: vec![ - Declaration { - namespace: vec![], - genus: Genus::Function(Function { - identifier: Token { - span: TokenSpan { start: 3, end: 8 }, - kind: TokenKind::Identifier("print".to_owned()), - }, - inputs: vec![FunctionInput { - name: Token { - span: TokenSpan { start: 9, end: 16 }, - kind: TokenKind::Identifier("message".to_owned()), - }, - r#type: Token { - span: TokenSpan { start: 18, end: 38 }, - kind: TokenKind::Identifier("CommandTransferValue".to_owned()), - }, - }], - output: None, - }), - }, - Declaration { - namespace: vec![Token { - span: TokenSpan { start: 47, end: 56 }, - kind: TokenKind::Identifier("trinitrix".to_owned()), - }], - genus: Genus::Function(Function { - identifier: Token { - span: TokenSpan { start: 66, end: 68 }, - kind: TokenKind::Identifier("hi".to_owned()), - }, - inputs: vec![FunctionInput { - name: Token { - span: TokenSpan { start: 69, end: 73 }, - kind: TokenKind::Identifier("name".to_owned()), - }, - r#type: Token { - span: TokenSpan { start: 75, end: 81 }, - kind: TokenKind::Identifier("String".to_owned()), - }, - }], - output: Some(Token { - span: TokenSpan { start: 86, end: 92 }, - kind: TokenKind::Identifier("String".to_owned()), - }), - }), - }, - ], - }; - assert_eq!(parsed, expected); -} diff --git a/trixy/trixy-lang_parser/src/parsing/error.rs b/trixy/trixy-lang_parser/src/parsing/unchecked/error.rs similarity index 88% rename from trixy/trixy-lang_parser/src/parsing/error.rs rename to trixy/trixy-lang_parser/src/parsing/unchecked/error.rs index a6036e3..d697087 100644 --- a/trixy/trixy-lang_parser/src/parsing/error.rs +++ b/trixy/trixy-lang_parser/src/parsing/unchecked/error.rs @@ -6,7 +6,7 @@ use crate::{ lexing::{TokenKind, TokenSpan}, }; -#[derive(Error, Debug)] +#[derive(Error, Debug, Clone)] pub enum ParsingError { #[error("Expected '{expected}' but received '{actual}'")] ExpectedDifferentToken { @@ -18,6 +18,14 @@ pub enum ParsingError { #[error("Expected a Keyword to start a new declaration, but found: '{actual}'")] ExpectedKeyword { actual: TokenKind, span: TokenSpan }, } +impl ParsingError { + pub fn span(&self) -> &TokenSpan { + match self { + ParsingError::ExpectedDifferentToken { span, .. } => span, + ParsingError::ExpectedKeyword { span, .. } => span, + } + } +} impl ParsingError { pub fn get_span(&self) -> TokenSpan { @@ -46,7 +54,7 @@ impl AdditionalHelp for ParsingError { } } -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct SpannedParsingError { pub source: ParsingError, pub context: ErrorContext, diff --git a/trixy/trixy-lang_parser/src/parsing/unchecked.rs b/trixy/trixy-lang_parser/src/parsing/unchecked/mod.rs similarity index 78% rename from trixy/trixy-lang_parser/src/parsing/unchecked.rs rename to trixy/trixy-lang_parser/src/parsing/unchecked/mod.rs index a477696..622f870 100644 --- a/trixy/trixy-lang_parser/src/parsing/unchecked.rs +++ b/trixy/trixy-lang_parser/src/parsing/unchecked/mod.rs @@ -1,13 +1,17 @@ use crate::{ command_spec::unchecked::{ - CommandSpec, Declaration, Enumeration, Function, NamedType, Structure, Type, + CommandSpec, Declaration, Enumeration, Function, NamedType, Namespace, Structure, Type, }, error::ErrorContext, lexing::{Token, TokenKind, TokenStream}, token, }; -use super::error::{ParsingError, SpannedParsingError}; +use self::error::{ParsingError, SpannedParsingError}; + +pub mod error; +#[cfg(test)] +mod test; impl TokenStream { pub fn parse_unchecked(self) -> Result { @@ -18,50 +22,41 @@ impl TokenStream { pub(super) struct Parser { token_stream: TokenStream, - current_namespaces: Vec, // This should in the second pass turn into Identifiers } impl Parser { fn new(mut token_stream: TokenStream) -> Self { token_stream.reverse(); - Self { - token_stream, - current_namespaces: vec![], - } + Self { token_stream } } fn parse(&mut self) -> Result { - let mut declarations = vec![]; + let mut output = CommandSpec::default(); while !self.token_stream.is_empty() { - let mut next = self.parse_next().map_err(|err| { + let next = self.parse_next().map_err(|err| { let span = err.get_span(); SpannedParsingError { source: err, context: ErrorContext::from_span(span, &self.token_stream.original_file), } })?; - - declarations.append(&mut next); + match next { + Declaration::Function(function) => output.functions.push(function), + Declaration::Structure(structure) => output.structures.push(structure), + Declaration::Enumeration(enumeration) => output.enumerations.push(enumeration), + Declaration::Namespace(namespace) => output.namespaces.push(namespace), + } } - Ok(CommandSpec { declarations }) + Ok(output) } - fn parse_next(&mut self) -> Result, ParsingError> { + fn parse_next(&mut self) -> Result { match self.peek().kind() { - token![nasp] => Ok(self.parse_namespace()?), - token![fn] => Ok(vec![Declaration::new_function( - self.parse_function()?, - self.current_namespaces.clone(), - )]), - token![struct] => Ok(vec![Declaration::new_structure( - self.parse_structure()?, - self.current_namespaces.clone(), - )]), - token![enum] => Ok(vec![Declaration::new_enumeration( - self.parse_enumeration()?, - self.current_namespaces.clone(), - )]), + token![nasp] => Ok(Declaration::Namespace(self.parse_namespace()?)), + token![fn] => Ok(Declaration::Function(self.parse_function()?)), + token![struct] => Ok(Declaration::Structure(self.parse_structure()?)), + token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)), _ => { let err = ParsingError::ExpectedKeyword { span: *self.peek().span(), @@ -93,20 +88,28 @@ impl Parser { }) } - fn parse_namespace(&mut self) -> Result, ParsingError> { + fn parse_namespace(&mut self) -> Result { self.expect(token![nasp])?; - let namespace_name = self.expect(token![Ident])?; - self.current_namespaces.push(namespace_name); + + let mut namespace = Namespace::default(); + namespace.name = self.expect(token![Ident])?; + self.expect(token![BraceOpen])?; - let mut declarations = vec![]; while !self.expect_peek(token![BraceClose]) { - declarations.append(&mut self.parse_next()?); + let next = self.parse_next()?; + match next { + Declaration::Function(function) => namespace.functions.push(function), + Declaration::Structure(structure) => namespace.structures.push(structure), + Declaration::Enumeration(enumeration) => namespace.enumerations.push(enumeration), + Declaration::Namespace(input_namespace) => { + namespace.namespaces.push(input_namespace) + } + } } self.expect(token![BraceClose])?; - self.current_namespaces.pop(); - Ok(declarations) + Ok(namespace) } fn parse_enumeration(&mut self) -> Result { diff --git a/trixy/trixy-lang_parser/src/parsing/unchecked/test.rs b/trixy/trixy-lang_parser/src/parsing/unchecked/test.rs new file mode 100644 index 0000000..a6627bf --- /dev/null +++ b/trixy/trixy-lang_parser/src/parsing/unchecked/test.rs @@ -0,0 +1,98 @@ +use pretty_assertions::assert_eq; + +use crate::{ + command_spec::unchecked::{CommandSpec, Function, NamedType, Namespace, Type}, + lexing::{Token, TokenKind, TokenSpan, TokenStream}, +}; + +use super::error::ParsingError; + +#[test] +fn test_failing() { + let input = " +fn print(message: CommandTransferValue); + +nasp trinitrix { {} + fn hi honner(name: String) -> String; ; +} + +"; + let parsed = TokenStream::lex(input).unwrap().parse_unchecked(); + let err = parsed.unwrap_err().source; + match err { + ParsingError::ExpectedDifferentToken { .. } => panic!("Wrong error"), + ParsingError::ExpectedKeyword { .. } => {} + } +} + +#[test] +fn test_full() { + let input = "fn print(message: CommandTransferValue); + +nasp trinitrix { + fn hi(name: String) -> String; +} +"; + let parsed = TokenStream::lex(input).unwrap().parse_unchecked().unwrap(); + let expected = CommandSpec { + structures: vec![], + enumerations: vec![], + functions: vec![Function { + identifier: Token { + span: TokenSpan { start: 3, end: 8 }, + kind: TokenKind::Identifier("print".to_owned()), + }, + inputs: vec![NamedType { + name: Token { + span: TokenSpan { start: 9, end: 16 }, + kind: TokenKind::Identifier("message".to_owned()), + }, + r#type: Type { + identifier: Token { + span: TokenSpan { start: 18, end: 38 }, + kind: TokenKind::Identifier("CommandTransferValue".to_owned()), + }, + generic_args: vec![], + }, + }], + output: None, + }], + namespaces: vec![Namespace { + name: Token { + span: TokenSpan { start: 47, end: 56 }, + kind: TokenKind::Identifier("trinitrix".to_owned()), + }, + functions: vec![Function { + identifier: Token { + span: TokenSpan { start: 66, end: 68 }, + kind: TokenKind::Identifier("hi".to_owned()), + }, + inputs: vec![NamedType { + name: Token { + span: TokenSpan { start: 69, end: 73 }, + kind: TokenKind::Identifier("name".to_owned()), + }, + r#type: Type { + identifier: Token { + span: TokenSpan { start: 75, end: 81 }, + kind: TokenKind::Identifier("String".to_owned()), + }, + generic_args: vec![], + }, + }], + output: Some(Type { + identifier: Token { + span: TokenSpan { start: 86, end: 92 }, + kind: TokenKind::Identifier("String".to_owned()), + }, + generic_args: vec![], + }), + }], + structures: vec![], + enumerations: vec![], + namespaces: vec![], + }], + }; + + assert_eq!(parsed, expected); +}