From 98c3b009dfdce4c15aef909779d61b36ee75b9bd Mon Sep 17 00:00:00 2001
From: Soispha
Date: Sun, 24 Mar 2024 19:16:52 +0100
Subject: [PATCH] feat(parser): Add support for parsing attributes

---
 trixy-parser/docs/grammar.ebnf              |  22 ++-
 trixy-parser/src/command_spec/checked.rs    |  11 +-
 trixy-parser/src/command_spec/unchecked.rs  |  59 +++++-
 trixy-parser/src/lexing/mod.rs              |  69 ++++++--
 trixy-parser/src/lexing/tokenizer.rs        | 156 +++++++++++++---
 trixy-parser/src/parsing/checked/error.rs   |   9 +-
 trixy-parser/src/parsing/checked/mod.rs     |  60 +++++--
 trixy-parser/src/parsing/unchecked/error.rs |  15 +-
 trixy-parser/src/parsing/unchecked/mod.rs   | 187 ++++++++++++--------
 9 files changed, 444 insertions(+), 144 deletions(-)

diff --git a/trixy-parser/docs/grammar.ebnf b/trixy-parser/docs/grammar.ebnf
index 9d7ea7b..2e6c098 100644
--- a/trixy-parser/docs/grammar.ebnf
+++ b/trixy-parser/docs/grammar.ebnf
@@ -20,7 +20,6 @@
 #*)
 
-
 # (*
 # Trixy is fully whitespace independent, this means that you can
 # interleave whitespace in the definitions.
@@ -31,22 +30,31 @@
 
 CommandSpec = {Function | Namespace | Enumeration | Structure } ;
 
-Function = {DocComment} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
-Namespace = {DocComment} "mod" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
-Structure = {DocComment} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}";
-Enumeration = {DocComment} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}";
+Function = {DocComment} {Attribute} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
+Namespace = {DocComment} {Attribute} "mod" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
+Structure = {DocComment} {Attribute} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}";
+Enumeration = {DocComment} {Attribute} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}";
 
 Type = Identifier ["<" Type {"," Type} ">"];
+StringLiteral = ["r"] "\"" {ANYTHING} "\"" | "r" "#" {"#"} "\"" {ANYTHING} "\"" "#" {"#"};
 
 Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
-DocIdentifier = {DocComment} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
+DocIdentifier = {DocComment} {Attribute} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
 
 NamedType = Identifier ":" Type;
-DocNamedType = {DocComment} Identifier ":" Type;
+DocNamedType = {DocComment} {Attribute} Identifier ":" Type;
 
+# (* This is syntax sugar for a `DocAttribute` *)
 DocComment = "///" {ANYTHING} LineEnding;
 
+Attribute = "#" "[" AttributeValue "]" LineEnding;
+AttributeValue = DeriveAttribute | DocAttribute | ErrorAttribute | MsgAttribute;
+ErrorAttribute = "error";
+MsgAttribute = "msg" "(" StringLiteral ")";
+DeriveAttribute = "derive" "(" "Error" ")";
+DocAttribute = "doc" "=" StringLiteral;
+
 Comment = "//" [ NOT ("/" {ANYTHING} LineEnding) | "//"] {ANYTHING} LineEnding;
 
 LineEnding = "\\n" | "\\r" | "\\r\\n";
diff --git a/trixy-parser/src/command_spec/checked.rs b/trixy-parser/src/command_spec/checked.rs
index 262b59d..23625ce 100644
--- a/trixy-parser/src/command_spec/checked.rs
+++ b/trixy-parser/src/command_spec/checked.rs
@@ -176,11 +176,20 @@ impl TokenKind {
 pub enum Attribute {
     #[allow(non_camel_case_types)]
     doc(String),
+    #[allow(non_camel_case_types)]
+    derive(DeriveValue),
+    #[allow(non_camel_case_types)]
+    error,
+    #[allow(non_camel_case_types)]
+    msg(String),
 }
 
 impl From<unchecked::Attribute> for Attribute {
     fn from(value: unchecked::Attribute) -> Self {
         match value {
-            unchecked::Attribute::doc { content: name, .. } => Self::doc(name),
+            unchecked::Attribute::doc { content: name, .. } => Self::doc(name.content),
+            unchecked::Attribute::derive { value, .. } => Self::derive(value),
+            unchecked::Attribute::error { .. } => Self::error,
+            unchecked::Attribute::msg { content, .. } => Self::msg(content.content),
         }
     }
 }
diff --git a/trixy-parser/src/command_spec/unchecked.rs b/trixy-parser/src/command_spec/unchecked.rs
index 8c0c67e..7712b87 100644
--- a/trixy-parser/src/command_spec/unchecked.rs
+++ b/trixy-parser/src/command_spec/unchecked.rs
@@ -71,7 +71,64 @@ pub enum Declaration {
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
 pub enum Attribute {
     #[allow(non_camel_case_types)]
-    doc { content: String, span: TokenSpan },
+    doc {
+        content: StringLiteral,
+        span: TokenSpan,
+    },
+    #[allow(non_camel_case_types)]
+    derive { value: DeriveValue, span: TokenSpan },
+    #[allow(non_camel_case_types)]
+    error { span: TokenSpan },
+    #[allow(non_camel_case_types)]
+    msg {
+        content: StringLiteral,
+        span: TokenSpan,
+    },
+}
+
+impl Display for Attribute {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            Attribute::doc { .. } => f.write_str("doc"),
+            Attribute::derive { .. } => f.write_str("derive"),
+            Attribute::error { .. } => f.write_str("error"),
+            Attribute::msg { .. } => f.write_str("msg"),
+        }
+    }
+}
+
+impl Attribute {
+    pub fn span(&self) -> TokenSpan {
+        match self {
+            Attribute::doc { span, .. } => *span,
+            Attribute::derive { span, .. } => *span,
+            Attribute::error { span, .. } => *span,
+            Attribute::msg { span, .. } => *span,
+        }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
+pub struct StringLiteral {
+    pub(crate) content: String,
+    pub(crate) span: TokenSpan,
+}
+
+impl From<Token> for StringLiteral {
+    fn from(value: Token) -> Self {
+        let span = *value.span();
+        let content = match value.kind {
+            TokenKind::StringLiteral(content) => content,
+            _ => unreachable!("A string literal was expected"),
+        };
+
+        Self { content, span }
+    }
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
+pub enum DeriveValue {
+    Error,
+}
 
 #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
diff --git a/trixy-parser/src/lexing/mod.rs b/trixy-parser/src/lexing/mod.rs
index 381e070..152e760 100644
--- a/trixy-parser/src/lexing/mod.rs
+++ b/trixy-parser/src/lexing/mod.rs
@@ -163,6 +163,7 @@ impl Token {
 #[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone)]
 pub enum TokenKind {
     Keyword(Keyword),
+    AttributeKeyword(AttributeKeyword),
     Identifier(String),
     Colon,
     Semicolon,
@@ -174,8 +175,10 @@ pub enum TokenKind {
     ParenClose,
     SquareOpen,
     SquareClose,
+    PoundSign,
+    EqualsSign,
+    StringLiteral(String),
 
-    DocComment(String),
     Comment(String),
 
     /// This is not a real TokenKind, but only used for error handling
@@ -190,13 +193,18 @@ impl TokenKind {
                 return true;
             }
         }
-        if let TokenKind::Comment(_) = self {
-            if let TokenKind::Comment(_) = other {
+        if let TokenKind::AttributeKeyword(_) = self {
+            if let TokenKind::AttributeKeyword(_) = other {
                 return true;
             }
         }
-        if let TokenKind::DocComment(_) = self {
-            if let TokenKind::DocComment(_) = other {
+        if let TokenKind::StringLiteral(_) = self {
+            if let TokenKind::StringLiteral(_) = other {
+                return true;
+            }
+        }
+        if let TokenKind::Comment(_) = self {
+            if let TokenKind::Comment(_) = other {
                 return true;
             }
         }
@@ -208,6 +216,7 @@ impl Display for TokenKind {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
             TokenKind::Keyword(word) => write!(f, "KEYWORD({})", word),
+            TokenKind::AttributeKeyword(word) => write!(f, "ATTRIBUTE_KEYWORD({})", word),
             TokenKind::Identifier(ident) => {
                 if ident.is_empty() {
                     write!(f, "IDENTIFIER")
@@ -215,6 +224,8 @@ impl Display for TokenKind {
                     write!(f, "IDENTIFIER({})", ident)
                 }
             }
+            TokenKind::EqualsSign => f.write_str("EQUALS_SIGN"),
+            TokenKind::PoundSign => f.write_str("POUND_SIGN"),
             TokenKind::Colon => f.write_str("COLON"),
             TokenKind::Semicolon => f.write_str("SEMICOLON"),
             TokenKind::Comma => f.write_str("COMMA"),
@@ -226,7 +237,7 @@ impl Display for TokenKind {
             TokenKind::Dummy => f.write_str("DUMMY"),
             TokenKind::SquareOpen => f.write_str("SQUAREOPEN"),
             TokenKind::SquareClose => f.write_str("SQUARECLOSE"),
-            TokenKind::DocComment(text) => write!(f, "DOCCOMMENT({})", text),
+            TokenKind::StringLiteral(text) => write!(f, r#"STRING_LITERAL("{}")"#, text),
             TokenKind::Comment(text) => write!(f, "COMMENT({})", text),
         }
     }
@@ -249,6 +260,26 @@ pub enum Keyword {
     r#enum,
 }
 
+/// Keywords used in attributes: (#[()])
+#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
+pub enum AttributeKeyword {
+    /// Derive a trait
+    #[allow(non_camel_case_types)]
+    derive,
+
+    /// Document the attached item
+    #[allow(non_camel_case_types)]
+    doc,
+
+    /// Mark the beginning of an error
+    #[allow(non_camel_case_types)]
+    error,
+
+    /// Encompass an error message
+    #[allow(non_camel_case_types)]
+    msg,
+}
+
 impl Display for Keyword {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
         match self {
@@ -260,6 +291,17 @@ impl Display for Keyword {
     }
 }
 
+impl Display for AttributeKeyword {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            AttributeKeyword::derive => f.write_str("derive"),
+            AttributeKeyword::doc => f.write_str("doc"),
+            AttributeKeyword::error => f.write_str("error"),
+            AttributeKeyword::msg => f.write_str("msg"),
+        }
+    }
+}
+
 /// Shorthand macro for generating a token from *anything* which can be
 /// converted into a `TokenKind`, or any of the `TokenKind` variants.
 ///
@@ -267,11 +309,11 @@
 ///
 /// ```
 /// use trixy_parser::token;
-/// # fn main() {
+///# fn main() {
 /// token![mod];
 /// token![;];
 /// token![Arrow];
-/// # }
+///# }
 /// ```
 #[macro_export]
 macro_rules! token {
@@ -292,6 +334,10 @@ macro_rules! token {
     [BraceClose] => { $crate::lexing::TokenKind::BraceClose };
     // [}] => { $crate::lexing::TokenKind::BraceClose };
     [ParenOpen] => { $crate::lexing::TokenKind::ParenOpen };
+    [PoundSign] => { $crate::lexing::TokenKind::PoundSign };
+    [#] => { $crate::lexing::TokenKind::PoundSign };
+    [EqualsSign] => { $crate::lexing::TokenKind::EqualsSign };
+    [=] => { $crate::lexing::TokenKind::EqualsSign };
     // [(] => { $crate::lexing::TokenKind::ParenthesisOpen };
     [ParenClose] => { $crate::lexing::TokenKind::ParenClose };
     // [)] => { $crate::lexing::TokenKind::ParenthesisClose };
@@ -301,13 +347,16 @@ macro_rules! token {
     [struct] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#struct) };
     [enum] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#enum) };
 
+    // The `derive` here is completely arbitrary. It is only for comparisons (see `same_kind`)
+    [AttributeKeyword] => { $crate::lexing::TokenKind::AttributeKeyword($crate::lexing::AttributeKeyword::derive) };
+
     // This is only works for checking for a identifier or comment
     // see the `same_kind` method on TokenKind
     [Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
     [Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
-    [DocComment] => { $crate::lexing::TokenKind::DocComment("".to_owned()) };
-    [DocCommentMatch] => { $crate::lexing::TokenKind::DocComment(_doc_comment) };
+    [StringLiteral] => { $crate::lexing::TokenKind::StringLiteral("".to_owned()) };
+
     [Comment] => { $crate::lexing::TokenKind::Comment("".to_owned()) };
 }
diff --git a/trixy-parser/src/lexing/tokenizer.rs b/trixy-parser/src/lexing/tokenizer.rs
index cb8a6d6..9ef8c47 100644
--- a/trixy-parser/src/lexing/tokenizer.rs
+++ b/trixy-parser/src/lexing/tokenizer.rs
@@ -28,7 +28,7 @@ use crate::{
 
 use super::{
     error::{LexingError, SpannedLexingError},
-    Token, TokenKind,
+    AttributeKeyword, Token, TokenKind,
 };
 
 pub(super) struct Tokenizer<'a> {
@@ -57,6 +57,20 @@ impl<'a> Tokenizer<'a> {
             SpannedLexingError { source: e, context }
         })?;
 
+        // if let TokenKind::StringLiteral(string) = &token_kind {
+        //     if string == "" {
+        //         eprintln!(
+        //             "Got an empty StringLiteral '{}', with span: {}..{}",
+        //             string,
+        //             start,
+        //             start + index
+        //         );
+        //         eprintln!(
+        //             "Removing following text: '{}'\n",
+        //             &self.remaining_text[..index],
+        //         );
+        //     }
+        // }
         self.chomp(index); // end - start
 
         let end = self.current_index;
@@ -83,6 +97,10 @@ impl<'a> Tokenizer<'a> {
             ',' => (TokenKind::Comma, 1),
             '<' => (TokenKind::SquareOpen, 1),
             '>' => (TokenKind::SquareClose, 1),
+            '#' => (TokenKind::PoundSign, 1),
+            '=' => (TokenKind::EqualsSign, 1),
+            '"' => tokenize_literal_string(self.remaining_text, "\"")?,
+            'r' => try_to_tokenize_raw_literal_string(self.remaining_text)?,
             '-' => tokenize_arrow(self.remaining_text)?,
             '/' => tokenize_comment(self.remaining_text)?,
 
@@ -167,38 +185,76 @@ fn tokenize_comment(text: &str) -> Result<(TokenKind, usize), LexingError> {
         Err(LexingError::ExpectedComment)
     } else {
         let text: &str = &text[2..];
-        if let Some('/') = text.chars().next() {
-            let text = &text[1..];
-            if end_of_line(&text) {
-                Ok((TokenKind::DocComment("".to_owned()), 1 + 3))
-            } else {
-                let (doc_comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
-
-                // trim whitespace
-                let doc_comment = doc_comment.trim_start();
-                let doc_comment = doc_comment.trim_end();
-
-                Ok((
-                    TokenKind::DocComment(doc_comment.to_owned()),
-                    chars_read + 3,
-                ))
-            }
+        if end_of_line(&text) {
+            Ok((TokenKind::Comment("".to_owned()), 1 + 2))
         } else {
-            if end_of_line(&text) {
-                Ok((TokenKind::Comment("".to_owned()), 1 + 2))
-            } else {
-                let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
+            let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
 
-                // trim whitespace
-                let comment = comment.trim_start();
-                let comment = comment.trim_end();
+            // trim trailing whitespace (only at the end, to avoid removing wanted whitespace)
+            let comment = comment.trim_end();
 
-                Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2))
-            }
+            Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2))
         }
     }
 }
 
+/// Check whether the 'r' begins a raw string literal; otherwise, tokenize an identifier
+fn try_to_tokenize_raw_literal_string(text: &str) -> Result<(TokenKind, usize), LexingError> {
+    // remove the 'r' at the beginning
+    let text_without_r = &text[1..];
+
+    let next_char = &text_without_r[0..1];
+    if next_char == "#" {
+        // The string is also escaped, count the hashtags
+        let (hashes, chars_read) = take_while(text_without_r, |ch| ch == '#')?;
+        let delimiter = format!("\"{}", hashes);
+
+        let (token, length) = tokenize_literal_string(&text_without_r[chars_read..], &delimiter)?;
+        // The 1 is the size of the extra 'r'; chars_read covers the opening hashes
+        Ok((token, length + 1 + chars_read))
+    } else if next_char == "\"" {
+        // regular raw string literal
+        let (token, length) = tokenize_literal_string(text_without_r, "\"")?;
+        // The 1 is the size of the extra 'r'
+        Ok((token, length + 1))
+    } else {
+        // if the 'r' is not followed by either a '#' or a '"', it must be part of an identifier
+        tokenize_ident(text)
+    }
+}
+
+fn tokenize_literal_string(text: &str, delimiter: &str) -> Result<(TokenKind, usize), LexingError> {
+    // The first char is always a quote (")
+    assert_eq!(&text[..1], "\"");
+    let text_without_quote = &text[1..];
+
+    if &text_without_quote[0..delimiter.len()] == delimiter {
+        // The literal string does not contain anything
+        Ok((TokenKind::StringLiteral("".to_owned()), 1 + delimiter.len()))
+    } else {
+        let mut predicates: Vec<_> = delimiter
+            .chars()
+            .map(|ch| move |ch2| ch2 == ch)
+            .collect();
+        let (literal, chars_read) =
+            take_until_successive_match(text_without_quote, &mut predicates)?;
+
+        // chars_read covers the literal itself; add the opening quote and the closing delimiter
+        Ok((
+            TokenKind::StringLiteral(literal.to_owned()),
+            chars_read + 1 + delimiter.len(),
+        ))
+    }
+}
+
 fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
     let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?;
 
@@ -208,6 +264,12 @@ fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
         "fn" => TokenKind::Keyword(Keyword::r#fn),
         "struct" => TokenKind::Keyword(Keyword::r#struct),
         "enum" => TokenKind::Keyword(Keyword::r#enum),
+
+        "derive" => TokenKind::AttributeKeyword(AttributeKeyword::derive),
+        "doc" => TokenKind::AttributeKeyword(AttributeKeyword::doc),
+        "error" => TokenKind::AttributeKeyword(AttributeKeyword::error),
+        "msg" => TokenKind::AttributeKeyword(AttributeKeyword::msg),
+
         other => TokenKind::Identifier(other.to_string()),
     };
 
@@ -252,6 +314,48 @@
         Ok((&data[..current_index], current_index))
     }
 }
+
+/// Consume chars until every predicate matches, in order, on consecutive chars.
+/// The matching delimiter itself is not consumed; the caller accounts for its length.
+fn take_until_successive_match<'a, F>(
+    data: &'a str,
+    preds: &mut [F],
+) -> Result<(&'a str, usize), LexingError>
+where
+    F: FnMut(char) -> bool,
+{
+    assert!(!preds.is_empty(), "Predicates need to be provided");
+
+    let mut current_index = 0;
+
+    'outer: for ch in data.chars() {
+        if preds[0](ch) {
+            // The first predicate matched; check whether the following
+            // chars satisfy the remaining predicates in order.
+            let mut lookahead = data[current_index + ch.len_utf8()..].chars();
+            let mut matched = true;
+            for pred in &mut preds[1..] {
+                match lookahead.next() {
+                    Some(next) if pred(next) => {}
+                    _ => {
+                        matched = false;
+                        break;
+                    }
+                }
+            }
+            if matched {
+                // Stop before the delimiter, so it is not part of the literal
+                break 'outer;
+            }
+        }
+        current_index += ch.len_utf8();
+    }
+
+    if current_index == 0 {
+        Err(LexingError::NoMatchesTaken)
+    } else {
+        Ok((&data[..current_index], current_index))
+    }
+}
 
 /// Skips input until the remaining string pattern starts with the pattern
 fn skip_until<'a>(mut src: &'a str, pattern: &str) -> &'a str {
diff --git a/trixy-parser/src/parsing/checked/error.rs b/trixy-parser/src/parsing/checked/error.rs
index 38498ea..13a1e49 100644
--- a/trixy-parser/src/parsing/checked/error.rs
+++ b/trixy-parser/src/parsing/checked/error.rs
@@ -24,7 +24,7 @@ use thiserror::Error;
 use std::{error::Error, fmt::Display};
 
 use crate::{
-    command_spec::checked::Identifier,
+    command_spec::{checked::Identifier, unchecked::Attribute},
     error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
     lexing::TokenSpan,
     parsing::unchecked::error::SpannedParsingError as OldSpannedParsingError,
@@ -63,6 +63,11 @@ pub enum ParsingError {
         r#type: Identifier,
         span: TokenSpan,
     },
+    #[error("The {specified} attribute can't be used here!")]
+    WrongAttributeInPosition {
+        specified: Attribute,
+        span: TokenSpan,
+    },
 }
 
 impl ParsingError {
@@ -74,6 +79,7 @@ impl ParsingError {
             ParsingError::EnumWithNamespaceNamePascal { enum_span, .. } => enum_span,
             ParsingError::NotEnoughGenericArgs { span, .. } => span,
             ParsingError::TooManyGenericArgs { span, .. } => span,
+            ParsingError::WrongAttributeInPosition { span, .. } => span,
         }
     }
 }
@@ -87,6 +93,7 @@ impl AdditionalHelp for ParsingError {
             | ParsingError::EnumWithNamespaceName {..} => "Change the name of this Enumeration as the generation process in trixy-macros needs to use this name".to_owned(),
             ParsingError::NotEnoughGenericArgs { got, expected_min, .. } => format!("Add generic args until you have gone from {} to {}", got, expected_min),
             ParsingError::TooManyGenericArgs { got, expected_max, .. } => format!("Remove generic args until you have gone from {} to {}", got, expected_max),
+            ParsingError::WrongAttributeInPosition { .. } => "Remove this attribute".to_owned(),
         }
     }
 }
diff --git a/trixy-parser/src/parsing/checked/mod.rs b/trixy-parser/src/parsing/checked/mod.rs
index d95e361..9a3163c 100644
--- a/trixy-parser/src/parsing/checked/mod.rs
+++ b/trixy-parser/src/parsing/checked/mod.rs
@@ -19,7 +19,7 @@
  * If not, see <https://www.gnu.org/licenses/>.
  */
 
-use std::mem;
+use std::{iter, mem};
 
 use convert_case::{Case, Casing};
 use trixy_types::BASE_TYPES;
 
 use crate::{
     command_spec::{
         checked::{
-            CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier, NamedType,
-            Namespace, Structure, Type,
+            self, CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier,
+            NamedType, Namespace, Structure, Type,
         },
         unchecked::{
             CommandSpec as UncheckedCommandSpec, DocNamedType as UncheckedDocNamedType,
@@ -39,7 +39,7 @@
         Variant,
     },
     error::ErrorContext,
-    lexing::{TokenKind, TokenSpan},
+    lexing::{Token, TokenKind, TokenSpan, TokenStream},
 };
 
 use self::error::{ParsingError, SpannedParsingError};
 
 pub mod error;
 #[cfg(test)]
 mod test;
 
+macro_rules! take_attrs {
+    ($name:expr, $($types:ident),*) => {
+        $name
+            .attributes
+            .into_iter()
+            .map(|val| {
+                take_attrs! {@process_val val, $($types),*}
+            })
+            .collect::<Result<Vec<_>, _>>()?
+ }; + (@process_val_last $iden:ident) => { + { + let span = $iden.span(); + return Err(ParsingError::WrongAttributeInPosition { + specified: $iden, + span, + }); + } + }; + (@process_val $iden:ident, $val:ident) => { + if let $crate::command_spec::unchecked::Attribute::$val{..} = $iden { + return Ok($iden.into()); + }; + take_attrs!{@process_val_last $iden} + }; + (@process_val $iden:ident, $val:ident, $($other:tt),+ $(,)*) => { + if let $crate::command_spec::unchecked::Attribute::$val{..} = $iden { + return Ok($iden.into()); + }; + take_attrs!{@process_val $iden, $($other),*} + }; +} + struct Parser { command_spec: UncheckedCommandSpec, structures: Vec, @@ -57,6 +90,7 @@ struct Parser { impl UncheckedCommandSpec { pub fn process(self, original_file: String) -> Result { + let original_file = TokenStream::replace(&original_file).to_string(); let checked = Parser { command_spec: self, structures: vec![], @@ -68,12 +102,6 @@ impl UncheckedCommandSpec { } } -macro_rules! pass_attrs_along { - ($name:ident) => { - $name.attributes.into_iter().map(|a| a.into()).collect() - }; -} - impl Parser { fn parse(mut self) -> Result { let namespace: UncheckedNamespace = @@ -142,7 +170,7 @@ impl Parser { structures, enumerations, namespaces, - attributes: pass_attrs_along!(namespace), + attributes: take_attrs! {namespace, doc}, }) } @@ -165,7 +193,7 @@ impl Parser { identifier, inputs, output, - attributes: pass_attrs_along!(function), + attributes: take_attrs! {function, doc}, }) } @@ -202,7 +230,7 @@ impl Parser { mem::take(&mut state.token.kind).to_identifier(Variant::DocNamedType); DocIdentifier { name: ident.name, - attributes: pass_attrs_along!(state), + attributes: take_attrs! {state, doc, msg}, variant: Variant::DocNamedType, } }) @@ -211,7 +239,7 @@ impl Parser { Ok(Enumeration { identifier, states, - attributes: pass_attrs_along!(enumeration), + attributes: take_attrs! {enumeration, doc, derive, error}, }) } @@ -231,7 +259,7 @@ impl Parser { Ok(Structure { identifier, contents, - attributes: pass_attrs_along!(structure), + attributes: take_attrs! {structure, doc, derive}, }) } @@ -254,7 +282,7 @@ impl Parser { Ok(DocNamedType { name, r#type, - attributes: pass_attrs_along!(doc_named_type), + attributes: take_attrs! {doc_named_type, doc}, }) } diff --git a/trixy-parser/src/parsing/unchecked/error.rs b/trixy-parser/src/parsing/unchecked/error.rs index 1047f16..80b2342 100644 --- a/trixy-parser/src/parsing/unchecked/error.rs +++ b/trixy-parser/src/parsing/unchecked/error.rs @@ -23,7 +23,7 @@ use std::{error::Error, fmt::Display}; use thiserror::Error; use crate::{ - command_spec::unchecked::Attribute, + command_spec::unchecked::{Attribute, StringLiteral}, error::{AdditionalHelp, ErrorContext, ErrorContextDisplay}, lexing::{TokenKind, TokenSpan}, }; @@ -46,19 +46,23 @@ pub enum ParsingError { #[error("Expected a Keyword to start a new declaration, but found: '{actual}'")] ExpectedKeyword { actual: TokenKind, span: TokenSpan }, - #[error("DocComment does not have target")] - TrailingDocComment { + #[error("Attribute does not have target")] + TrailingAttribute { comments: Vec, span: TokenSpan, }, + + #[error("Derive value is not known")] + WrongDeriveValue { specified: StringLiteral }, } impl ParsingError { pub fn span(&self) -> &TokenSpan { match self { ParsingError::ExpectedDifferentToken { span, .. } => span, ParsingError::ExpectedKeyword { span, .. } => span, - ParsingError::TrailingDocComment { span, .. } => span, + ParsingError::TrailingAttribute { span, .. 
             ParsingError::UnexpectedEOF { span, .. } => span,
+            ParsingError::WrongDeriveValue { specified } => &specified.span,
         }
     }
 
@@ -81,8 +85,9 @@ impl AdditionalHelp for ParsingError {
             ParsingError::ExpectedKeyword { actual, .. } => format!(
                 "I expected a keyword (that is something like 'fn' or 'mod') but you put a '{}' there!",
                 actual),
-            ParsingError::TrailingDocComment { .. } => "I expected some target (a function, namespace, enum, or something like this) which this doc comment annotates, but you put nothing there".to_owned(),
+            ParsingError::TrailingAttribute { .. } => "I expected some target (a function, namespace, enum, or something like this) which this attribute annotates, but you put nothing there".to_owned(),
             ParsingError::UnexpectedEOF { expected, .. } => format!("Put the expected token ('{expected}') here."),
+            ParsingError::WrongDeriveValue { specified } => format!("'{}' is not a valid derive value! Take a look at the grammar file", specified.content),
         }
     }
 }
diff --git a/trixy-parser/src/parsing/unchecked/mod.rs b/trixy-parser/src/parsing/unchecked/mod.rs
index 153ff74..c50456f 100644
--- a/trixy-parser/src/parsing/unchecked/mod.rs
+++ b/trixy-parser/src/parsing/unchecked/mod.rs
@@ -19,15 +19,15 @@
  * If not, see <https://www.gnu.org/licenses/>.
  */
 
-use std::mem;
+use std::{iter::once, mem};
 
 use crate::{
     command_spec::unchecked::{
-        Attribute, CommandSpec, Declaration, DocNamedType, DocToken, Enumeration, Function,
-        NamedType, Namespace, Structure, Type,
+        Attribute, CommandSpec, Declaration, DeriveValue, DocNamedType, DocToken, Enumeration,
+        Function, NamedType, Namespace, StringLiteral, Structure, Type,
     },
     error::ErrorContext,
-    lexing::{Token, TokenKind, TokenSpan, TokenStream},
+    lexing::{AttributeKeyword, Token, TokenKind, TokenSpan, TokenStream},
     token,
 };
@@ -46,7 +46,7 @@ impl TokenStream {
 
 pub(super) struct Parser {
     token_stream: TokenStream,
-    active_doc_comments: Vec<Attribute>,
+    current_attributes: Vec<Attribute>,
     last_span: TokenSpan,
 }
 
@@ -55,7 +55,7 @@ impl Parser {
         token_stream.reverse();
         Self {
             token_stream,
-            active_doc_comments: vec![],
+            current_attributes: vec![],
             last_span: TokenSpan::default(),
         }
     }
@@ -89,38 +89,21 @@ impl Parser {
             token![fn] => Ok(Declaration::Function(self.parse_function()?)),
             token![struct] => Ok(Declaration::Structure(self.parse_structure()?)),
             token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)),
-            token![DocCommentMatch] => {
-                while self.expect_peek(token![DocComment]) {
-                    let comment_to_push = {
-                        let doc_comment = self.expect(token![DocComment])?;
-                        let span = *doc_comment.span();
-                        let name = if let TokenKind::DocComment(content) = doc_comment.kind {
-                            content
-                        } else {
-                            unreachable!("The expect should have accounted for that case");
-                        };
-
-                        Attribute::doc {
-                            content: name,
-                            span,
-                        }
-                    };
-                    self.active_doc_comments.push(comment_to_push);
-                }
+            token![#] => {
+                let attributes = self.parse_attributes()?;
+                self.current_attributes.extend(attributes);
 
                 if self.token_stream.is_empty() {
                     fn get_span(attr: Option<&Attribute>) -> TokenSpan {
-                        match attr.expect("Something should be here") {
-                            Attribute::doc { span, .. } => *span,
-                        }
+                        attr.expect("Something should be here").span()
                     }
                     let span = TokenSpan::from_range(
-                        get_span(self.active_doc_comments.first()),
-                        get_span(self.active_doc_comments.last()),
+                        get_span(self.current_attributes.first()),
+                        get_span(self.current_attributes.last()),
                     );
-                    Err(ParsingError::TrailingDocComment {
-                        comments: mem::take(&mut self.active_doc_comments),
+                    Err(ParsingError::TrailingAttribute {
+                        comments: mem::take(&mut self.current_attributes),
                         span,
                     })
                 } else {
@@ -159,29 +142,78 @@ impl Parser {
         })
     }
 
-    fn parse_doc_comments(&mut self) -> Result<Vec<Attribute>, ParsingError> {
-        let mut attrs = mem::take(&mut self.active_doc_comments);
+    fn parse_bracket_string_literal(&mut self) -> Result<StringLiteral, ParsingError> {
+        self.expect(token![CurvedBracketOpen])?;
+        let string_literal = self.expect(token![StringLiteral])?;
+        self.expect(token![CurvedBracketClose])?;
+        let string_literal = Into::<StringLiteral>::into(string_literal);
+        Ok(string_literal)
+    }
 
-        while self.expect_peek(token![DocComment]) {
-            attrs.push({
-                let doc_comment = self.expect(token![DocComment])?;
-                let span = *doc_comment.span();
-                let name = if let TokenKind::DocComment(content) = doc_comment.kind {
-                    content
-                } else {
-                    unreachable!("The expect should have accounted for that case");
-                };
-                Attribute::doc {
-                    content: name,
-                    span,
-                }
+    fn parse_attribute_value(&mut self) -> Result<Attribute, ParsingError> {
+        let ident = self.expect(token![AttributeKeyword])?;
+        let span = *ident.span();
+        let TokenKind::AttributeKeyword(keyword) = ident.kind() else {
+            unreachable!("This is checked in the `expect` above")
+        };
+
+        let attribute = match keyword {
+            AttributeKeyword::derive => {
+                let string_literal = self.parse_bracket_string_literal()?;
+                match string_literal.content.as_str() {
+                    "Error" => Ok(Attribute::derive {
+                        value: DeriveValue::Error,
+                        span,
+                    }),
+                    _ => Err(error::ParsingError::WrongDeriveValue {
+                        specified: string_literal,
+                    }),
+                }
+            }
+            AttributeKeyword::doc => {
+                self.expect(token![=])?;
+                let string_literal = self.expect(token![StringLiteral])?;
+                let string_literal = Into::<StringLiteral>::into(string_literal);
+                Ok(Attribute::doc {
+                    content: string_literal,
+                    span,
+                })
+            }
+            AttributeKeyword::error => Ok(Attribute::error { span }),
+            AttributeKeyword::msg => {
+                let string_literal = self.parse_bracket_string_literal()?;
+                Ok(Attribute::msg {
+                    content: string_literal,
+                    span,
+                })
+            }
+        }?;
+
+        Ok(attribute)
+    }
+
+    fn parse_attributes(&mut self) -> Result<Vec<Attribute>, ParsingError> {
+        let mut attrs = mem::take(&mut self.current_attributes);
+
+        while self.expect_peek(token![#]) {
+            attrs.push({
+                self.expect(token![#])?;
+                self.expect(token![SquareBracketOpen])?;
+
+                let attribute = self.parse_attribute_value()?;
+
+                self.expect(token![SquareBracketClose])?;
+                attribute
             });
         }
         Ok(attrs)
     }
 
     fn parse_namespace(&mut self) -> Result<Namespace, ParsingError> {
-        let attributes = self.parse_doc_comments()?;
+        let attributes = self.parse_attributes()?;
         self.expect(token![mod])?;
 
         let mut namespace = Namespace {
@@ -209,29 +241,30 @@ impl Parser {
     }
 
     fn parse_enumeration(&mut self) -> Result<Enumeration, ParsingError> {
-        let attributes = self.parse_doc_comments()?;
+        let attributes = self.parse_attributes()?;
         self.expect(token![enum])?;
        let identifier = self.expect(token![Ident])?;
         self.expect(token![BraceOpen])?;
 
         let mut states = vec![];
-        if self.expect_peek(token![Ident]) {
-            let attributes = self.parse_doc_comments()?;
+        if self.expect_peek(token![Ident]) || self.expect_peek(token![#]) {
+            let attributes = self.parse_attributes()?;
             states.push(DocToken {
                 token: self.expect(token![Ident])?,
                 attributes,
             });
-        }
-        while self.expect_peek(token![Comma]) {
-            self.expect(token![Comma])?;
-            if self.expect_peek(token![Ident]) {
-                let attributes = self.parse_doc_comments()?;
-                states.push(DocToken {
-                    token: self.expect(token![Ident])?,
-                    attributes,
-                });
-            } else {
-                break;
+
+            while self.expect_peek(token![Comma]) {
+                self.expect(token![Comma])?;
+                if self.expect_peek(token![Ident]) || self.expect_peek(token![#]) {
+                    let attributes = self.parse_attributes()?;
+                    states.push(DocToken {
+                        token: self.expect(token![Ident])?,
+                        attributes,
+                    });
+                } else {
+                    break;
+                }
             }
         }
         self.expect(token![BraceClose])?;
@@ -243,7 +276,7 @@ impl Parser {
     }
 
     fn parse_structure(&mut self) -> Result<Structure, ParsingError> {
-        let attributes = self.parse_doc_comments()?;
+        let attributes = self.parse_attributes()?;
         self.expect(token![struct])?;
         let name = self.expect(token![Ident])?;
         self.expect(token![BraceOpen])?;
@@ -254,7 +287,7 @@
         }
         while self.expect_peek(token![Comma]) {
             self.expect(token![Comma])?;
-            if self.expect_peek(token![Ident]) || self.expect_peek(token![DocComment]) {
+            if self.expect_peek(token![Ident]) || self.expect_peek(token![PoundSign]) {
                 contents.push(self.parse_doc_named_type()?);
             } else {
                 break;
@@ -277,7 +310,7 @@
     }
 
     fn parse_doc_named_type(&mut self) -> Result<DocNamedType, ParsingError> {
-        let attributes = self.parse_doc_comments()?;
+        let attributes = self.parse_attributes()?;
         let name = self.expect(token![Ident])?;
         self.expect(token![Colon])?;
         let r#type = self.parse_type()?;
@@ -289,7 +322,7 @@
     }
 
     fn parse_function(&mut self) -> Result<Function, ParsingError> {
-        let attributes = self.parse_doc_comments()?;
+        let attributes = self.parse_attributes()?;
        self.expect(token![fn])?;
         let name = self.expect(token![Ident])?;
         self.expect(token![ParenOpen])?;
@@ -322,20 +355,20 @@
     /// For example:
     ///
     /// ```dont_run
-    /// use trixy_lang_parser::{
-    ///     lexing::{Keyword, TokenKind, TokenStream},
-    ///     parsing::unchecked::Parser,
-    ///     token,
-    /// };
+    ///#use trixy_lang_parser::{
+    ///#    lexing::{Keyword, TokenKind, TokenStream},
+    ///#    parsing::unchecked::Parser,
+    ///#    token,
+    ///#};
     ///
-    /// # fn main() {
-    /// let token_stream = TokenStream::lex("mod {}").unwrap();
-    /// let parser = Parser::new(token_stream);
-    /// assert_eq!(parser.expect(token![mod]).unwrap(), TokenKind::Keyword(Keyword::mod));
-    /// assert_eq!(parser.expect(token![BraceOpen]).unwrap(), TokenKind::BraceOpen);
-    /// assert_eq!(parser.expect(token![BraceClose]).unwrap(), TokenKind::BraceClose);
-    /// assert!(parser.expect(token![BraceClose]).is_err());
-    /// # }
+    ///# fn main() {
+    /// let token_stream = TokenStream::lex("mod {}").unwrap();
+    /// let parser = Parser::new(token_stream);
+    /// assert_eq!(parser.expect(token![mod]).unwrap(), TokenKind::Keyword(Keyword::mod));
+    /// assert_eq!(parser.expect(token![CurlyBracketOpen]).unwrap(), TokenKind::BraceOpen);
+    /// assert_eq!(parser.expect(token![CurlyBracketClose]).unwrap(), TokenKind::BraceClose);
+    /// assert!(parser.expect(token![CurlyBracketClose]).is_err());
+    ///# }
     /// ```
     ///
     pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
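
Reviewer note: below is a minimal, hand-written sketch of the surface syntax this patch accepts, fed through `TokenStream::lex` the way the doctests above do. The declarations (`sound`, `PlaybackError`, `play`) are made up for illustration and do not come from the test suite, and the crate path is assumed to be `trixy_parser` (the doctests in this patch use both `trixy_parser` and `trixy_lang_parser`). Note that `parse_bracket_string_literal` expects a quoted string literal between the parentheses, so the derive value is written `derive("Error")` here, while the `DeriveAttribute` rule in grammar.ebnf reads as a bare `Error`; one of the two presumably needs to be aligned with the other.

    fn main() {
        // Trixy input exercising the new attribute forms; names are illustrative only.
        let input = r##"
    mod sound {
        #[derive("Error")]
        #[error]
        enum PlaybackError {
            #[msg("The audio device is unavailable")]
            DeviceUnavailable,
            #[msg("The file format is not supported")]
            UnsupportedFormat,
        }

        /// Plays the given file
        fn play(file: String) -> PlaybackError;
    }
    "##;
        // Lexing should succeed; the checked parser additionally validates
        // that each attribute is legal in its position.
        assert!(trixy_parser::lexing::TokenStream::lex(input).is_ok());
    }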
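
Also for review: the checked parser's `take_attrs!` calls are easiest to audit in expanded form. Hand-expanding `take_attrs! {enumeration, doc, derive, error}` from `parse_enumeration` gives roughly the following (a sketch of the macro's expansion, not compiler output; the type annotation is added for readability):

    let attributes: Vec<Attribute> = enumeration
        .attributes
        .into_iter()
        .map(|val| {
            // Each attribute kind listed in the macro call is accepted as-is...
            if let crate::command_spec::unchecked::Attribute::doc { .. } = val {
                return Ok(val.into());
            };
            if let crate::command_spec::unchecked::Attribute::derive { .. } = val {
                return Ok(val.into());
            };
            if let crate::command_spec::unchecked::Attribute::error { .. } = val {
                return Ok(val.into());
            };
            // ...and any other kind is rejected with its span attached.
            let span = val.span();
            return Err(ParsingError::WrongAttributeInPosition {
                specified: val,
                span,
            });
        })
        .collect::<Result<Vec<_>, _>>()?;

So a `#[msg(...)]` on the enum itself surfaces as `WrongAttributeInPosition`, while the same attribute on one of the enum's states is accepted through the `take_attrs! {state, doc, msg}` call.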
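
Finally, the length bookkeeping for raw string literals is easiest to verify against a concrete token. For `r##"a "# b"##`, `try_to_tokenize_raw_literal_string` counts the two hashes after the `r` and builds the closing delimiter `"##` (a quote followed by the counted hashes), so the embedded `"#` does not terminate the literal. A plain-Rust check of the arithmetic, independent of the patch:

    fn main() {
        let token = r###"r##"a "# b"##"###;
        // 'r' + 2 opening hashes + opening quote + 6 content chars + 3-char closing delimiter
        assert_eq!(token.len(), 1 + 2 + 1 + 6 + 3);
    }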