diff --git a/trixy/trixy-lang_parser/docs/grammar.ebnf b/trixy/trixy-lang_parser/docs/grammar.ebnf index abe1be8..d495fc3 100644 --- a/trixy/trixy-lang_parser/docs/grammar.ebnf +++ b/trixy/trixy-lang_parser/docs/grammar.ebnf @@ -6,15 +6,27 @@ # - Block comments (`/* */`). # *) -CommandSpec = { Function | Namespace | Enumeration | Structure } ; -Function = "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ; -Namespace = "nasp" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ; -Structure = "struct" Identifier "{" [NamedType {"," NamedType } [","]] "}" ";"; -Enumeration = "enum" Identifier "{" [Identifier {"," Identifier} [","]] "}" ";"; -Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ; -NamedType = Identifier ":" Type; +CommandSpec = {Function | Namespace | Enumeration | Structure } ; + +Function = {DocComment} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ; +Namespace = {DocComment} "nasp" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ; +Structure = {DocComment} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}" ";"; +Enumeration = {DocComment} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}" ";"; + Type = Identifier ["<" Type {"," Type} ">"]; +Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ; +DocIdentifier = {DocComment} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ; + +NamedType = Identifier ":" Type; +DocNamedType = {DocComment} Identifier ":" Type; + + +DocComment = "///" {ANYTHING} LineEnding; + +Comment = "//" [ NOT ("/" {ANYTHING} LineEnding) | "//"] {ANYTHING} LineEnding; +LineEnding = "\\n" | "\\r" | "\\r\\n"; + # (* # vim: ft=ebnf # *) diff --git a/trixy/trixy-lang_parser/docs/grammar.pdf b/trixy/trixy-lang_parser/docs/grammar.pdf index 97ec4e9..716a39f 100644 Binary files a/trixy/trixy-lang_parser/docs/grammar.pdf and b/trixy/trixy-lang_parser/docs/grammar.pdf differ diff --git a/trixy/trixy-lang_parser/example/comments.tri b/trixy/trixy-lang_parser/example/comments.tri new file mode 100644 index 0000000..597996a --- /dev/null +++ b/trixy/trixy-lang_parser/example/comments.tri @@ -0,0 +1,12 @@ +fn print(message: String); + +/// First doc comment +// Some more text +nasp trinitrix { + /// Second doc comment + fn hi(name: String) -> String; +} + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy/trixy-lang_parser/example/failing_comments.tri b/trixy/trixy-lang_parser/example/failing_comments.tri new file mode 100644 index 0000000..7aa985b --- /dev/null +++ b/trixy/trixy-lang_parser/example/failing_comments.tri @@ -0,0 +1,13 @@ +fn print(message: CommandTransferValue); + +/// Some doc comment +// Some more text +nasp trinitrix { + fn hi(name: String) -> String; +} + +/// Trailing doc comment (I will fail) + + +// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing: +// vim: syntax=rust diff --git a/trixy/trixy-lang_parser/generate_docs b/trixy/trixy-lang_parser/generate_docs index e48d336..f84a636 100755 --- a/trixy/trixy-lang_parser/generate_docs +++ b/trixy/trixy-lang_parser/generate_docs @@ -2,8 +2,8 @@ -ebnf2pdf "./docs/grammar.ebnf" -mv out.pdf ./docs/grammar.pdf +ebnf2pdf make "./docs/grammar.ebnf" +mv grammar.ebnf.pdf ./docs/grammar.pdf # vim: ft=sh diff --git a/trixy/trixy-lang_parser/src/command_spec/checked.rs b/trixy/trixy-lang_parser/src/command_spec/checked.rs index 3da9a5c..30d0eda 100644 --- a/trixy/trixy-lang_parser/src/command_spec/checked.rs +++ b/trixy/trixy-lang_parser/src/command_spec/checked.rs @@ -4,6 +4,8 @@ use std::fmt::Display; use crate::lexing::TokenKind; +use super::unchecked; + /// These are the "primitive" types used in trixy, you can use any of them to create new structures pub const BASE_TYPES: [ConstIdentifier; 8] = [ Identifier::from("Integer"), @@ -24,6 +26,7 @@ pub struct Namespace { pub structures: Vec, pub enumerations: Vec, pub namespaces: Vec, + pub attributes: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -48,13 +51,15 @@ impl From for CommandSpec { #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Structure { pub identifier: Identifier, - pub contents: Vec, + pub contents: Vec, + pub attributes: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct Enumeration { pub identifier: Identifier, - pub states: Vec, + pub states: Vec, + pub attributes: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -62,6 +67,7 @@ pub struct Function { pub identifier: Identifier, pub inputs: Vec, pub output: Option, + pub attributes: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -76,6 +82,13 @@ pub struct NamedType { pub r#type: Type, } +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] +pub struct DocNamedType { + pub name: Identifier, + pub r#type: Type, + pub attributes: Vec, +} + impl From for Identifier { fn from(value: TokenKind) -> Self { match value { @@ -92,6 +105,19 @@ impl From for Identifier { } } +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub enum Attribute { + #[allow(non_camel_case_types)] + doc(String), +} +impl From for Attribute { + fn from(value: unchecked::Attribute) -> Self { + match value { + unchecked::Attribute::doc { content: name, .. } => Self::doc(name), + } + } +} + /// An Identifier /// These include /// - Variable names @@ -103,6 +129,12 @@ pub struct Identifier { pub name: String, } +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct DocIdentifier { + pub name: String, + pub attributes: Vec, +} + /// A const version of [Identifier] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] pub struct ConstIdentifier { diff --git a/trixy/trixy-lang_parser/src/command_spec/unchecked.rs b/trixy/trixy-lang_parser/src/command_spec/unchecked.rs index 7619f96..ef88fb7 100644 --- a/trixy/trixy-lang_parser/src/command_spec/unchecked.rs +++ b/trixy/trixy-lang_parser/src/command_spec/unchecked.rs @@ -4,7 +4,7 @@ use std::fmt::{Display, Write}; -use crate::lexing::Token; +use crate::lexing::{Token, TokenSpan}; #[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord)] pub struct CommandSpec { @@ -22,6 +22,7 @@ impl From for Namespace { structures: value.structures, enumerations: value.enumerations, namespaces: value.namespaces, + attributes: vec![], } } } @@ -34,6 +35,8 @@ pub struct Namespace { pub structures: Vec, pub enumerations: Vec, pub namespaces: Vec, + + pub attributes: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] @@ -44,23 +47,45 @@ pub enum Declaration { Namespace(Namespace), } +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub enum Attribute { + #[allow(non_camel_case_types)] + doc{content: String, span: TokenSpan}, +} + #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Function { pub identifier: Token, // Will later become an Identifier pub inputs: Vec, pub output: Option, + pub attributes: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Structure { pub identifier: Token, // Will later become an Identifier - pub contents: Vec, + pub contents: Vec, + pub attributes: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] pub struct Enumeration { - pub identifier: Token, // Will later become an Identifier - pub states: Vec, // Will later become an Identifier + pub identifier: Token, // Will later become an Identifier + pub states: Vec, // Will later become an Identifier + pub attributes: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct DocToken { + pub token: Token, + pub attributes: Vec, +} + +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] +pub struct DocNamedType { + pub name: Token, // Will later become an Identifier + pub r#type: Type, + pub attributes: Vec, } #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] diff --git a/trixy/trixy-lang_parser/src/error.rs b/trixy/trixy-lang_parser/src/error.rs index ccbc4fc..c6fd486 100644 --- a/trixy/trixy-lang_parser/src/error.rs +++ b/trixy/trixy-lang_parser/src/error.rs @@ -55,7 +55,7 @@ impl ErrorContext { }; let line_above; - if line_number == 0 { + if line_number == 1 { // We only have one line, so no line above line_above = "".to_owned(); } else { diff --git a/trixy/trixy-lang_parser/src/lexing/error.rs b/trixy/trixy-lang_parser/src/lexing/error.rs index 98f3699..fed9d09 100644 --- a/trixy/trixy-lang_parser/src/lexing/error.rs +++ b/trixy/trixy-lang_parser/src/lexing/error.rs @@ -13,10 +13,12 @@ pub enum LexingError { UnknownCharacter(char), #[error("The Arrow token must be of the form: ->")] ExpectedArrow, + #[error("The Comment token must start with two slashes")] + ExpectedComment, } impl AdditionalHelp for LexingError { - fn additional_help(& self) -> String { + fn additional_help(&self) -> String { let out = match self { LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(), LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(), @@ -24,6 +26,7 @@ impl AdditionalHelp for LexingError { LexingError::UnknownCharacter(char) => { format!("This char: `{char}`; is not a valid token") }, + LexingError::ExpectedComment => "The '/' started comment parsing, but I could not find a matching '/'".to_owned(), }; out } diff --git a/trixy/trixy-lang_parser/src/lexing/mod.rs b/trixy/trixy-lang_parser/src/lexing/mod.rs index 58f77d6..cbaec89 100644 --- a/trixy/trixy-lang_parser/src/lexing/mod.rs +++ b/trixy/trixy-lang_parser/src/lexing/mod.rs @@ -28,6 +28,18 @@ impl TokenStream { tokens.push(tok); } + // filter out comments + let tokens = tokens + .into_iter() + .filter(|token| { + if let TokenKind::Comment(_) = token.kind { + false + } else { + true + } + }) + .collect(); + Ok(Self { tokens, original_file: src.to_owned(), @@ -40,8 +52,8 @@ impl TokenStream { } /// Get a reference to the uppermost token, without modifying the token list - pub fn peek(&self) -> &Token { - self.tokens.last().expect("This should not be emtpy") + pub fn peek(&self) -> Option<&Token> { + self.tokens.last() } /// Remove to the uppermost token @@ -80,6 +92,15 @@ pub struct TokenSpan { pub end: usize, } +impl TokenSpan { + pub fn from_range(start: TokenSpan, end: TokenSpan) -> Self { + Self { + start: start.start, + end: end.end, + } + } +} + /// A Token #[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone)] pub struct Token { @@ -123,6 +144,10 @@ pub enum TokenKind { ParenClose, SquareOpen, SquareClose, + + DocComment(String), + Comment(String), + /// This is not a real TokenKind, but only used for error handling #[default] Dummy, @@ -135,6 +160,16 @@ impl TokenKind { return true; } } + if let TokenKind::Comment(_) = self { + if let TokenKind::Comment(_) = other { + return true; + } + } + if let TokenKind::DocComment(_) = self { + if let TokenKind::DocComment(_) = other { + return true; + } + } self == other } } @@ -161,6 +196,8 @@ impl Display for TokenKind { TokenKind::Dummy => f.write_str("DUMMY"), TokenKind::SquareOpen => f.write_str("SQUAREOPEN"), TokenKind::SquareClose => f.write_str("SQUARECLOSE"), + TokenKind::DocComment(text) => write!(f, "DOCCOMMENT({})", text), + TokenKind::Comment(text) => write!(f, "COMMENT({})", text), } } } @@ -234,10 +271,13 @@ macro_rules! token { [struct] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#struct) }; [enum] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#enum) }; - // This is only works for checking for a identifier + // This is only works for checking for a identifier or comment // see the `same_kind` method on TokenKind [Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) }; [Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) }; + [DocComment] => { $crate::lexing::TokenKind::DocComment("".to_owned()) }; + [DocCommentMatch] => { $crate::lexing::TokenKind::DocComment(_doc_comment) }; + [Comment] => { $crate::lexing::TokenKind::Comment("".to_owned()) }; } #[cfg(test)] diff --git a/trixy/trixy-lang_parser/src/lexing/tokenizer.rs b/trixy/trixy-lang_parser/src/lexing/tokenizer.rs index a4eb885..c7a9882 100644 --- a/trixy/trixy-lang_parser/src/lexing/tokenizer.rs +++ b/trixy/trixy-lang_parser/src/lexing/tokenizer.rs @@ -62,7 +62,9 @@ impl<'a> Tokenizer<'a> { ',' => (TokenKind::Comma, 1), '<' => (TokenKind::SquareOpen, 1), '>' => (TokenKind::SquareClose, 1), + '-' => tokenize_arrow(self.remaining_text)?, + '/' => tokenize_comment(self.remaining_text)?, // can't use a OR (`|`) here, as the guard takes precedence c if c.is_alphabetic() => tokenize_ident(self.remaining_text)?, @@ -74,17 +76,17 @@ impl<'a> Tokenizer<'a> { Ok((tok, length)) } - /// Skip past any whitespace characters or comments. fn skip_ignored_tokens(&mut self) { loop { let ws = self.skip_whitespace(); - let comments = self.skip_comments(); - + let comments = self.skip_block_comment(); if ws + comments == 0 { return; } } } + + /// Skip past any whitespace characters fn skip_whitespace(&mut self) -> usize { let mut remaining = self.remaining_text; @@ -102,21 +104,21 @@ impl<'a> Tokenizer<'a> { self.chomp(skip); skip } + fn skip_block_comment(&mut self) -> usize { + let pairs = [("/*", "*/")]; - fn skip_comments(&mut self) -> usize { - let remaining = self.remaining_text; - let pairs = [("//", "\n"), ("/*", "*/")]; + let src = self.remaining_text; - let mut skip = 0; for &(pattern, matcher) in &pairs { - if remaining.starts_with(pattern) { - let leftovers = skip_until(remaining, matcher); - skip = remaining.len() - leftovers.len(); - break; + if src.starts_with(pattern) { + let leftovers = skip_until(src, matcher); + let skip = src.len() - leftovers.len(); + self.chomp(skip); + return skip; } } - self.chomp(skip); - skip + + 0 } fn chomp(&mut self, chars_to_chomp: usize) { @@ -125,6 +127,36 @@ impl<'a> Tokenizer<'a> { } } +fn tokenize_comment(text: &str) -> Result<(TokenKind, usize), LexingError> { + // every token starts with two slashes + let slashes: &str = &text[..2]; + if slashes != "//" { + Err(LexingError::ExpectedComment) + } else { + let text: &str = &text[2..]; + if let Some('/') = text.chars().next() { + let text = &text[1..]; + let (doc_comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?; + + // trim whitespace + let doc_comment = doc_comment.trim_start(); + let doc_comment = doc_comment.trim_end(); + + return Ok(( + TokenKind::DocComment(doc_comment.to_owned()), + chars_read + 3, + )); + } + let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?; + + // trim whitespace + let comment = comment.trim_start(); + let comment = comment.trim_end(); + + return Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2)); + } +} + fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> { let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?; diff --git a/trixy/trixy-lang_parser/src/parsing/checked/mod.rs b/trixy/trixy-lang_parser/src/parsing/checked/mod.rs index 65b1e91..1e44d4f 100644 --- a/trixy/trixy-lang_parser/src/parsing/checked/mod.rs +++ b/trixy/trixy-lang_parser/src/parsing/checked/mod.rs @@ -3,14 +3,14 @@ use std::mem; use crate::{ command_spec::{ checked::{ - CommandSpec, Enumeration, Function, Identifier, NamedType, Namespace, Structure, Type, - BASE_TYPES, + CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier, NamedType, + Namespace, Structure, Type, BASE_TYPES, }, unchecked::{ - CommandSpec as UncheckedCommandSpec, Enumeration as UncheckedEnumeration, - Function as UncheckedFunction, NamedType as UncheckedNamedType, - Namespace as UncheckedNamespace, Structure as UncheckedStructure, - Type as UncheckedType, + CommandSpec as UncheckedCommandSpec, DocNamedType as UncheckedDocNamedType, + Enumeration as UncheckedEnumeration, Function as UncheckedFunction, + NamedType as UncheckedNamedType, Namespace as UncheckedNamespace, + Structure as UncheckedStructure, Type as UncheckedType, }, }, error::ErrorContext, @@ -66,6 +66,12 @@ impl UncheckedCommandSpec { } } +macro_rules! pass_attrs_along { + ($name:ident) => { + $name.attributes.into_iter().map(|a| a.into()).collect() + }; +} + impl Parser { fn parse(mut self) -> Result { let namespace: UncheckedNamespace = @@ -129,6 +135,7 @@ impl Parser { structures, enumerations, namespaces, + attributes: pass_attrs_along!(namespace), }) } @@ -151,6 +158,7 @@ impl Parser { identifier, inputs, output, + attributes: pass_attrs_along!(function), }) } @@ -164,10 +172,20 @@ impl Parser { let mut states = vec![]; for mut state in enumeration.states { - states.push(mem::take(&mut state.kind).into()) + states.push({ + let ident: Identifier = mem::take(&mut state.token.kind).into(); + DocIdentifier { + name: ident.name, + attributes: pass_attrs_along!(state), + } + }) } - Ok(Enumeration { identifier, states }) + Ok(Enumeration { + identifier, + states, + attributes: pass_attrs_along!(enumeration), + }) } fn process_structure( @@ -179,12 +197,13 @@ impl Parser { let identifier: Identifier = mem::take(&mut structure.identifier.kind).into(); let mut contents = vec![]; for named_type in structure.contents { - contents.push(self.process_named_type(named_type)?); + contents.push(self.process_doc_named_type(named_type)?); } Ok(Structure { identifier, contents, + attributes: pass_attrs_along!(structure), }) } @@ -196,6 +215,18 @@ impl Parser { let r#type: Type = self.process_type(named_type.r#type)?; Ok(NamedType { name, r#type }) } + fn process_doc_named_type( + &mut self, + mut doc_named_type: UncheckedDocNamedType, + ) -> Result { + let name: Identifier = mem::take(&mut doc_named_type.name.kind).into(); + let r#type: Type = self.process_type(doc_named_type.r#type)?; + Ok(DocNamedType { + name, + r#type, + attributes: pass_attrs_along!(doc_named_type), + }) + } fn process_type(&mut self, mut r#type: UncheckedType) -> Result { let identifier: Identifier = mem::take(&mut r#type.identifier.kind).into(); diff --git a/trixy/trixy-lang_parser/src/parsing/checked/test.rs b/trixy/trixy-lang_parser/src/parsing/checked/test.rs index 2326b11..53e27a9 100644 --- a/trixy/trixy-lang_parser/src/parsing/checked/test.rs +++ b/trixy/trixy-lang_parser/src/parsing/checked/test.rs @@ -1,8 +1,11 @@ use crate::command_spec::checked::{ - CommandSpec, Enumeration, Function, Identifier, NamedType, Namespace, Structure, Type, + Attribute, CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier, + NamedType, Namespace, Structure, Type, }; use crate::lexing::TokenStream; +use pretty_assertions::assert_eq; + #[test] fn test_full() { let input = "nasp trinitrix { @@ -57,13 +60,14 @@ fn test_full() { }, ], output: None, + attributes: vec![], }], structures: vec![Structure { identifier: Identifier { name: "Callback".to_owned(), }, contents: vec![ - NamedType { + DocNamedType { name: Identifier { name: "func".to_owned(), }, @@ -73,8 +77,9 @@ fn test_full() { }, generic_args: vec![], }, + attributes: vec![], }, - NamedType { + DocNamedType { name: Identifier { name: "timeout".to_owned(), }, @@ -84,26 +89,33 @@ fn test_full() { }, generic_args: vec![], }, + attributes: vec![], }, ], + attributes: vec![], }], enumerations: vec![Enumeration { identifier: Identifier { name: "CallbackPriority".to_owned(), }, states: vec![ - Identifier { + DocIdentifier { name: "High".to_owned(), + attributes: vec![], }, - Identifier { + DocIdentifier { name: "Medium".to_owned(), + attributes: vec![], }, - Identifier { + DocIdentifier { name: "Low".to_owned(), + attributes: vec![], }, ], + attributes: vec![], }], namespaces: vec![], + attributes: vec![], }], }; assert_eq!(output, expected); @@ -132,3 +144,72 @@ fn execute_callback(callback: Name); _ => panic!("Wrong error in test!"), }; } + +#[test] +fn test_comments() { + let input = "fn print(message: String); + +/// First doc comment +// Some more text +nasp trinitrix { + /// Second doc comment + fn hi(name: String) -> String; +} +"; + let output = TokenStream::lex(&input).unwrap().parse().unwrap(); + let expected = CommandSpec { + structures: vec![], + enumerations: vec![], + functions: vec![Function { + identifier: Identifier { + name: "print".to_owned(), + }, + inputs: vec![NamedType { + name: Identifier { + name: "message".to_owned(), + }, + r#type: Type { + identifier: Identifier { + name: "String".to_owned(), + }, + generic_args: vec![], + }, + }], + output: None, + attributes: vec![], + }], + namespaces: vec![Namespace { + name: Identifier { + name: "trinitrix".to_owned(), + }, + functions: vec![Function { + identifier: Identifier { + name: "hi".to_owned(), + }, + inputs: vec![NamedType { + name: Identifier { + name: "name".to_owned(), + }, + r#type: Type { + identifier: Identifier { + name: "String".to_owned(), + }, + generic_args: vec![], + }, + }], + output: Some(Type { + identifier: Identifier { + name: "String".to_owned(), + }, + generic_args: vec![], + }), + attributes: vec![Attribute::doc("Second doc comment".to_owned())], + }], + structures: vec![], + enumerations: vec![], + namespaces: vec![], + attributes: vec![Attribute::doc("First doc comment".to_owned())], + }], + }; + assert_eq!(output, expected); +} diff --git a/trixy/trixy-lang_parser/src/parsing/unchecked/error.rs b/trixy/trixy-lang_parser/src/parsing/unchecked/error.rs index d697087..5d5270c 100644 --- a/trixy/trixy-lang_parser/src/parsing/unchecked/error.rs +++ b/trixy/trixy-lang_parser/src/parsing/unchecked/error.rs @@ -2,37 +2,47 @@ use std::{error::Error, fmt::Display}; use thiserror::Error; use crate::{ + command_spec::unchecked::Attribute, error::{AdditionalHelp, ErrorContext, ErrorContextDisplay}, lexing::{TokenKind, TokenSpan}, }; #[derive(Error, Debug, Clone)] pub enum ParsingError { - #[error("Expected '{expected}' but received '{actual}'")] + #[error("Expected '{expected}', but received: '{actual}'")] ExpectedDifferentToken { expected: TokenKind, actual: TokenKind, span: TokenSpan, }, + #[error("Expected '{expected}', but the token stream stopped")] + UnexpectedEOF { + expected: TokenKind, + span: TokenSpan, + }, + #[error("Expected a Keyword to start a new declaration, but found: '{actual}'")] ExpectedKeyword { actual: TokenKind, span: TokenSpan }, + + #[error("DocComment does not have target")] + TrailingDocComment { + comments: Vec, + span: TokenSpan, + }, } impl ParsingError { pub fn span(&self) -> &TokenSpan { match self { ParsingError::ExpectedDifferentToken { span, .. } => span, ParsingError::ExpectedKeyword { span, .. } => span, + ParsingError::TrailingDocComment { span, .. } => span, + ParsingError::UnexpectedEOF { span, .. } => span, } } -} -impl ParsingError { pub fn get_span(&self) -> TokenSpan { - match self { - ParsingError::ExpectedDifferentToken { span, .. } => *span, - ParsingError::ExpectedKeyword { span, .. } => *span, - } + *self.span() } } @@ -50,6 +60,8 @@ impl AdditionalHelp for ParsingError { ParsingError::ExpectedKeyword { actual, .. } => format!( "I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!", actual), + ParsingError::TrailingDocComment { .. } => "I expected some target (a function, namespace, enum, or something like this) which this doc comment annotates, but you put nothing there".to_owned(), + ParsingError::UnexpectedEOF { expected, .. } => format!("Put the expected token ('{expected}') here."), } } } diff --git a/trixy/trixy-lang_parser/src/parsing/unchecked/mod.rs b/trixy/trixy-lang_parser/src/parsing/unchecked/mod.rs index 622f870..c6db5a2 100644 --- a/trixy/trixy-lang_parser/src/parsing/unchecked/mod.rs +++ b/trixy/trixy-lang_parser/src/parsing/unchecked/mod.rs @@ -1,9 +1,12 @@ +use std::mem; + use crate::{ command_spec::unchecked::{ - CommandSpec, Declaration, Enumeration, Function, NamedType, Namespace, Structure, Type, + Attribute, CommandSpec, Declaration, DocNamedType, DocToken, Enumeration, Function, + NamedType, Namespace, Structure, Type, }, error::ErrorContext, - lexing::{Token, TokenKind, TokenStream}, + lexing::{Token, TokenKind, TokenSpan, TokenStream}, token, }; @@ -22,12 +25,18 @@ impl TokenStream { pub(super) struct Parser { token_stream: TokenStream, + active_doc_comments: Vec, + last_span: TokenSpan, } impl Parser { fn new(mut token_stream: TokenStream) -> Self { token_stream.reverse(); - Self { token_stream } + Self { + token_stream, + active_doc_comments: vec![], + last_span: TokenSpan::default(), + } } fn parse(&mut self) -> Result { @@ -52,15 +61,55 @@ impl Parser { } fn parse_next(&mut self) -> Result { - match self.peek().kind() { + // Use of [peek_raw] here is fine, as we know that the function is only called, when + // something should still be contained in the token stream + match self.peek_raw().kind() { token![nasp] => Ok(Declaration::Namespace(self.parse_namespace()?)), token![fn] => Ok(Declaration::Function(self.parse_function()?)), token![struct] => Ok(Declaration::Structure(self.parse_structure()?)), token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)), + token![DocCommentMatch] => { + while self.expect_peek(token![DocComment]) { + let comment_to_push = { + let doc_comment = self.expect(token![DocComment])?; + let span = *doc_comment.span(); + let name = if let TokenKind::DocComment(content) = doc_comment.kind { + content + } else { + unreachable!("The expect should have accounted for that case"); + }; + + Attribute::doc { + content: name, + span, + } + }; + self.active_doc_comments.push(comment_to_push); + } + + if self.token_stream.is_empty() { + fn get_span(attr: Option<&Attribute>) -> TokenSpan { + match attr.expect("Something should be here") { + Attribute::doc { span, .. } => *span, + } + } + + let span = TokenSpan::from_range( + get_span(self.active_doc_comments.first()), + get_span(self.active_doc_comments.last()), + ); + Err(ParsingError::TrailingDocComment { + comments: mem::take(&mut self.active_doc_comments), + span, + }) + } else { + self.parse_next() + } + } _ => { let err = ParsingError::ExpectedKeyword { - span: *self.peek().span(), - actual: self.peek().kind().clone(), + span: *self.peek_raw().span(), + actual: self.peek_raw().kind().clone(), }; return Err(err); @@ -88,11 +137,34 @@ impl Parser { }) } + fn parse_doc_comments(&mut self) -> Result, ParsingError> { + let mut attrs = mem::take(&mut self.active_doc_comments); + + while self.expect_peek(token![DocComment]) { + attrs.push({ + let doc_comment = self.expect(token![DocComment])?; + let span = *doc_comment.span(); + let name = if let TokenKind::DocComment(content) = doc_comment.kind { + content + } else { + unreachable!("The expect should have accounted for that case"); + }; + Attribute::doc { + content: name, + span, + } + }); + } + Ok(attrs) + } + fn parse_namespace(&mut self) -> Result { + let attributes = self.parse_doc_comments()?; self.expect(token![nasp])?; let mut namespace = Namespace::default(); namespace.name = self.expect(token![Ident])?; + namespace.attributes = attributes; self.expect(token![BraceOpen])?; @@ -113,40 +185,54 @@ impl Parser { } fn parse_enumeration(&mut self) -> Result { + let attributes = self.parse_doc_comments()?; self.expect(token![enum])?; let identifier = self.expect(token![Ident])?; self.expect(token![BraceOpen])?; let mut states = vec![]; if self.expect_peek(token![Ident]) { - states.push(self.expect(token![Ident])?); + let attributes = self.parse_doc_comments()?; + states.push(DocToken { + token: self.expect(token![Ident])?, + attributes, + }); } while self.expect_peek(token![Comma]) { self.expect(token![Comma])?; if self.expect_peek(token![Ident]) { - states.push(self.expect(token![Ident])?); + let attributes = self.parse_doc_comments()?; + states.push(DocToken { + token: self.expect(token![Ident])?, + attributes, + }); } else { break; } } self.expect(token![BraceClose])?; self.expect(token![;])?; - Ok(Enumeration { identifier, states }) + Ok(Enumeration { + identifier, + states, + attributes, + }) } fn parse_structure(&mut self) -> Result { + let attributes = self.parse_doc_comments()?; self.expect(token![struct])?; let name = self.expect(token![Ident])?; self.expect(token![BraceOpen])?; let mut contents = vec![]; if self.expect_peek(token![Ident]) { - contents.push(self.parse_named_type()?); + contents.push(self.parse_doc_named_type()?); } while self.expect_peek(token![Comma]) { self.expect(token![Comma])?; if self.expect_peek(token![Ident]) { - contents.push(self.parse_named_type()?); + contents.push(self.parse_doc_named_type()?); } else { break; } @@ -157,6 +243,7 @@ impl Parser { Ok(Structure { identifier: name, contents, + attributes, }) } @@ -167,7 +254,20 @@ impl Parser { Ok(NamedType { name, r#type }) } + fn parse_doc_named_type(&mut self) -> Result { + let attributes = self.parse_doc_comments()?; + let name = self.expect(token![Ident])?; + self.expect(token![Colon])?; + let r#type = self.parse_type()?; + Ok(DocNamedType { + name, + r#type, + attributes, + }) + } + fn parse_function(&mut self) -> Result { + let attributes = self.parse_doc_comments()?; self.expect(token![fn])?; let name = self.expect(token![Ident])?; self.expect(token![ParenOpen])?; @@ -192,6 +292,7 @@ impl Parser { identifier: name, inputs, output: output_type, + attributes, }) } @@ -216,7 +317,14 @@ impl Parser { /// ``` /// pub(super) fn expect(&mut self, token: TokenKind) -> Result { - let actual_token = self.peek(); + let actual_token = if let Some(token) = self.peek() { + token + } else { + return Err(ParsingError::UnexpectedEOF { + expected: token, + span: self.last_span, + }); + }; if actual_token.kind().same_kind(&token) { Ok(self.pop()) } else { @@ -233,7 +341,10 @@ impl Parser { /// Check if the next token is of the specified TokenKind. /// Does not alter the token_stream fn expect_peek(&self, token: TokenKind) -> bool { - let actual_token = self.peek(); + let actual_token = match self.peek() { + Some(ok) => ok, + None => return false, + }; if actual_token.kind().same_kind(&token) { true } else { @@ -242,12 +353,22 @@ impl Parser { } /// Looks at the next token without removing it - fn peek(&self) -> &Token { + fn peek(&self) -> Option<&Token> { self.token_stream.peek() } + /// Looks at the next token without removing it. + /// Unwraps the option returned from [peek], only use it, if you know that a token must exist + fn peek_raw(&self) -> &Token { + self.token_stream.peek().expect("The token should exist") + } + /// Removes the next token fn pop(&mut self) -> Token { + self.last_span = *self + .peek() + .expect("Calling pop should mean, that a token was first peeked for") + .span(); self.token_stream.pop() } } diff --git a/trixy/trixy-lang_parser/src/parsing/unchecked/test.rs b/trixy/trixy-lang_parser/src/parsing/unchecked/test.rs index a6627bf..ccf9b69 100644 --- a/trixy/trixy-lang_parser/src/parsing/unchecked/test.rs +++ b/trixy/trixy-lang_parser/src/parsing/unchecked/test.rs @@ -20,8 +20,8 @@ nasp trinitrix { {} let parsed = TokenStream::lex(input).unwrap().parse_unchecked(); let err = parsed.unwrap_err().source; match err { - ParsingError::ExpectedDifferentToken { .. } => panic!("Wrong error"), ParsingError::ExpectedKeyword { .. } => {} + _ => panic!("Wrong error"), } } @@ -56,6 +56,7 @@ nasp trinitrix { }, }], output: None, + attributes: vec![], }], namespaces: vec![Namespace { name: Token { @@ -87,10 +88,12 @@ nasp trinitrix { }, generic_args: vec![], }), + attributes: vec![], }], structures: vec![], enumerations: vec![], namespaces: vec![], + attributes: vec![], }], };