feat(trixy-lang_parser): Add support for doc comments

Parsing right now works by simply comparing the input string:
```
"/" -> <comment_tokenizer> -> "/" -> <normal_comment>
                          |-> "//" -> <doc_comment>
```

A better method, though, would be to turn "//" and "///" into
keywords and leave the parsing to the parser module rather than the
tokenizer.
This commit is contained in:
Benedikt Peetz 2023-12-22 20:32:43 +01:00
parent 3503e5250c
commit 70c4cc6f18
Signed by: bpeetz
GPG Key ID: A5E94010C3A642AD
16 changed files with 487 additions and 70 deletions

View File

@ -6,15 +6,27 @@
# - Block comments (`/* */`). # - Block comments (`/* */`).
# *) # *)
CommandSpec = { Function | Namespace | Enumeration | Structure } ; CommandSpec = {Function | Namespace | Enumeration | Structure } ;
Function = "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
Namespace = "nasp" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ; Function = {DocComment} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
Structure = "struct" Identifier "{" [NamedType {"," NamedType } [","]] "}" ";"; Namespace = {DocComment} "nasp" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
Enumeration = "enum" Identifier "{" [Identifier {"," Identifier} [","]] "}" ";"; Structure = {DocComment} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}" ";";
Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ; Enumeration = {DocComment} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}" ";";
NamedType = Identifier ":" Type;
Type = Identifier ["<" Type {"," Type} ">"]; Type = Identifier ["<" Type {"," Type} ">"];
Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
DocIdentifier = {DocComment} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
NamedType = Identifier ":" Type;
DocNamedType = {DocComment} Identifier ":" Type;
DocComment = "///" {ANYTHING} LineEnding;
Comment = "//" [ NOT ("/" {ANYTHING} LineEnding) | "//"] {ANYTHING} LineEnding;
LineEnding = "\\n" | "\\r" | "\\r\\n";
# (* # (*
# vim: ft=ebnf # vim: ft=ebnf
# *) # *)

View File

@ -0,0 +1,12 @@
fn print(message: String);
/// First doc comment
// Some more text
nasp trinitrix {
/// Second doc comment
fn hi(name: String) -> String;
}
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust

View File

@ -0,0 +1,13 @@
fn print(message: CommandTransferValue);
/// Some doc comment
// Some more text
nasp trinitrix {
fn hi(name: String) -> String;
}
/// Trailing doc comment (I will fail)
// That's a flat out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust

View File

@ -2,8 +2,8 @@
ebnf2pdf "./docs/grammar.ebnf" ebnf2pdf make "./docs/grammar.ebnf"
mv out.pdf ./docs/grammar.pdf mv grammar.ebnf.pdf ./docs/grammar.pdf
# vim: ft=sh # vim: ft=sh

View File

@ -4,6 +4,8 @@ use std::fmt::Display;
use crate::lexing::TokenKind; use crate::lexing::TokenKind;
use super::unchecked;
/// These are the "primitive" types used in trixy, you can use any of them to create new structures /// These are the "primitive" types used in trixy, you can use any of them to create new structures
pub const BASE_TYPES: [ConstIdentifier; 8] = [ pub const BASE_TYPES: [ConstIdentifier; 8] = [
Identifier::from("Integer"), Identifier::from("Integer"),
@ -24,6 +26,7 @@ pub struct Namespace {
pub structures: Vec<Structure>, pub structures: Vec<Structure>,
pub enumerations: Vec<Enumeration>, pub enumerations: Vec<Enumeration>,
pub namespaces: Vec<Namespace>, pub namespaces: Vec<Namespace>,
pub attributes: Vec<Attribute>,
} }
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@ -48,13 +51,15 @@ impl From<Namespace> for CommandSpec {
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Structure { pub struct Structure {
pub identifier: Identifier, pub identifier: Identifier,
pub contents: Vec<NamedType>, pub contents: Vec<DocNamedType>,
pub attributes: Vec<Attribute>,
} }
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Enumeration { pub struct Enumeration {
pub identifier: Identifier, pub identifier: Identifier,
pub states: Vec<Identifier>, pub states: Vec<DocIdentifier>,
pub attributes: Vec<Attribute>,
} }
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@ -62,6 +67,7 @@ pub struct Function {
pub identifier: Identifier, pub identifier: Identifier,
pub inputs: Vec<NamedType>, pub inputs: Vec<NamedType>,
pub output: Option<Type>, pub output: Option<Type>,
pub attributes: Vec<Attribute>,
} }
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@ -76,6 +82,13 @@ pub struct NamedType {
pub r#type: Type, pub r#type: Type,
} }
/// A `name: Type` pair that can additionally carry doc-comment
/// attributes — used for struct fields, which may be annotated with `///`.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct DocNamedType {
    /// The field's name.
    pub name: Identifier,
    /// The field's type (`r#` prefix because `type` is a Rust keyword).
    pub r#type: Type,
    /// Doc comments (`///`) attached to this field.
    pub attributes: Vec<Attribute>,
}
impl From<TokenKind> for Identifier { impl From<TokenKind> for Identifier {
fn from(value: TokenKind) -> Self { fn from(value: TokenKind) -> Self {
match value { match value {
@ -92,6 +105,19 @@ impl From<TokenKind> for Identifier {
} }
} }
/// An attribute attached to a declaration, mirroring Rust's attribute
/// syntax (hence the lowercase variant name, which matches `#[doc = …]`).
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum Attribute {
    /// A doc comment (`///`); the `String` holds the trimmed comment text.
    #[allow(non_camel_case_types)]
    doc(String),
}
impl From<unchecked::Attribute> for Attribute {
fn from(value: unchecked::Attribute) -> Self {
match value {
unchecked::Attribute::doc { content: name, .. } => Self::doc(name),
}
}
}
/// An Identifier /// An Identifier
/// These include /// These include
/// - Variable names /// - Variable names
@ -103,6 +129,12 @@ pub struct Identifier {
pub name: String, pub name: String,
} }
/// An identifier that can carry doc-comment attributes — used for
/// enum states, which may be annotated with `///`.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct DocIdentifier {
    /// The identifier's text.
    pub name: String,
    /// Doc comments (`///`) attached to this identifier.
    pub attributes: Vec<Attribute>,
}
/// A const version of [Identifier] /// A const version of [Identifier]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ConstIdentifier { pub struct ConstIdentifier {

View File

@ -4,7 +4,7 @@
use std::fmt::{Display, Write}; use std::fmt::{Display, Write};
use crate::lexing::Token; use crate::lexing::{Token, TokenSpan};
#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct CommandSpec { pub struct CommandSpec {
@ -22,6 +22,7 @@ impl From<CommandSpec> for Namespace {
structures: value.structures, structures: value.structures,
enumerations: value.enumerations, enumerations: value.enumerations,
namespaces: value.namespaces, namespaces: value.namespaces,
attributes: vec![],
} }
} }
} }
@ -34,6 +35,8 @@ pub struct Namespace {
pub structures: Vec<Structure>, pub structures: Vec<Structure>,
pub enumerations: Vec<Enumeration>, pub enumerations: Vec<Enumeration>,
pub namespaces: Vec<Namespace>, pub namespaces: Vec<Namespace>,
pub attributes: Vec<Attribute>,
} }
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@ -44,23 +47,45 @@ pub enum Declaration {
Namespace(Namespace), Namespace(Namespace),
} }
/// An attribute attached to a declaration, in its unchecked (pre-validation)
/// form; unlike the checked version it keeps the source span for error reporting.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum Attribute {
    /// A doc comment (`///`): the trimmed comment text plus where it was found.
    #[allow(non_camel_case_types)]
    doc{content: String, span: TokenSpan},
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct Function { pub struct Function {
pub identifier: Token, // Will later become an Identifier pub identifier: Token, // Will later become an Identifier
pub inputs: Vec<NamedType>, pub inputs: Vec<NamedType>,
pub output: Option<Type>, pub output: Option<Type>,
pub attributes: Vec<Attribute>,
} }
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct Structure { pub struct Structure {
pub identifier: Token, // Will later become an Identifier pub identifier: Token, // Will later become an Identifier
pub contents: Vec<NamedType>, pub contents: Vec<DocNamedType>,
pub attributes: Vec<Attribute>,
} }
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct Enumeration { pub struct Enumeration {
pub identifier: Token, // Will later become an Identifier pub identifier: Token, // Will later become an Identifier
pub states: Vec<Token>, // Will later become an Identifier pub states: Vec<DocToken>, // Will later become an Identifier
pub attributes: Vec<Attribute>,
}
/// A token bundled with the doc-comment attributes that preceded it
/// (used for enum states before they are checked into `DocIdentifier`s).
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct DocToken {
    /// The underlying token (will later become an Identifier).
    pub token: Token,
    /// Doc comments (`///`) attached to this token.
    pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct DocNamedType {
pub name: Token, // Will later become an Identifier
pub r#type: Type,
pub attributes: Vec<Attribute>,
} }
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)] #[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]

View File

@ -55,7 +55,7 @@ impl ErrorContext {
}; };
let line_above; let line_above;
if line_number == 0 { if line_number == 1 {
// We only have one line, so no line above // We only have one line, so no line above
line_above = "".to_owned(); line_above = "".to_owned();
} else { } else {

View File

@ -13,10 +13,12 @@ pub enum LexingError {
UnknownCharacter(char), UnknownCharacter(char),
#[error("The Arrow token must be of the form: ->")] #[error("The Arrow token must be of the form: ->")]
ExpectedArrow, ExpectedArrow,
#[error("The Comment token must start with two slashes")]
ExpectedComment,
} }
impl AdditionalHelp for LexingError { impl AdditionalHelp for LexingError {
fn additional_help(& self) -> String { fn additional_help(&self) -> String {
let out = match self { let out = match self {
LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(), LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(), LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
@ -24,6 +26,7 @@ impl AdditionalHelp for LexingError {
LexingError::UnknownCharacter(char) => { LexingError::UnknownCharacter(char) => {
format!("This char: `{char}`; is not a valid token") format!("This char: `{char}`; is not a valid token")
}, },
LexingError::ExpectedComment => "The '/' started comment parsing, but I could not find a matching '/'".to_owned(),
}; };
out out
} }

View File

@ -28,6 +28,18 @@ impl TokenStream {
tokens.push(tok); tokens.push(tok);
} }
// filter out comments
let tokens = tokens
.into_iter()
.filter(|token| {
if let TokenKind::Comment(_) = token.kind {
false
} else {
true
}
})
.collect();
Ok(Self { Ok(Self {
tokens, tokens,
original_file: src.to_owned(), original_file: src.to_owned(),
@ -40,8 +52,8 @@ impl TokenStream {
} }
/// Get a reference to the uppermost token, without modifying the token list /// Get a reference to the uppermost token, without modifying the token list
pub fn peek(&self) -> &Token { pub fn peek(&self) -> Option<&Token> {
self.tokens.last().expect("This should not be emtpy") self.tokens.last()
} }
/// Remove to the uppermost token /// Remove to the uppermost token
@ -80,6 +92,15 @@ pub struct TokenSpan {
pub end: usize, pub end: usize,
} }
impl TokenSpan {
pub fn from_range(start: TokenSpan, end: TokenSpan) -> Self {
Self {
start: start.start,
end: end.end,
}
}
}
/// A Token /// A Token
#[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone)] #[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone)]
pub struct Token { pub struct Token {
@ -123,6 +144,10 @@ pub enum TokenKind {
ParenClose, ParenClose,
SquareOpen, SquareOpen,
SquareClose, SquareClose,
DocComment(String),
Comment(String),
/// This is not a real TokenKind, but only used for error handling /// This is not a real TokenKind, but only used for error handling
#[default] #[default]
Dummy, Dummy,
@ -135,6 +160,16 @@ impl TokenKind {
return true; return true;
} }
} }
if let TokenKind::Comment(_) = self {
if let TokenKind::Comment(_) = other {
return true;
}
}
if let TokenKind::DocComment(_) = self {
if let TokenKind::DocComment(_) = other {
return true;
}
}
self == other self == other
} }
} }
@ -161,6 +196,8 @@ impl Display for TokenKind {
TokenKind::Dummy => f.write_str("DUMMY"), TokenKind::Dummy => f.write_str("DUMMY"),
TokenKind::SquareOpen => f.write_str("SQUAREOPEN"), TokenKind::SquareOpen => f.write_str("SQUAREOPEN"),
TokenKind::SquareClose => f.write_str("SQUARECLOSE"), TokenKind::SquareClose => f.write_str("SQUARECLOSE"),
TokenKind::DocComment(text) => write!(f, "DOCCOMMENT({})", text),
TokenKind::Comment(text) => write!(f, "COMMENT({})", text),
} }
} }
} }
@ -234,10 +271,13 @@ macro_rules! token {
[struct] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#struct) }; [struct] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#struct) };
[enum] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#enum) }; [enum] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#enum) };
// This only works for checking for an identifier // This only works for checking for an identifier or comment
// see the `same_kind` method on TokenKind // see the `same_kind` method on TokenKind
[Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) }; [Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
[Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) }; [Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
[DocComment] => { $crate::lexing::TokenKind::DocComment("".to_owned()) };
[DocCommentMatch] => { $crate::lexing::TokenKind::DocComment(_doc_comment) };
[Comment] => { $crate::lexing::TokenKind::Comment("".to_owned()) };
} }
#[cfg(test)] #[cfg(test)]

View File

@ -62,7 +62,9 @@ impl<'a> Tokenizer<'a> {
',' => (TokenKind::Comma, 1), ',' => (TokenKind::Comma, 1),
'<' => (TokenKind::SquareOpen, 1), '<' => (TokenKind::SquareOpen, 1),
'>' => (TokenKind::SquareClose, 1), '>' => (TokenKind::SquareClose, 1),
'-' => tokenize_arrow(self.remaining_text)?, '-' => tokenize_arrow(self.remaining_text)?,
'/' => tokenize_comment(self.remaining_text)?,
// can't use a OR (`|`) here, as the guard takes precedence // can't use a OR (`|`) here, as the guard takes precedence
c if c.is_alphabetic() => tokenize_ident(self.remaining_text)?, c if c.is_alphabetic() => tokenize_ident(self.remaining_text)?,
@ -74,17 +76,17 @@ impl<'a> Tokenizer<'a> {
Ok((tok, length)) Ok((tok, length))
} }
/// Skip past any whitespace characters or comments.
fn skip_ignored_tokens(&mut self) { fn skip_ignored_tokens(&mut self) {
loop { loop {
let ws = self.skip_whitespace(); let ws = self.skip_whitespace();
let comments = self.skip_comments(); let comments = self.skip_block_comment();
if ws + comments == 0 { if ws + comments == 0 {
return; return;
} }
} }
} }
/// Skip past any whitespace characters
fn skip_whitespace(&mut self) -> usize { fn skip_whitespace(&mut self) -> usize {
let mut remaining = self.remaining_text; let mut remaining = self.remaining_text;
@ -102,21 +104,21 @@ impl<'a> Tokenizer<'a> {
self.chomp(skip); self.chomp(skip);
skip skip
} }
fn skip_block_comment(&mut self) -> usize {
let pairs = [("/*", "*/")];
fn skip_comments(&mut self) -> usize { let src = self.remaining_text;
let remaining = self.remaining_text;
let pairs = [("//", "\n"), ("/*", "*/")];
let mut skip = 0;
for &(pattern, matcher) in &pairs { for &(pattern, matcher) in &pairs {
if remaining.starts_with(pattern) { if src.starts_with(pattern) {
let leftovers = skip_until(remaining, matcher); let leftovers = skip_until(src, matcher);
skip = remaining.len() - leftovers.len(); let skip = src.len() - leftovers.len();
break; self.chomp(skip);
return skip;
} }
} }
self.chomp(skip);
skip 0
} }
fn chomp(&mut self, chars_to_chomp: usize) { fn chomp(&mut self, chars_to_chomp: usize) {
@ -125,6 +127,36 @@ impl<'a> Tokenizer<'a> {
} }
} }
/// Tokenize a comment starting at the beginning of `text`.
///
/// Two kinds are recognized:
/// - `///` yields a [`TokenKind::DocComment`] with the trimmed comment text.
/// - `//` yields a [`TokenKind::Comment`] with the trimmed comment text.
///
/// Returns the token and the total number of characters consumed
/// (including the leading slashes), or [`LexingError::ExpectedComment`]
/// when `text` does not start with two slashes.
fn tokenize_comment(text: &str) -> Result<(TokenKind, usize), LexingError> {
    // Every comment token starts with two slashes. `starts_with` also
    // handles the case where fewer than two bytes remain (e.g. a lone
    // `/` at end of input), which the previous slice `&text[..2]`
    // would have panicked on.
    if !text.starts_with("//") {
        return Err(LexingError::ExpectedComment);
    }
    let rest = &text[2..];
    if let Some('/') = rest.chars().next() {
        // A third slash makes this a doc comment: take the remainder of
        // the line and trim surrounding whitespace.
        let (doc_comment, chars_read) = take_while(&rest[1..], |ch| ch != '\n' && ch != '\r')?;
        let doc_comment = doc_comment.trim();
        // `+ 3` accounts for the three leading slashes.
        return Ok((
            TokenKind::DocComment(doc_comment.to_owned()),
            chars_read + 3,
        ));
    }
    // Plain `//` comment: take the remainder of the line and trim it.
    let (comment, chars_read) = take_while(rest, |ch| ch != '\n' && ch != '\r')?;
    let comment = comment.trim();
    // `+ 2` accounts for the two leading slashes.
    Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2))
}
fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> { fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?; let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?;

View File

@ -3,14 +3,14 @@ use std::mem;
use crate::{ use crate::{
command_spec::{ command_spec::{
checked::{ checked::{
CommandSpec, Enumeration, Function, Identifier, NamedType, Namespace, Structure, Type, CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier, NamedType,
BASE_TYPES, Namespace, Structure, Type, BASE_TYPES,
}, },
unchecked::{ unchecked::{
CommandSpec as UncheckedCommandSpec, Enumeration as UncheckedEnumeration, CommandSpec as UncheckedCommandSpec, DocNamedType as UncheckedDocNamedType,
Function as UncheckedFunction, NamedType as UncheckedNamedType, Enumeration as UncheckedEnumeration, Function as UncheckedFunction,
Namespace as UncheckedNamespace, Structure as UncheckedStructure, NamedType as UncheckedNamedType, Namespace as UncheckedNamespace,
Type as UncheckedType, Structure as UncheckedStructure, Type as UncheckedType,
}, },
}, },
error::ErrorContext, error::ErrorContext,
@ -66,6 +66,12 @@ impl UncheckedCommandSpec {
} }
} }
/// Convert the `attributes` of an unchecked AST node into their checked
/// counterparts (via the `From<unchecked::Attribute>` impl) and collect
/// them for the checked node being built.
macro_rules! pass_attrs_along {
    ($name:ident) => {
        // Consumes `$name.attributes`; the collect target is inferred
        // from the field this expands into (a `Vec<Attribute>`).
        $name.attributes.into_iter().map(|a| a.into()).collect()
    };
}
impl Parser { impl Parser {
fn parse(mut self) -> Result<CommandSpec, SpannedParsingError> { fn parse(mut self) -> Result<CommandSpec, SpannedParsingError> {
let namespace: UncheckedNamespace = let namespace: UncheckedNamespace =
@ -129,6 +135,7 @@ impl Parser {
structures, structures,
enumerations, enumerations,
namespaces, namespaces,
attributes: pass_attrs_along!(namespace),
}) })
} }
@ -151,6 +158,7 @@ impl Parser {
identifier, identifier,
inputs, inputs,
output, output,
attributes: pass_attrs_along!(function),
}) })
} }
@ -164,10 +172,20 @@ impl Parser {
let mut states = vec![]; let mut states = vec![];
for mut state in enumeration.states { for mut state in enumeration.states {
states.push(mem::take(&mut state.kind).into()) states.push({
let ident: Identifier = mem::take(&mut state.token.kind).into();
DocIdentifier {
name: ident.name,
attributes: pass_attrs_along!(state),
}
})
} }
Ok(Enumeration { identifier, states }) Ok(Enumeration {
identifier,
states,
attributes: pass_attrs_along!(enumeration),
})
} }
fn process_structure( fn process_structure(
@ -179,12 +197,13 @@ impl Parser {
let identifier: Identifier = mem::take(&mut structure.identifier.kind).into(); let identifier: Identifier = mem::take(&mut structure.identifier.kind).into();
let mut contents = vec![]; let mut contents = vec![];
for named_type in structure.contents { for named_type in structure.contents {
contents.push(self.process_named_type(named_type)?); contents.push(self.process_doc_named_type(named_type)?);
} }
Ok(Structure { Ok(Structure {
identifier, identifier,
contents, contents,
attributes: pass_attrs_along!(structure),
}) })
} }
@ -196,6 +215,18 @@ impl Parser {
let r#type: Type = self.process_type(named_type.r#type)?; let r#type: Type = self.process_type(named_type.r#type)?;
Ok(NamedType { name, r#type }) Ok(NamedType { name, r#type })
} }
/// Check an unchecked doc-comment-annotated `name: Type` pair and
/// convert it into its checked form.
fn process_doc_named_type(
    &mut self,
    mut raw: UncheckedDocNamedType,
) -> Result<DocNamedType, ParsingError> {
    // Steal the identifier token's kind, leaving a dummy behind, so the
    // rest of `raw` stays usable below.
    let field_name: Identifier = mem::take(&mut raw.name.kind).into();
    // Validate the type (errors if it is not a known type).
    let field_type = self.process_type(raw.r#type)?;
    Ok(DocNamedType {
        name: field_name,
        r#type: field_type,
        attributes: pass_attrs_along!(raw),
    })
}
fn process_type(&mut self, mut r#type: UncheckedType) -> Result<Type, ParsingError> { fn process_type(&mut self, mut r#type: UncheckedType) -> Result<Type, ParsingError> {
let identifier: Identifier = mem::take(&mut r#type.identifier.kind).into(); let identifier: Identifier = mem::take(&mut r#type.identifier.kind).into();

View File

@ -1,8 +1,11 @@
use crate::command_spec::checked::{ use crate::command_spec::checked::{
CommandSpec, Enumeration, Function, Identifier, NamedType, Namespace, Structure, Type, Attribute, CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier,
NamedType, Namespace, Structure, Type,
}; };
use crate::lexing::TokenStream; use crate::lexing::TokenStream;
use pretty_assertions::assert_eq;
#[test] #[test]
fn test_full() { fn test_full() {
let input = "nasp trinitrix { let input = "nasp trinitrix {
@ -57,13 +60,14 @@ fn test_full() {
}, },
], ],
output: None, output: None,
attributes: vec![],
}], }],
structures: vec![Structure { structures: vec![Structure {
identifier: Identifier { identifier: Identifier {
name: "Callback".to_owned(), name: "Callback".to_owned(),
}, },
contents: vec![ contents: vec![
NamedType { DocNamedType {
name: Identifier { name: Identifier {
name: "func".to_owned(), name: "func".to_owned(),
}, },
@ -73,8 +77,9 @@ fn test_full() {
}, },
generic_args: vec![], generic_args: vec![],
}, },
attributes: vec![],
}, },
NamedType { DocNamedType {
name: Identifier { name: Identifier {
name: "timeout".to_owned(), name: "timeout".to_owned(),
}, },
@ -84,26 +89,33 @@ fn test_full() {
}, },
generic_args: vec![], generic_args: vec![],
}, },
attributes: vec![],
}, },
], ],
attributes: vec![],
}], }],
enumerations: vec![Enumeration { enumerations: vec![Enumeration {
identifier: Identifier { identifier: Identifier {
name: "CallbackPriority".to_owned(), name: "CallbackPriority".to_owned(),
}, },
states: vec![ states: vec![
Identifier { DocIdentifier {
name: "High".to_owned(), name: "High".to_owned(),
attributes: vec![],
}, },
Identifier { DocIdentifier {
name: "Medium".to_owned(), name: "Medium".to_owned(),
attributes: vec![],
}, },
Identifier { DocIdentifier {
name: "Low".to_owned(), name: "Low".to_owned(),
attributes: vec![],
}, },
], ],
attributes: vec![],
}], }],
namespaces: vec![], namespaces: vec![],
attributes: vec![],
}], }],
}; };
assert_eq!(output, expected); assert_eq!(output, expected);
@ -132,3 +144,72 @@ fn execute_callback(callback: Name);
_ => panic!("Wrong error in test!"), _ => panic!("Wrong error in test!"),
}; };
} }
/// End-to-end check that doc comments (`///`) are attached as attributes
/// to the *next* declaration, while plain `//` comments are dropped.
#[test]
fn test_comments() {
    let input = "fn print(message: String);
/// First doc comment
// Some more text
nasp trinitrix {
/// Second doc comment
fn hi(name: String) -> String;
}
";
    let output = TokenStream::lex(&input).unwrap().parse().unwrap();
    // Expected AST: "First doc comment" belongs to the `trinitrix`
    // namespace, "Second doc comment" to the `hi` function; the `//`
    // comments leave no trace.
    let expected = CommandSpec {
        structures: vec![],
        enumerations: vec![],
        functions: vec![Function {
            identifier: Identifier {
                name: "print".to_owned(),
            },
            inputs: vec![NamedType {
                name: Identifier {
                    name: "message".to_owned(),
                },
                r#type: Type {
                    identifier: Identifier {
                        name: "String".to_owned(),
                    },
                    generic_args: vec![],
                },
            }],
            output: None,
            attributes: vec![],
        }],
        namespaces: vec![Namespace {
            name: Identifier {
                name: "trinitrix".to_owned(),
            },
            functions: vec![Function {
                identifier: Identifier {
                    name: "hi".to_owned(),
                },
                inputs: vec![NamedType {
                    name: Identifier {
                        name: "name".to_owned(),
                    },
                    r#type: Type {
                        identifier: Identifier {
                            name: "String".to_owned(),
                        },
                        generic_args: vec![],
                    },
                }],
                output: Some(Type {
                    identifier: Identifier {
                        name: "String".to_owned(),
                    },
                    generic_args: vec![],
                }),
                attributes: vec![Attribute::doc("Second doc comment".to_owned())],
            }],
            structures: vec![],
            enumerations: vec![],
            namespaces: vec![],
            attributes: vec![Attribute::doc("First doc comment".to_owned())],
        }],
    };
    assert_eq!(output, expected);
}

View File

@ -2,37 +2,47 @@ use std::{error::Error, fmt::Display};
use thiserror::Error; use thiserror::Error;
use crate::{ use crate::{
command_spec::unchecked::Attribute,
error::{AdditionalHelp, ErrorContext, ErrorContextDisplay}, error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
lexing::{TokenKind, TokenSpan}, lexing::{TokenKind, TokenSpan},
}; };
#[derive(Error, Debug, Clone)] #[derive(Error, Debug, Clone)]
pub enum ParsingError { pub enum ParsingError {
#[error("Expected '{expected}' but received '{actual}'")] #[error("Expected '{expected}', but received: '{actual}'")]
ExpectedDifferentToken { ExpectedDifferentToken {
expected: TokenKind, expected: TokenKind,
actual: TokenKind, actual: TokenKind,
span: TokenSpan, span: TokenSpan,
}, },
#[error("Expected '{expected}', but the token stream stopped")]
UnexpectedEOF {
expected: TokenKind,
span: TokenSpan,
},
#[error("Expected a Keyword to start a new declaration, but found: '{actual}'")] #[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
ExpectedKeyword { actual: TokenKind, span: TokenSpan }, ExpectedKeyword { actual: TokenKind, span: TokenSpan },
#[error("DocComment does not have target")]
TrailingDocComment {
comments: Vec<Attribute>,
span: TokenSpan,
},
} }
impl ParsingError { impl ParsingError {
pub fn span(&self) -> &TokenSpan { pub fn span(&self) -> &TokenSpan {
match self { match self {
ParsingError::ExpectedDifferentToken { span, .. } => span, ParsingError::ExpectedDifferentToken { span, .. } => span,
ParsingError::ExpectedKeyword { span, .. } => span, ParsingError::ExpectedKeyword { span, .. } => span,
ParsingError::TrailingDocComment { span, .. } => span,
ParsingError::UnexpectedEOF { span, .. } => span,
} }
} }
}
impl ParsingError {
pub fn get_span(&self) -> TokenSpan { pub fn get_span(&self) -> TokenSpan {
match self { *self.span()
ParsingError::ExpectedDifferentToken { span, .. } => *span,
ParsingError::ExpectedKeyword { span, .. } => *span,
}
} }
} }
@ -50,6 +60,8 @@ impl AdditionalHelp for ParsingError {
ParsingError::ExpectedKeyword { actual, .. } => format!( ParsingError::ExpectedKeyword { actual, .. } => format!(
"I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!", "I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!",
actual), actual),
ParsingError::TrailingDocComment { .. } => "I expected some target (a function, namespace, enum, or something like this) which this doc comment annotates, but you put nothing there".to_owned(),
ParsingError::UnexpectedEOF { expected, .. } => format!("Put the expected token ('{expected}') here."),
} }
} }
} }

View File

@ -1,9 +1,12 @@
use std::mem;
use crate::{ use crate::{
command_spec::unchecked::{ command_spec::unchecked::{
CommandSpec, Declaration, Enumeration, Function, NamedType, Namespace, Structure, Type, Attribute, CommandSpec, Declaration, DocNamedType, DocToken, Enumeration, Function,
NamedType, Namespace, Structure, Type,
}, },
error::ErrorContext, error::ErrorContext,
lexing::{Token, TokenKind, TokenStream}, lexing::{Token, TokenKind, TokenSpan, TokenStream},
token, token,
}; };
@ -22,12 +25,18 @@ impl TokenStream {
pub(super) struct Parser { pub(super) struct Parser {
token_stream: TokenStream, token_stream: TokenStream,
active_doc_comments: Vec<Attribute>,
last_span: TokenSpan,
} }
impl Parser { impl Parser {
fn new(mut token_stream: TokenStream) -> Self { fn new(mut token_stream: TokenStream) -> Self {
token_stream.reverse(); token_stream.reverse();
Self { token_stream } Self {
token_stream,
active_doc_comments: vec![],
last_span: TokenSpan::default(),
}
} }
fn parse(&mut self) -> Result<CommandSpec, SpannedParsingError> { fn parse(&mut self) -> Result<CommandSpec, SpannedParsingError> {
@ -52,15 +61,55 @@ impl Parser {
} }
fn parse_next(&mut self) -> Result<Declaration, ParsingError> { fn parse_next(&mut self) -> Result<Declaration, ParsingError> {
match self.peek().kind() { // Use of [peek_raw] here is fine, as we know that the function is only called, when
// something should still be contained in the token stream
match self.peek_raw().kind() {
token![nasp] => Ok(Declaration::Namespace(self.parse_namespace()?)), token![nasp] => Ok(Declaration::Namespace(self.parse_namespace()?)),
token![fn] => Ok(Declaration::Function(self.parse_function()?)), token![fn] => Ok(Declaration::Function(self.parse_function()?)),
token![struct] => Ok(Declaration::Structure(self.parse_structure()?)), token![struct] => Ok(Declaration::Structure(self.parse_structure()?)),
token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)), token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)),
token![DocCommentMatch] => {
while self.expect_peek(token![DocComment]) {
let comment_to_push = {
let doc_comment = self.expect(token![DocComment])?;
let span = *doc_comment.span();
let name = if let TokenKind::DocComment(content) = doc_comment.kind {
content
} else {
unreachable!("The expect should have accounted for that case");
};
Attribute::doc {
content: name,
span,
}
};
self.active_doc_comments.push(comment_to_push);
}
if self.token_stream.is_empty() {
fn get_span(attr: Option<&Attribute>) -> TokenSpan {
match attr.expect("Something should be here") {
Attribute::doc { span, .. } => *span,
}
}
let span = TokenSpan::from_range(
get_span(self.active_doc_comments.first()),
get_span(self.active_doc_comments.last()),
);
Err(ParsingError::TrailingDocComment {
comments: mem::take(&mut self.active_doc_comments),
span,
})
} else {
self.parse_next()
}
}
_ => { _ => {
let err = ParsingError::ExpectedKeyword { let err = ParsingError::ExpectedKeyword {
span: *self.peek().span(), span: *self.peek_raw().span(),
actual: self.peek().kind().clone(), actual: self.peek_raw().kind().clone(),
}; };
return Err(err); return Err(err);
@ -88,11 +137,34 @@ impl Parser {
}) })
} }
/// Consume every directly following doc comment (`///`) from the token
/// stream and return them as [`Attribute`]s.
///
/// Doc comments already buffered in `self.active_doc_comments`
/// (collected by `parse_next` while it was looking for the next
/// declaration) are drained first, so they end up attached to the
/// declaration currently being parsed.
fn parse_doc_comments(&mut self) -> Result<Vec<Attribute>, ParsingError> {
    // Take ownership of the buffer, leaving an empty Vec in its place.
    let mut attrs = mem::take(&mut self.active_doc_comments);
    while self.expect_peek(token![DocComment]) {
        attrs.push({
            let doc_comment = self.expect(token![DocComment])?;
            let span = *doc_comment.span();
            // `expect` above guarantees this is a DocComment token.
            let name = if let TokenKind::DocComment(content) = doc_comment.kind {
                content
            } else {
                unreachable!("The expect should have accounted for that case");
            };
            Attribute::doc {
                content: name,
                span,
            }
        });
    }
    Ok(attrs)
}
fn parse_namespace(&mut self) -> Result<Namespace, ParsingError> { fn parse_namespace(&mut self) -> Result<Namespace, ParsingError> {
let attributes = self.parse_doc_comments()?;
self.expect(token![nasp])?; self.expect(token![nasp])?;
let mut namespace = Namespace::default(); let mut namespace = Namespace::default();
namespace.name = self.expect(token![Ident])?; namespace.name = self.expect(token![Ident])?;
namespace.attributes = attributes;
self.expect(token![BraceOpen])?; self.expect(token![BraceOpen])?;
@ -113,40 +185,54 @@ impl Parser {
} }
fn parse_enumeration(&mut self) -> Result<Enumeration, ParsingError> { fn parse_enumeration(&mut self) -> Result<Enumeration, ParsingError> {
let attributes = self.parse_doc_comments()?;
self.expect(token![enum])?; self.expect(token![enum])?;
let identifier = self.expect(token![Ident])?; let identifier = self.expect(token![Ident])?;
self.expect(token![BraceOpen])?; self.expect(token![BraceOpen])?;
let mut states = vec![]; let mut states = vec![];
if self.expect_peek(token![Ident]) { if self.expect_peek(token![Ident]) {
states.push(self.expect(token![Ident])?); let attributes = self.parse_doc_comments()?;
states.push(DocToken {
token: self.expect(token![Ident])?,
attributes,
});
} }
while self.expect_peek(token![Comma]) { while self.expect_peek(token![Comma]) {
self.expect(token![Comma])?; self.expect(token![Comma])?;
if self.expect_peek(token![Ident]) { if self.expect_peek(token![Ident]) {
states.push(self.expect(token![Ident])?); let attributes = self.parse_doc_comments()?;
states.push(DocToken {
token: self.expect(token![Ident])?,
attributes,
});
} else { } else {
break; break;
} }
} }
self.expect(token![BraceClose])?; self.expect(token![BraceClose])?;
self.expect(token![;])?; self.expect(token![;])?;
Ok(Enumeration { identifier, states }) Ok(Enumeration {
identifier,
states,
attributes,
})
} }
fn parse_structure(&mut self) -> Result<Structure, ParsingError> { fn parse_structure(&mut self) -> Result<Structure, ParsingError> {
let attributes = self.parse_doc_comments()?;
self.expect(token![struct])?; self.expect(token![struct])?;
let name = self.expect(token![Ident])?; let name = self.expect(token![Ident])?;
self.expect(token![BraceOpen])?; self.expect(token![BraceOpen])?;
let mut contents = vec![]; let mut contents = vec![];
if self.expect_peek(token![Ident]) { if self.expect_peek(token![Ident]) {
contents.push(self.parse_named_type()?); contents.push(self.parse_doc_named_type()?);
} }
while self.expect_peek(token![Comma]) { while self.expect_peek(token![Comma]) {
self.expect(token![Comma])?; self.expect(token![Comma])?;
if self.expect_peek(token![Ident]) { if self.expect_peek(token![Ident]) {
contents.push(self.parse_named_type()?); contents.push(self.parse_doc_named_type()?);
} else { } else {
break; break;
} }
@ -157,6 +243,7 @@ impl Parser {
Ok(Structure { Ok(Structure {
identifier: name, identifier: name,
contents, contents,
attributes,
}) })
} }
@ -167,7 +254,20 @@ impl Parser {
Ok(NamedType { name, r#type }) Ok(NamedType { name, r#type })
} }
/// Parses one `DocNamedType` per the grammar:
/// `DocNamedType = {DocComment} Identifier ":" Type;`
///
/// Leading doc comments become the entry's attributes.
fn parse_doc_named_type(&mut self) -> Result<DocNamedType, ParsingError> {
    let attributes = self.parse_doc_comments()?;
    let ident = self.expect(token![Ident])?;
    self.expect(token![Colon])?;
    let parsed_type = self.parse_type()?;

    Ok(DocNamedType {
        name: ident,
        r#type: parsed_type,
        attributes,
    })
}
fn parse_function(&mut self) -> Result<Function, ParsingError> { fn parse_function(&mut self) -> Result<Function, ParsingError> {
let attributes = self.parse_doc_comments()?;
self.expect(token![fn])?; self.expect(token![fn])?;
let name = self.expect(token![Ident])?; let name = self.expect(token![Ident])?;
self.expect(token![ParenOpen])?; self.expect(token![ParenOpen])?;
@ -192,6 +292,7 @@ impl Parser {
identifier: name, identifier: name,
inputs, inputs,
output: output_type, output: output_type,
attributes,
}) })
} }
@ -216,7 +317,14 @@ impl Parser {
/// ``` /// ```
/// ///
pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> { pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
let actual_token = self.peek(); let actual_token = if let Some(token) = self.peek() {
token
} else {
return Err(ParsingError::UnexpectedEOF {
expected: token,
span: self.last_span,
});
};
if actual_token.kind().same_kind(&token) { if actual_token.kind().same_kind(&token) {
Ok(self.pop()) Ok(self.pop())
} else { } else {
@ -233,7 +341,10 @@ impl Parser {
/// Check if the next token is of the specified TokenKind. /// Check if the next token is of the specified TokenKind.
/// Does not alter the token_stream /// Does not alter the token_stream
fn expect_peek(&self, token: TokenKind) -> bool { fn expect_peek(&self, token: TokenKind) -> bool {
let actual_token = self.peek(); let actual_token = match self.peek() {
Some(ok) => ok,
None => return false,
};
if actual_token.kind().same_kind(&token) { if actual_token.kind().same_kind(&token) {
true true
} else { } else {
@ -242,12 +353,22 @@ impl Parser {
} }
/// Looks at the next token without removing it /// Looks at the next token without removing it
fn peek(&self) -> &Token { fn peek(&self) -> Option<&Token> {
self.token_stream.peek() self.token_stream.peek()
} }
/// Looks at the next token without removing it.
///
/// Unwrapping variant of [`Self::peek`].
///
/// # Panics
/// Panics if the token stream is empty; only call this after a successful
/// peek (or when a token is otherwise known to exist).
fn peek_raw(&self) -> &Token {
    self.token_stream.peek().expect("The token should exist")
}
/// Removes the next token /// Removes the next token
fn pop(&mut self) -> Token { fn pop(&mut self) -> Token {
self.last_span = *self
.peek()
.expect("Calling pop should mean, that a token was first peeked for")
.span();
self.token_stream.pop() self.token_stream.pop()
} }
} }

View File

@ -20,8 +20,8 @@ nasp trinitrix { {}
let parsed = TokenStream::lex(input).unwrap().parse_unchecked(); let parsed = TokenStream::lex(input).unwrap().parse_unchecked();
let err = parsed.unwrap_err().source; let err = parsed.unwrap_err().source;
match err { match err {
ParsingError::ExpectedDifferentToken { .. } => panic!("Wrong error"),
ParsingError::ExpectedKeyword { .. } => {} ParsingError::ExpectedKeyword { .. } => {}
_ => panic!("Wrong error"),
} }
} }
@ -56,6 +56,7 @@ nasp trinitrix {
}, },
}], }],
output: None, output: None,
attributes: vec![],
}], }],
namespaces: vec![Namespace { namespaces: vec![Namespace {
name: Token { name: Token {
@ -87,10 +88,12 @@ nasp trinitrix {
}, },
generic_args: vec![], generic_args: vec![],
}), }),
attributes: vec![],
}], }],
structures: vec![], structures: vec![],
enumerations: vec![], enumerations: vec![],
namespaces: vec![], namespaces: vec![],
attributes: vec![],
}], }],
}; };