forked from trinitrix/core

feat(trixy-lang_parser): Add support for doc comments

Parsing right now works by simply comparing the input string:
```
"/" -> <comment_tokenizer> -> "/" -> <normal_comment>
                          |-> "//" -> <doc_comment>
```

A better method, though, would be to turn "//" and "///" into
keywords and leave the parsing to the parser module rather than the
tokenizer.
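
For illustration, a minimal sketch of that keyword-based alternative (all names here are hypothetical and not part of this commit):

```rust
// Hypothetical sketch: lex "///" and "//" as fixed marker tokens and
// defer the comment/doc-comment distinction to the parser module.
#[derive(Debug, PartialEq)]
enum MarkerKind {
    DocCommentMarker, // "///"
    CommentMarker,    // "//"
}

fn lex_comment_marker(src: &str) -> Option<(MarkerKind, usize)> {
    // Check the longer marker first; otherwise "///" would match as "//".
    if src.starts_with("///") {
        Some((MarkerKind::DocCommentMarker, 3))
    } else if src.starts_with("//") {
        Some((MarkerKind::CommentMarker, 2))
    } else {
        None
    }
}
```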
Benedikt Peetz 2023-12-22 20:32:43 +01:00
parent 3503e5250c
commit 70c4cc6f18
Signed by: bpeetz
GPG Key ID: A5E94010C3A642AD
16 changed files with 487 additions and 70 deletions

View File

@@ -6,15 +6,27 @@
# - Block comments (`/* */`).
# *)
CommandSpec = { Function | Namespace | Enumeration | Structure } ;
Function = "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
Namespace = "nasp" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
Structure = "struct" Identifier "{" [NamedType {"," NamedType } [","]] "}" ";";
Enumeration = "enum" Identifier "{" [Identifier {"," Identifier} [","]] "}" ";";
Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
NamedType = Identifier ":" Type;
CommandSpec = {Function | Namespace | Enumeration | Structure } ;
Function = {DocComment} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
Namespace = {DocComment} "nasp" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
Structure = {DocComment} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}" ";";
Enumeration = {DocComment} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}" ";";
Type = Identifier ["<" Type {"," Type} ">"];
Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
DocIdentifier = {DocComment} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
NamedType = Identifier ":" Type;
DocNamedType = {DocComment} Identifier ":" Type;
DocComment = "///" {ANYTHING} LineEnding;
Comment = "//" [ NOT ("/" {ANYTHING} LineEnding) | "//"] {ANYTHING} LineEnding;
LineEnding = "\\n" | "\\r" | "\\r\\n";
# (*
# vim: ft=ebnf
# *)

View File

@@ -0,0 +1,12 @@
fn print(message: String);
/// First doc comment
// Some more text
nasp trinitrix {
/// Second doc comment
fn hi(name: String) -> String;
}
// That's a flat-out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust

View File

@@ -0,0 +1,13 @@
fn print(message: CommandTransferValue);
/// Some doc comment
// Some more text
nasp trinitrix {
fn hi(name: String) -> String;
}
/// Trailing doc comment (I will fail)
// That's a flat-out lie, but it results in a rather nice syntax highlight compared to nothing:
// vim: syntax=rust

View File

@@ -2,8 +2,8 @@
ebnf2pdf "./docs/grammar.ebnf"
mv out.pdf ./docs/grammar.pdf
ebnf2pdf make "./docs/grammar.ebnf"
mv grammar.ebnf.pdf ./docs/grammar.pdf
# vim: ft=sh

View File

@@ -4,6 +4,8 @@ use std::fmt::Display;
use crate::lexing::TokenKind;
use super::unchecked;
/// These are the "primitive" types used in trixy; you can use any of them to create new structures
pub const BASE_TYPES: [ConstIdentifier; 8] = [
Identifier::from("Integer"),
@@ -24,6 +26,7 @@ pub struct Namespace {
pub structures: Vec<Structure>,
pub enumerations: Vec<Enumeration>,
pub namespaces: Vec<Namespace>,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -48,13 +51,15 @@ impl From<Namespace> for CommandSpec {
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Structure {
pub identifier: Identifier,
pub contents: Vec<NamedType>,
pub contents: Vec<DocNamedType>,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Enumeration {
pub identifier: Identifier,
pub states: Vec<Identifier>,
pub states: Vec<DocIdentifier>,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -62,6 +67,7 @@ pub struct Function {
pub identifier: Identifier,
pub inputs: Vec<NamedType>,
pub output: Option<Type>,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -76,6 +82,13 @@ pub struct NamedType {
pub r#type: Type,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct DocNamedType {
pub name: Identifier,
pub r#type: Type,
pub attributes: Vec<Attribute>,
}
impl From<TokenKind> for Identifier {
fn from(value: TokenKind) -> Self {
match value {
@@ -92,6 +105,19 @@ impl From<TokenKind> for Identifier {
}
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum Attribute {
#[allow(non_camel_case_types)]
doc(String),
}
impl From<unchecked::Attribute> for Attribute {
fn from(value: unchecked::Attribute) -> Self {
match value {
unchecked::Attribute::doc { content: name, .. } => Self::doc(name),
}
}
}
/// An Identifier
/// These include
/// - Variable names
@@ -103,6 +129,12 @@ pub struct Identifier {
pub name: String,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct DocIdentifier {
pub name: String,
pub attributes: Vec<Attribute>,
}
/// A const version of [Identifier]
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct ConstIdentifier {

View File

@@ -4,7 +4,7 @@
use std::fmt::{Display, Write};
use crate::lexing::Token;
use crate::lexing::{Token, TokenSpan};
#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub struct CommandSpec {
@@ -22,6 +22,7 @@ impl From<CommandSpec> for Namespace {
structures: value.structures,
enumerations: value.enumerations,
namespaces: value.namespaces,
attributes: vec![],
}
}
}
@@ -34,6 +35,8 @@ pub struct Namespace {
pub structures: Vec<Structure>,
pub enumerations: Vec<Enumeration>,
pub namespaces: Vec<Namespace>,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
@@ -44,23 +47,45 @@ pub enum Declaration {
Namespace(Namespace),
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum Attribute {
#[allow(non_camel_case_types)]
doc{content: String, span: TokenSpan},
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct Function {
pub identifier: Token, // Will later become an Identifier
pub inputs: Vec<NamedType>,
pub output: Option<Type>,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct Structure {
pub identifier: Token, // Will later become an Identifier
pub contents: Vec<NamedType>,
pub contents: Vec<DocNamedType>,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct Enumeration {
pub identifier: Token, // Will later become an Identifier
pub states: Vec<Token>, // Will later become an Identifier
pub identifier: Token, // Will later become an Identifier
pub states: Vec<DocToken>, // Will later become an Identifier
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct DocToken {
pub token: Token,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct DocNamedType {
pub name: Token, // Will later become an Identifier
pub r#type: Type,
pub attributes: Vec<Attribute>,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]

View File

@@ -55,7 +55,7 @@ impl ErrorContext {
};
let line_above;
if line_number == 0 {
if line_number == 1 {
// We only have one line, so no line above
line_above = "".to_owned();
} else {

View File

@@ -13,10 +13,12 @@ pub enum LexingError {
UnknownCharacter(char),
#[error("The Arrow token must be of the form: ->")]
ExpectedArrow,
#[error("The Comment token must start with two slashes")]
ExpectedComment,
}
impl AdditionalHelp for LexingError {
fn additional_help(& self) -> String {
fn additional_help(&self) -> String {
let out = match self {
LexingError::NoMatchesTaken => "This token does not produce a possible match".to_owned(),
LexingError::UnexpectedEOF => "This eof was completely unexpected".to_owned(),
@@ -24,6 +26,7 @@ impl AdditionalHelp for LexingError {
LexingError::UnknownCharacter(char) => {
format!("This char: `{char}`; is not a valid token")
},
LexingError::ExpectedComment => "The '/' started comment parsing, but I could not find a matching '/'".to_owned(),
};
out
}

View File

@@ -28,6 +28,18 @@ impl TokenStream {
tokens.push(tok);
}
// filter out comments
let tokens = tokens
.into_iter()
.filter(|token| {
if let TokenKind::Comment(_) = token.kind {
false
} else {
true
}
})
.collect();
Ok(Self {
tokens,
original_file: src.to_owned(),
@@ -40,8 +52,8 @@ impl TokenStream {
}
/// Get a reference to the uppermost token, without modifying the token list
pub fn peek(&self) -> &Token {
self.tokens.last().expect("This should not be empty")
pub fn peek(&self) -> Option<&Token> {
self.tokens.last()
}
/// Remove the uppermost token
@@ -80,6 +92,15 @@ pub struct TokenSpan {
pub end: usize,
}
impl TokenSpan {
pub fn from_range(start: TokenSpan, end: TokenSpan) -> Self {
Self {
start: start.start,
end: end.end,
}
}
}
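
A small usage sketch of `from_range` (assuming `TokenSpan` is `Copy` and can be constructed literally, as the surrounding code suggests):

```rust
// Merge the spans of the first and last doc comment into one span
// covering the whole comment block.
let first = TokenSpan { start: 10, end: 25 };
let last = TokenSpan { start: 40, end: 58 };
let merged = TokenSpan::from_range(first, last);
assert_eq!(merged.start, 10);
assert_eq!(merged.end, 58);
```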
/// A Token
#[derive(Debug, Default, PartialEq, PartialOrd, Ord, Eq, Clone)]
pub struct Token {
@@ -123,6 +144,10 @@ pub enum TokenKind {
ParenClose,
SquareOpen,
SquareClose,
DocComment(String),
Comment(String),
/// This is not a real TokenKind, but is only used for error handling
#[default]
Dummy,
@@ -135,6 +160,16 @@ impl TokenKind {
return true;
}
}
if let TokenKind::Comment(_) = self {
if let TokenKind::Comment(_) = other {
return true;
}
}
if let TokenKind::DocComment(_) = self {
if let TokenKind::DocComment(_) = other {
return true;
}
}
self == other
}
}
@@ -161,6 +196,8 @@ impl Display for TokenKind {
TokenKind::Dummy => f.write_str("DUMMY"),
TokenKind::SquareOpen => f.write_str("SQUAREOPEN"),
TokenKind::SquareClose => f.write_str("SQUARECLOSE"),
TokenKind::DocComment(text) => write!(f, "DOCCOMMENT({})", text),
TokenKind::Comment(text) => write!(f, "COMMENT({})", text),
}
}
}
@@ -234,10 +271,13 @@ macro_rules! token {
[struct] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#struct) };
[enum] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#enum) };
// This only works for checking for an identifier
// This only works for checking for an identifier or comment
// see the `same_kind` method on TokenKind
[Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
[Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
[DocComment] => { $crate::lexing::TokenKind::DocComment("".to_owned()) };
[DocCommentMatch] => { $crate::lexing::TokenKind::DocComment(_doc_comment) };
[Comment] => { $crate::lexing::TokenKind::Comment("".to_owned()) };
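
What this buys in practice (a sketch using the names from this diff): `same_kind` ignores the payload, so a lexed comment can be matched against the empty pattern the macro produces.

```rust
// A lexed doc comment carries its text, but same_kind ignores the
// payload, so it matches the token! macro's empty DocComment pattern.
let lexed = TokenKind::DocComment("First doc comment".to_owned());
assert!(lexed.same_kind(&token![DocComment]));
// Plain equality still distinguishes the payloads.
assert_ne!(lexed, token![DocComment]);
```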
}
#[cfg(test)]

View File

@@ -62,7 +62,9 @@ impl<'a> Tokenizer<'a> {
',' => (TokenKind::Comma, 1),
'<' => (TokenKind::SquareOpen, 1),
'>' => (TokenKind::SquareClose, 1),
'-' => tokenize_arrow(self.remaining_text)?,
'/' => tokenize_comment(self.remaining_text)?,
// can't use an OR (`|`) here, as the guard takes precedence
c if c.is_alphabetic() => tokenize_ident(self.remaining_text)?,
@@ -74,17 +76,17 @@ impl<'a> Tokenizer<'a> {
Ok((tok, length))
}
/// Skip past any whitespace characters or comments.
fn skip_ignored_tokens(&mut self) {
loop {
let ws = self.skip_whitespace();
let comments = self.skip_comments();
let comments = self.skip_block_comment();
if ws + comments == 0 {
return;
}
}
}
/// Skip past any whitespace characters
fn skip_whitespace(&mut self) -> usize {
let mut remaining = self.remaining_text;
@@ -102,21 +104,21 @@ impl<'a> Tokenizer<'a> {
self.chomp(skip);
skip
}
fn skip_block_comment(&mut self) -> usize {
let pairs = [("/*", "*/")];
fn skip_comments(&mut self) -> usize {
let remaining = self.remaining_text;
let pairs = [("//", "\n"), ("/*", "*/")];
let src = self.remaining_text;
let mut skip = 0;
for &(pattern, matcher) in &pairs {
if remaining.starts_with(pattern) {
let leftovers = skip_until(remaining, matcher);
skip = remaining.len() - leftovers.len();
break;
if src.starts_with(pattern) {
let leftovers = skip_until(src, matcher);
let skip = src.len() - leftovers.len();
self.chomp(skip);
return skip;
}
}
self.chomp(skip);
skip
0
}
fn chomp(&mut self, chars_to_chomp: usize) {
@@ -125,6 +127,36 @@ impl<'a> Tokenizer<'a> {
}
}
fn tokenize_comment(text: &str) -> Result<(TokenKind, usize), LexingError> {
// every comment starts with two slashes
let slashes: &str = &text[..2];
if slashes != "//" {
Err(LexingError::ExpectedComment)
} else {
let text: &str = &text[2..];
if let Some('/') = text.chars().next() {
let text = &text[1..];
let (doc_comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
// trim whitespace
let doc_comment = doc_comment.trim_start();
let doc_comment = doc_comment.trim_end();
return Ok((
TokenKind::DocComment(doc_comment.to_owned()),
chars_read + 3,
));
}
let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
// trim whitespace
let comment = comment.trim_start();
let comment = comment.trim_end();
return Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2));
}
}
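
To make the offset accounting concrete, this is what `tokenize_comment` should return for two simple inputs (a sketch, not part of the commit; the second tuple element counts the slashes plus the untrimmed comment text, excluding the line ending):

```rust
// "//" + " plain comment" -> 16 consumed characters, trimmed text.
assert_eq!(
    tokenize_comment("// plain comment\n").unwrap(),
    (TokenKind::Comment("plain comment".to_owned()), 16),
);
// "///" + " doc comment" -> 15 consumed characters, trimmed text.
assert_eq!(
    tokenize_comment("/// doc comment\n").unwrap(),
    (TokenKind::DocComment("doc comment".to_owned()), 15),
);
```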
fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?;

View File

@@ -3,14 +3,14 @@ use std::mem;
use crate::{
command_spec::{
checked::{
CommandSpec, Enumeration, Function, Identifier, NamedType, Namespace, Structure, Type,
BASE_TYPES,
CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier, NamedType,
Namespace, Structure, Type, BASE_TYPES,
},
unchecked::{
CommandSpec as UncheckedCommandSpec, Enumeration as UncheckedEnumeration,
Function as UncheckedFunction, NamedType as UncheckedNamedType,
Namespace as UncheckedNamespace, Structure as UncheckedStructure,
Type as UncheckedType,
CommandSpec as UncheckedCommandSpec, DocNamedType as UncheckedDocNamedType,
Enumeration as UncheckedEnumeration, Function as UncheckedFunction,
NamedType as UncheckedNamedType, Namespace as UncheckedNamespace,
Structure as UncheckedStructure, Type as UncheckedType,
},
},
error::ErrorContext,
@@ -66,6 +66,12 @@ impl UncheckedCommandSpec {
}
}
macro_rules! pass_attrs_along {
($name:ident) => {
$name.attributes.into_iter().map(|a| a.into()).collect()
};
}
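
Written out, a call like `pass_attrs_along!(namespace)` is shorthand for the following (with `a.into()` converting each unchecked attribute into its checked counterpart):

```rust
// What the macro call produces at its use site:
let attributes: Vec<Attribute> = namespace
    .attributes
    .into_iter()
    .map(|a| a.into())
    .collect();
```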
impl Parser {
fn parse(mut self) -> Result<CommandSpec, SpannedParsingError> {
let namespace: UncheckedNamespace =
@@ -129,6 +135,7 @@ impl Parser {
structures,
enumerations,
namespaces,
attributes: pass_attrs_along!(namespace),
})
}
@@ -151,6 +158,7 @@ impl Parser {
identifier,
inputs,
output,
attributes: pass_attrs_along!(function),
})
}
@@ -164,10 +172,20 @@ impl Parser {
let mut states = vec![];
for mut state in enumeration.states {
states.push(mem::take(&mut state.kind).into())
states.push({
let ident: Identifier = mem::take(&mut state.token.kind).into();
DocIdentifier {
name: ident.name,
attributes: pass_attrs_along!(state),
}
})
}
Ok(Enumeration { identifier, states })
Ok(Enumeration {
identifier,
states,
attributes: pass_attrs_along!(enumeration),
})
}
fn process_structure(
@@ -179,12 +197,13 @@ impl Parser {
let identifier: Identifier = mem::take(&mut structure.identifier.kind).into();
let mut contents = vec![];
for named_type in structure.contents {
contents.push(self.process_named_type(named_type)?);
contents.push(self.process_doc_named_type(named_type)?);
}
Ok(Structure {
identifier,
contents,
attributes: pass_attrs_along!(structure),
})
}
@@ -196,6 +215,18 @@ impl Parser {
let r#type: Type = self.process_type(named_type.r#type)?;
Ok(NamedType { name, r#type })
}
fn process_doc_named_type(
&mut self,
mut doc_named_type: UncheckedDocNamedType,
) -> Result<DocNamedType, ParsingError> {
let name: Identifier = mem::take(&mut doc_named_type.name.kind).into();
let r#type: Type = self.process_type(doc_named_type.r#type)?;
Ok(DocNamedType {
name,
r#type,
attributes: pass_attrs_along!(doc_named_type),
})
}
fn process_type(&mut self, mut r#type: UncheckedType) -> Result<Type, ParsingError> {
let identifier: Identifier = mem::take(&mut r#type.identifier.kind).into();

View File

@@ -1,8 +1,11 @@
use crate::command_spec::checked::{
CommandSpec, Enumeration, Function, Identifier, NamedType, Namespace, Structure, Type,
Attribute, CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier,
NamedType, Namespace, Structure, Type,
};
use crate::lexing::TokenStream;
use pretty_assertions::assert_eq;
#[test]
fn test_full() {
let input = "nasp trinitrix {
@@ -57,13 +60,14 @@ fn test_full() {
},
],
output: None,
attributes: vec![],
}],
structures: vec![Structure {
identifier: Identifier {
name: "Callback".to_owned(),
},
contents: vec![
NamedType {
DocNamedType {
name: Identifier {
name: "func".to_owned(),
},
@@ -73,8 +77,9 @@
},
generic_args: vec![],
},
attributes: vec![],
},
NamedType {
DocNamedType {
name: Identifier {
name: "timeout".to_owned(),
},
@@ -84,26 +89,33 @@
},
generic_args: vec![],
},
attributes: vec![],
},
],
attributes: vec![],
}],
enumerations: vec![Enumeration {
identifier: Identifier {
name: "CallbackPriority".to_owned(),
},
states: vec![
Identifier {
DocIdentifier {
name: "High".to_owned(),
attributes: vec![],
},
Identifier {
DocIdentifier {
name: "Medium".to_owned(),
attributes: vec![],
},
Identifier {
DocIdentifier {
name: "Low".to_owned(),
attributes: vec![],
},
],
attributes: vec![],
}],
namespaces: vec![],
attributes: vec![],
}],
};
assert_eq!(output, expected);
@@ -132,3 +144,72 @@ fn execute_callback(callback: Name);
_ => panic!("Wrong error in test!"),
};
}
#[test]
fn test_comments() {
let input = "fn print(message: String);
/// First doc comment
// Some more text
nasp trinitrix {
/// Second doc comment
fn hi(name: String) -> String;
}
";
let output = TokenStream::lex(&input).unwrap().parse().unwrap();
let expected = CommandSpec {
structures: vec![],
enumerations: vec![],
functions: vec![Function {
identifier: Identifier {
name: "print".to_owned(),
},
inputs: vec![NamedType {
name: Identifier {
name: "message".to_owned(),
},
r#type: Type {
identifier: Identifier {
name: "String".to_owned(),
},
generic_args: vec![],
},
}],
output: None,
attributes: vec![],
}],
namespaces: vec![Namespace {
name: Identifier {
name: "trinitrix".to_owned(),
},
functions: vec![Function {
identifier: Identifier {
name: "hi".to_owned(),
},
inputs: vec![NamedType {
name: Identifier {
name: "name".to_owned(),
},
r#type: Type {
identifier: Identifier {
name: "String".to_owned(),
},
generic_args: vec![],
},
}],
output: Some(Type {
identifier: Identifier {
name: "String".to_owned(),
},
generic_args: vec![],
}),
attributes: vec![Attribute::doc("Second doc comment".to_owned())],
}],
structures: vec![],
enumerations: vec![],
namespaces: vec![],
attributes: vec![Attribute::doc("First doc comment".to_owned())],
}],
};
assert_eq!(output, expected);
}

View File

@@ -2,37 +2,47 @@ use std::{error::Error, fmt::Display};
use thiserror::Error;
use crate::{
command_spec::unchecked::Attribute,
error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
lexing::{TokenKind, TokenSpan},
};
#[derive(Error, Debug, Clone)]
pub enum ParsingError {
#[error("Expected '{expected}' but received '{actual}'")]
#[error("Expected '{expected}', but received: '{actual}'")]
ExpectedDifferentToken {
expected: TokenKind,
actual: TokenKind,
span: TokenSpan,
},
#[error("Expected '{expected}', but the token stream stopped")]
UnexpectedEOF {
expected: TokenKind,
span: TokenSpan,
},
#[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
ExpectedKeyword { actual: TokenKind, span: TokenSpan },
#[error("DocComment does not have target")]
TrailingDocComment {
comments: Vec<Attribute>,
span: TokenSpan,
},
}
impl ParsingError {
pub fn span(&self) -> &TokenSpan {
match self {
ParsingError::ExpectedDifferentToken { span, .. } => span,
ParsingError::ExpectedKeyword { span, .. } => span,
ParsingError::TrailingDocComment { span, .. } => span,
ParsingError::UnexpectedEOF { span, .. } => span,
}
}
}
impl ParsingError {
pub fn get_span(&self) -> TokenSpan {
match self {
ParsingError::ExpectedDifferentToken { span, .. } => *span,
ParsingError::ExpectedKeyword { span, .. } => *span,
}
*self.span()
}
}
@@ -50,6 +60,8 @@ impl AdditionalHelp for ParsingError {
ParsingError::ExpectedKeyword { actual, .. } => format!(
"I expected a keyword (that is something like 'fn' or 'nasp') but you put a '{}' there!",
actual),
ParsingError::TrailingDocComment { .. } => "I expected some target (a function, namespace, enum, or something like this) which this doc comment annotates, but you put nothing there".to_owned(),
ParsingError::UnexpectedEOF { expected, .. } => format!("Put the expected token ('{expected}') here."),
}
}
}

View File

@@ -1,9 +1,12 @@
use std::mem;
use crate::{
command_spec::unchecked::{
CommandSpec, Declaration, Enumeration, Function, NamedType, Namespace, Structure, Type,
Attribute, CommandSpec, Declaration, DocNamedType, DocToken, Enumeration, Function,
NamedType, Namespace, Structure, Type,
},
error::ErrorContext,
lexing::{Token, TokenKind, TokenStream},
lexing::{Token, TokenKind, TokenSpan, TokenStream},
token,
};
@@ -22,12 +25,18 @@ impl TokenStream {
pub(super) struct Parser {
token_stream: TokenStream,
active_doc_comments: Vec<Attribute>,
last_span: TokenSpan,
}
impl Parser {
fn new(mut token_stream: TokenStream) -> Self {
token_stream.reverse();
Self { token_stream }
Self {
token_stream,
active_doc_comments: vec![],
last_span: TokenSpan::default(),
}
}
fn parse(&mut self) -> Result<CommandSpec, SpannedParsingError> {
@@ -52,15 +61,55 @@ impl Parser {
}
fn parse_next(&mut self) -> Result<Declaration, ParsingError> {
match self.peek().kind() {
// Use of [peek_raw] here is fine, as we know that the function is only called
// when something should still be contained in the token stream
match self.peek_raw().kind() {
token![nasp] => Ok(Declaration::Namespace(self.parse_namespace()?)),
token![fn] => Ok(Declaration::Function(self.parse_function()?)),
token![struct] => Ok(Declaration::Structure(self.parse_structure()?)),
token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)),
token![DocCommentMatch] => {
while self.expect_peek(token![DocComment]) {
let comment_to_push = {
let doc_comment = self.expect(token![DocComment])?;
let span = *doc_comment.span();
let name = if let TokenKind::DocComment(content) = doc_comment.kind {
content
} else {
unreachable!("The expect should have accounted for that case");
};
Attribute::doc {
content: name,
span,
}
};
self.active_doc_comments.push(comment_to_push);
}
if self.token_stream.is_empty() {
fn get_span(attr: Option<&Attribute>) -> TokenSpan {
match attr.expect("Something should be here") {
Attribute::doc { span, .. } => *span,
}
}
let span = TokenSpan::from_range(
get_span(self.active_doc_comments.first()),
get_span(self.active_doc_comments.last()),
);
Err(ParsingError::TrailingDocComment {
comments: mem::take(&mut self.active_doc_comments),
span,
})
} else {
self.parse_next()
}
}
_ => {
let err = ParsingError::ExpectedKeyword {
span: *self.peek().span(),
actual: self.peek().kind().clone(),
span: *self.peek_raw().span(),
actual: self.peek_raw().kind().clone(),
};
return Err(err);
@@ -88,11 +137,34 @@ impl Parser {
})
}
fn parse_doc_comments(&mut self) -> Result<Vec<Attribute>, ParsingError> {
let mut attrs = mem::take(&mut self.active_doc_comments);
while self.expect_peek(token![DocComment]) {
attrs.push({
let doc_comment = self.expect(token![DocComment])?;
let span = *doc_comment.span();
let name = if let TokenKind::DocComment(content) = doc_comment.kind {
content
} else {
unreachable!("The expect should have accounted for that case");
};
Attribute::doc {
content: name,
span,
}
});
}
Ok(attrs)
}
fn parse_namespace(&mut self) -> Result<Namespace, ParsingError> {
let attributes = self.parse_doc_comments()?;
self.expect(token![nasp])?;
let mut namespace = Namespace::default();
namespace.name = self.expect(token![Ident])?;
namespace.attributes = attributes;
self.expect(token![BraceOpen])?;
@@ -113,40 +185,54 @@
}
fn parse_enumeration(&mut self) -> Result<Enumeration, ParsingError> {
let attributes = self.parse_doc_comments()?;
self.expect(token![enum])?;
let identifier = self.expect(token![Ident])?;
self.expect(token![BraceOpen])?;
let mut states = vec![];
if self.expect_peek(token![Ident]) {
states.push(self.expect(token![Ident])?);
let attributes = self.parse_doc_comments()?;
states.push(DocToken {
token: self.expect(token![Ident])?,
attributes,
});
}
while self.expect_peek(token![Comma]) {
self.expect(token![Comma])?;
if self.expect_peek(token![Ident]) {
states.push(self.expect(token![Ident])?);
let attributes = self.parse_doc_comments()?;
states.push(DocToken {
token: self.expect(token![Ident])?,
attributes,
});
} else {
break;
}
}
self.expect(token![BraceClose])?;
self.expect(token![;])?;
Ok(Enumeration { identifier, states })
Ok(Enumeration {
identifier,
states,
attributes,
})
}
fn parse_structure(&mut self) -> Result<Structure, ParsingError> {
let attributes = self.parse_doc_comments()?;
self.expect(token![struct])?;
let name = self.expect(token![Ident])?;
self.expect(token![BraceOpen])?;
let mut contents = vec![];
if self.expect_peek(token![Ident]) {
contents.push(self.parse_named_type()?);
contents.push(self.parse_doc_named_type()?);
}
while self.expect_peek(token![Comma]) {
self.expect(token![Comma])?;
if self.expect_peek(token![Ident]) {
contents.push(self.parse_named_type()?);
contents.push(self.parse_doc_named_type()?);
} else {
break;
}
@@ -157,6 +243,7 @@ impl Parser {
Ok(Structure {
identifier: name,
contents,
attributes,
})
}
@@ -167,7 +254,20 @@ impl Parser {
Ok(NamedType { name, r#type })
}
fn parse_doc_named_type(&mut self) -> Result<DocNamedType, ParsingError> {
let attributes = self.parse_doc_comments()?;
let name = self.expect(token![Ident])?;
self.expect(token![Colon])?;
let r#type = self.parse_type()?;
Ok(DocNamedType {
name,
r#type,
attributes,
})
}
fn parse_function(&mut self) -> Result<Function, ParsingError> {
let attributes = self.parse_doc_comments()?;
self.expect(token![fn])?;
let name = self.expect(token![Ident])?;
self.expect(token![ParenOpen])?;
@@ -192,6 +292,7 @@ impl Parser {
identifier: name,
inputs,
output: output_type,
attributes,
})
}
@@ -216,7 +317,14 @@ impl Parser {
/// ```
///
pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
let actual_token = self.peek();
let actual_token = if let Some(token) = self.peek() {
token
} else {
return Err(ParsingError::UnexpectedEOF {
expected: token,
span: self.last_span,
});
};
if actual_token.kind().same_kind(&token) {
Ok(self.pop())
} else {
@@ -233,7 +341,10 @@ impl Parser {
/// Check if the next token is of the specified TokenKind.
/// Does not alter the token_stream
fn expect_peek(&self, token: TokenKind) -> bool {
let actual_token = self.peek();
let actual_token = match self.peek() {
Some(ok) => ok,
None => return false,
};
if actual_token.kind().same_kind(&token) {
true
} else {
@@ -242,12 +353,22 @@ impl Parser {
}
/// Looks at the next token without removing it
fn peek(&self) -> &Token {
fn peek(&self) -> Option<&Token> {
self.token_stream.peek()
}
/// Looks at the next token without removing it.
/// Unwraps the option returned from [peek]; only use it if you know that a token must exist
fn peek_raw(&self) -> &Token {
self.token_stream.peek().expect("The token should exist")
}
/// Removes the next token
fn pop(&mut self) -> Token {
self.last_span = *self
.peek()
.expect("Calling pop should mean, that a token was first peeked for")
.span();
self.token_stream.pop()
}
}

View File

@@ -20,8 +20,8 @@ nasp trinitrix { {}
let parsed = TokenStream::lex(input).unwrap().parse_unchecked();
let err = parsed.unwrap_err().source;
match err {
ParsingError::ExpectedDifferentToken { .. } => panic!("Wrong error"),
ParsingError::ExpectedKeyword { .. } => {}
_ => panic!("Wrong error"),
}
}
@@ -56,6 +56,7 @@ nasp trinitrix {
},
}],
output: None,
attributes: vec![],
}],
namespaces: vec![Namespace {
name: Token {
@@ -87,10 +88,12 @@
},
generic_args: vec![],
}),
attributes: vec![],
}],
structures: vec![],
enumerations: vec![],
namespaces: vec![],
attributes: vec![],
}],
};