feat(parser): Add support for parsing attributes
This commit is contained in:
parent
f1e9087f40
commit
add0d170eb
|
@ -20,7 +20,6 @@
|
|||
#*)
|
||||
|
||||
|
||||
|
||||
# (*
|
||||
# Trixy is fully whitespace independent, this means that you can
|
||||
# interleave whitespace in the definitions.
|
||||
|
@ -31,22 +30,31 @@
|
|||
|
||||
CommandSpec = {Function | Namespace | Enumeration | Structure } ;
|
||||
|
||||
Function = {DocComment} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
|
||||
Namespace = {DocComment} "mod" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
|
||||
Structure = {DocComment} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}";
|
||||
Enumeration = {DocComment} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}";
|
||||
Function = {DocComment} {Attribute} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
|
||||
Namespace = {DocComment} {Attribute} "mod" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
|
||||
Structure = {DocComment} {Attribute} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}";
|
||||
Enumeration = {DocComment} {Attribute} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}";
|
||||
|
||||
Type = Identifier ["<" Type {"," Type} ">"];
|
||||
StringLiteral = ["r"] "\"" {ANYTHING} "\"" | "r" "#" {"#"} "\"" {ANYTHING} "#" {"#"} "\"";
|
||||
|
||||
Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
|
||||
DocIdentifier = {DocComment} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
|
||||
DocIdentifier = {DocComment} {Attribute} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
|
||||
|
||||
NamedType = Identifier ":" Type;
|
||||
DocNamedType = {DocComment} Identifier ":" Type;
|
||||
DocNamedType = {DocComment} {Attribute} Identifier ":" Type;
|
||||
|
||||
|
||||
# (* This is syntax sugar for a `DocAttribute` *)
|
||||
DocComment = "///" {ANYTHING} LineEnding;
|
||||
|
||||
Attribute = "#" "[" AttributeValue "]" LineEnding;
|
||||
AttributeValue = DeriveAttribute | DocAttribute | ErrorAttribute | MsgAttribute;
|
||||
ErrorAttribute = "error";
|
||||
MsgAttribute = "msg" "(" StringLiteral ")";
|
||||
DeriveAttribute = "derive" "(" "Error" ")";
|
||||
DocAttribute = "doc" "=" StringLiteral;
|
||||
|
||||
Comment = "//" [ NOT ("/" {ANYTHING} LineEnding) | "//"] {ANYTHING} LineEnding;
|
||||
LineEnding = "\\n" | "\\r" | "\\r\\n";
|
||||
|
||||
|
|
|
@ -176,11 +176,20 @@ impl TokenKind {
|
|||
pub enum Attribute {
|
||||
#[allow(non_camel_case_types)]
|
||||
doc(String),
|
||||
#[allow(non_camel_case_types)]
|
||||
derive(DeriveValue),
|
||||
#[allow(non_camel_case_types)]
|
||||
error,
|
||||
#[allow(non_camel_case_types)]
|
||||
msg(String),
|
||||
}
|
||||
impl From<unchecked::Attribute> for Attribute {
|
||||
fn from(value: unchecked::Attribute) -> Self {
|
||||
match value {
|
||||
unchecked::Attribute::doc { content: name, .. } => Self::doc(name),
|
||||
unchecked::Attribute::doc { content: name, .. } => Self::doc(name.content),
|
||||
unchecked::Attribute::derive { value, .. } => Self::derive(value),
|
||||
unchecked::Attribute::error { .. } => Self::error,
|
||||
unchecked::Attribute::msg { content, .. } => Self::msg(content.content),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -71,7 +71,64 @@ pub enum Declaration {
|
|||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub enum Attribute {
|
||||
#[allow(non_camel_case_types)]
|
||||
doc { content: String, span: TokenSpan },
|
||||
doc {
|
||||
content: StringLiteral,
|
||||
span: TokenSpan,
|
||||
},
|
||||
#[allow(non_camel_case_types)]
|
||||
derive { value: DeriveValue, span: TokenSpan },
|
||||
#[allow(non_camel_case_types)]
|
||||
error { span: TokenSpan },
|
||||
#[allow(non_camel_case_types)]
|
||||
msg {
|
||||
content: StringLiteral,
|
||||
span: TokenSpan,
|
||||
},
|
||||
}
|
||||
|
||||
impl Display for Attribute {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
Attribute::doc { .. } => f.write_str("doc"),
|
||||
Attribute::derive { .. } => f.write_str("derive"),
|
||||
Attribute::error { .. } => f.write_str("error"),
|
||||
Attribute::msg { .. } => f.write_str("msg"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Attribute {
|
||||
pub fn span(&self) -> TokenSpan {
|
||||
match self {
|
||||
Attribute::doc { span, .. } => *span,
|
||||
Attribute::derive { span, .. } => *span,
|
||||
Attribute::error { span, .. } => *span,
|
||||
Attribute::msg { span, .. } => *span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub struct StringLiteral {
|
||||
pub(crate) content: String,
|
||||
pub(crate) span: TokenSpan,
|
||||
}
|
||||
|
||||
impl From<Token> for StringLiteral {
|
||||
fn from(value: Token) -> Self {
|
||||
let span = *value.span();
|
||||
let content = match value.kind {
|
||||
TokenKind::StringLiteral(content) => content,
|
||||
_ => unreachable!("A string literal was expected"),
|
||||
};
|
||||
|
||||
Self { content, span }
|
||||
}
|
||||
}
|
||||
|
||||
/// The values accepted inside a `derive` attribute.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum DeriveValue {
    /// Selected when the derive attribute names `Error`.
    Error,
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
|
|
|
@ -163,6 +163,7 @@ impl Token {
|
|||
#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone)]
|
||||
pub enum TokenKind {
|
||||
Keyword(Keyword),
|
||||
AttributeKeyword(AttributeKeyword),
|
||||
Identifier(String),
|
||||
Colon,
|
||||
Semicolon,
|
||||
|
@ -174,8 +175,10 @@ pub enum TokenKind {
|
|||
ParenClose,
|
||||
SquareOpen,
|
||||
SquareClose,
|
||||
PoundSign,
|
||||
EqualsSign,
|
||||
StringLiteral(String),
|
||||
|
||||
DocComment(String),
|
||||
Comment(String),
|
||||
|
||||
/// This is not a real TokenKind, but only used for error handling
|
||||
|
@ -190,13 +193,18 @@ impl TokenKind {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
if let TokenKind::Comment(_) = self {
|
||||
if let TokenKind::Comment(_) = other {
|
||||
if let TokenKind::AttributeKeyword(_) = self {
|
||||
if let TokenKind::AttributeKeyword(_) = other {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if let TokenKind::DocComment(_) = self {
|
||||
if let TokenKind::DocComment(_) = other {
|
||||
if let TokenKind::StringLiteral(_) = self {
|
||||
if let TokenKind::StringLiteral(_) = other {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if let TokenKind::Comment(_) = self {
|
||||
if let TokenKind::Comment(_) = other {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -208,6 +216,7 @@ impl Display for TokenKind {
|
|||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
TokenKind::Keyword(word) => write!(f, "KEYWORD({})", word),
|
||||
TokenKind::AttributeKeyword(word) => write!(f, "ATTRIBUTE_KEYWORD({})", word),
|
||||
TokenKind::Identifier(ident) => {
|
||||
if ident.is_empty() {
|
||||
write!(f, "IDENTIFIER")
|
||||
|
@ -215,6 +224,8 @@ impl Display for TokenKind {
|
|||
write!(f, "IDENTIFIER({})", ident)
|
||||
}
|
||||
}
|
||||
TokenKind::EqualsSign => f.write_str("EQUALS_SIGN"),
|
||||
TokenKind::PoundSign => f.write_str("POUND_SIGN"),
|
||||
TokenKind::Colon => f.write_str("COLON"),
|
||||
TokenKind::Semicolon => f.write_str("SEMICOLON"),
|
||||
TokenKind::Comma => f.write_str("COMMA"),
|
||||
|
@ -226,7 +237,7 @@ impl Display for TokenKind {
|
|||
TokenKind::Dummy => f.write_str("DUMMY"),
|
||||
TokenKind::SquareOpen => f.write_str("SQUAREOPEN"),
|
||||
TokenKind::SquareClose => f.write_str("SQUARECLOSE"),
|
||||
TokenKind::DocComment(text) => write!(f, "DOCCOMMENT({})", text),
|
||||
TokenKind::StringLiteral(text) => write!(f, r#"STRING_LITERAL("{}")"#, text),
|
||||
TokenKind::Comment(text) => write!(f, "COMMENT({})", text),
|
||||
}
|
||||
}
|
||||
|
@ -249,6 +260,26 @@ pub enum Keyword {
|
|||
r#enum,
|
||||
}
|
||||
|
||||
/// Keywords used in attributes: (#[<keyword>(<value>)])
// The variants mirror the keywords as they are written in the source, so they stay lowercase.
#[allow(non_camel_case_types)]
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
pub enum AttributeKeyword {
    /// Derive a trait
    derive,

    /// Document the attached item
    doc,

    /// Mark the beginning of an error
    error,

    /// Encompass an error message
    msg,
}
|
||||
|
||||
impl Display for Keyword {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
|
@ -260,6 +291,17 @@ impl Display for Keyword {
|
|||
}
|
||||
}
|
||||
|
||||
impl Display for AttributeKeyword {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
match self {
|
||||
AttributeKeyword::derive => f.write_str("derive"),
|
||||
AttributeKeyword::doc => f.write_str("doc"),
|
||||
AttributeKeyword::error => f.write_str("error"),
|
||||
AttributeKeyword::msg => f.write_str("msg"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Shorthand macro for generating a token from *anything* which can be
|
||||
/// converted into a `TokenKind`, or any of the `TokenKind` variants.
|
||||
///
|
||||
|
@ -267,11 +309,11 @@ impl Display for Keyword {
|
|||
///
|
||||
/// ```
|
||||
/// use trixy_parser::token;
|
||||
/// # fn main() {
|
||||
///# fn main() {
|
||||
/// token![mod];
|
||||
/// token![;];
|
||||
/// token![Arrow];
|
||||
/// # }
|
||||
///# }
|
||||
/// ```
|
||||
#[macro_export]
|
||||
macro_rules! token {
|
||||
|
@ -292,6 +334,10 @@ macro_rules! token {
|
|||
[BraceClose] => { $crate::lexing::TokenKind::BraceClose };
|
||||
// [}] => { $crate::lexing::TokenKind::BraceClose };
|
||||
[ParenOpen] => { $crate::lexing::TokenKind::ParenOpen };
|
||||
[PoundSign] => { $crate::lexing::TokenKind::PoundSign };
|
||||
[#] => { $crate::lexing::TokenKind::PoundSign };
|
||||
[EqualsSign] => { $crate::lexing::TokenKind::EqualsSign };
|
||||
[=] => { $crate::lexing::TokenKind::EqualsSign };
|
||||
// [(] => { $crate::lexing::TokenKind::ParenthesisOpen };
|
||||
[ParenClose] => { $crate::lexing::TokenKind::ParenClose };
|
||||
// [)] => { $crate::lexing::TokenKind::ParenthesisClose };
|
||||
|
@ -301,13 +347,16 @@ macro_rules! token {
|
|||
[struct] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#struct) };
|
||||
[enum] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#enum) };
|
||||
|
||||
// The `derive` here is completely arbitrary. It is only for comparisons (see `same_kind`)
|
||||
[AttributeKeyword] => { $crate::lexing::TokenKind::AttributeKeyword($crate::lexing::AttributeKeyword::derive) };
|
||||
|
||||
// This only works for checking for an identifier or comment
|
||||
// see the `same_kind` method on TokenKind
|
||||
[Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
|
||||
[Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
|
||||
|
||||
[DocComment] => { $crate::lexing::TokenKind::DocComment("".to_owned()) };
|
||||
[DocCommentMatch] => { $crate::lexing::TokenKind::DocComment(_doc_comment) };
|
||||
[StringLiteral] => { $crate::lexing::TokenKind::StringLiteral("".to_owned()) };
|
||||
|
||||
[Comment] => { $crate::lexing::TokenKind::Comment("".to_owned()) };
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ use crate::{
|
|||
|
||||
use super::{
|
||||
error::{LexingError, SpannedLexingError},
|
||||
Token, TokenKind,
|
||||
AttributeKeyword, Token, TokenKind,
|
||||
};
|
||||
|
||||
pub(super) struct Tokenizer<'a> {
|
||||
|
@ -57,6 +57,20 @@ impl<'a> Tokenizer<'a> {
|
|||
|
||||
SpannedLexingError { source: e, context }
|
||||
})?;
|
||||
// if let TokenKind::StringLiteral(string) = &token_kind {
|
||||
// if string == "" {
|
||||
// eprintln!(
|
||||
// "Got an empty StringLiteral '{}', with span: {}..{}",
|
||||
// string,
|
||||
// start,
|
||||
// start + index
|
||||
// );
|
||||
// eprintln!(
|
||||
// "Removing following text: '{}'\n",
|
||||
// &self.remaining_text[..index],
|
||||
// );
|
||||
// }
|
||||
// }
|
||||
|
||||
self.chomp(index); // end - start
|
||||
let end = self.current_index;
|
||||
|
@ -83,6 +97,10 @@ impl<'a> Tokenizer<'a> {
|
|||
',' => (TokenKind::Comma, 1),
|
||||
'<' => (TokenKind::SquareOpen, 1),
|
||||
'>' => (TokenKind::SquareClose, 1),
|
||||
'#' => (TokenKind::PoundSign, 1),
|
||||
'=' => (TokenKind::EqualsSign, 1),
|
||||
'"' => tokenize_literal_string(self.remaining_text, "\"")?,
|
||||
'r' => try_to_tokenize_raw_literal_string(self.remaining_text)?,
|
||||
|
||||
'-' => tokenize_arrow(self.remaining_text)?,
|
||||
'/' => tokenize_comment(self.remaining_text)?,
|
||||
|
@ -167,38 +185,76 @@ fn tokenize_comment(text: &str) -> Result<(TokenKind, usize), LexingError> {
|
|||
Err(LexingError::ExpectedComment)
|
||||
} else {
|
||||
let text: &str = &text[2..];
|
||||
if let Some('/') = text.chars().next() {
|
||||
let text = &text[1..];
|
||||
if end_of_line(&text) {
|
||||
Ok((TokenKind::DocComment("".to_owned()), 1 + 3))
|
||||
} else {
|
||||
let (doc_comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
|
||||
|
||||
// trim whitespace
|
||||
let doc_comment = doc_comment.trim_start();
|
||||
let doc_comment = doc_comment.trim_end();
|
||||
|
||||
Ok((
|
||||
TokenKind::DocComment(doc_comment.to_owned()),
|
||||
chars_read + 3,
|
||||
))
|
||||
}
|
||||
if end_of_line(&text) {
|
||||
Ok((TokenKind::Comment("".to_owned()), 1 + 2))
|
||||
} else {
|
||||
if end_of_line(&text) {
|
||||
Ok((TokenKind::Comment("".to_owned()), 1 + 2))
|
||||
} else {
|
||||
let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
|
||||
let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
|
||||
|
||||
// trim whitespace
|
||||
let comment = comment.trim_start();
|
||||
let comment = comment.trim_end();
|
||||
// trim trailing whitespace (only at the end to avoid removing wanted whitespace)
|
||||
let comment = comment.trim_end();
|
||||
|
||||
Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2))
|
||||
}
|
||||
Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// We check if the r is the beginning of a literal string, otherwise, we tokenize a identifier
|
||||
fn try_to_tokenize_raw_literal_string(text: &str) -> Result<(TokenKind, usize), LexingError> {
|
||||
// remove the 'r' at the begining
|
||||
let text_without_r = &text[1..];
|
||||
|
||||
let next_char = &text_without_r[0..1];
|
||||
if next_char == "#" {
|
||||
// The string is also escaped, count the hashtags
|
||||
let (delimeter, chars_read) = take_while(text_without_r, |ch| ch == '#')?;
|
||||
let delimeter = format!("\"{}", delimeter);
|
||||
|
||||
let (token, length) = tokenize_literal_string(&text_without_r[chars_read..], &delimeter)?;
|
||||
// The 1 is the size of the extra 'r'
|
||||
Ok((token, length + 1))
|
||||
} else if next_char == "\"" {
|
||||
// regular raw string literal
|
||||
let (token, length) = tokenize_literal_string(text_without_r, "\"")?;
|
||||
// The 1 is the size of the extra 'r'
|
||||
Ok((token, length + 1))
|
||||
} else {
|
||||
// if the 'r' is not followed by either an '#' or a '"', it must be part of an identifier
|
||||
tokenize_ident(text)
|
||||
}
|
||||
}
|
||||
|
||||
fn tokenize_literal_string(text: &str, delimeter: &str) -> Result<(TokenKind, usize), LexingError> {
|
||||
// The first char is always a quote (")
|
||||
assert_eq!(&text[..1], "\"");
|
||||
let text_without_quote = &text[1..];
|
||||
|
||||
if &text_without_quote[0..delimeter.len()] == delimeter {
|
||||
// eprintln!(
|
||||
// "Got a direct delimeter, removing: '{}'",
|
||||
// &text[..1 + delimeter.len()]
|
||||
// );
|
||||
// eprintln!("Next up to parse: '{}'\n", &text[1 + delimeter.len()..20]);
|
||||
// The literal string does not contain anything
|
||||
Ok((TokenKind::StringLiteral("".to_owned()), 1 + delimeter.len()))
|
||||
} else {
|
||||
let mut predicates: Vec<_> = delimeter
|
||||
.chars()
|
||||
.map(|ch| {
|
||||
// eprintln!("Condition, which needs to match: |ch| ch == '{}'", ch);
|
||||
move |ch2| ch2 == ch
|
||||
})
|
||||
.collect();
|
||||
let (literal, chars_read) =
|
||||
take_until_succesive_match(text_without_quote, &mut predicates)?;
|
||||
|
||||
// The second number read here is the last quote
|
||||
Ok((
|
||||
TokenKind::StringLiteral(literal.to_owned()),
|
||||
chars_read + 1 + delimeter.len(),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
|
||||
let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?;
|
||||
|
||||
|
@ -208,6 +264,12 @@ fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
|
|||
"fn" => TokenKind::Keyword(Keyword::r#fn),
|
||||
"struct" => TokenKind::Keyword(Keyword::r#struct),
|
||||
"enum" => TokenKind::Keyword(Keyword::r#enum),
|
||||
|
||||
"derive" => TokenKind::AttributeKeyword(AttributeKeyword::derive),
|
||||
"doc" => TokenKind::AttributeKeyword(AttributeKeyword::doc),
|
||||
"error" => TokenKind::AttributeKeyword(AttributeKeyword::error),
|
||||
"msg" => TokenKind::AttributeKeyword(AttributeKeyword::msg),
|
||||
|
||||
other => TokenKind::Identifier(other.to_string()),
|
||||
};
|
||||
|
||||
|
@ -252,6 +314,48 @@ where
|
|||
Ok((&data[..current_index], current_index))
|
||||
}
|
||||
}
|
||||
/// Consume bytes until all the predicates match in successive ways
|
||||
fn take_until_succesive_match<'a, F>(
|
||||
data: &'a str,
|
||||
preds: &mut [F],
|
||||
) -> Result<(&'a str, usize), LexingError>
|
||||
where
|
||||
F: FnMut(char) -> bool,
|
||||
{
|
||||
assert!(!preds.is_empty(), "Predicates need to be provided");
|
||||
|
||||
let mut current_index = 0;
|
||||
|
||||
'outer: for ch in data.chars() {
|
||||
let should_stop = preds[0](ch);
|
||||
current_index += ch.len_utf8();
|
||||
|
||||
if should_stop {
|
||||
// eprintln!("First predicate did match char: {:#?}", ch);
|
||||
if preds.len() == 1 {
|
||||
// eprintln!("Only one predicate provided, which matched: {:#?}\n", ch);
|
||||
break 'outer;
|
||||
}
|
||||
'inner: for pred in &mut preds[1..] {
|
||||
let ch = &data.chars().nth(current_index).expect("This should exists");
|
||||
// eprintln!("Checking pred with char: {:#?}", ch);
|
||||
if pred(*ch) {
|
||||
// eprintln!("Predicate did match char: {:#?}\n", ch);
|
||||
break 'outer;
|
||||
}
|
||||
// eprintln!("Predicate did not match char: {:#?}\n", ch);
|
||||
current_index += ch.len_utf8();
|
||||
break 'inner;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if current_index == 0 {
|
||||
Err(LexingError::NoMatchesTaken)
|
||||
} else {
|
||||
Ok((&data[..current_index], current_index))
|
||||
}
|
||||
}
|
||||
|
||||
/// Skips input until the remaining string pattern starts with the pattern
|
||||
fn skip_until<'a>(mut src: &'a str, pattern: &str) -> &'a str {
|
||||
|
|
|
@ -24,7 +24,7 @@ use thiserror::Error;
|
|||
use std::{error::Error, fmt::Display};
|
||||
|
||||
use crate::{
|
||||
command_spec::checked::Identifier,
|
||||
command_spec::{checked::Identifier, unchecked::Attribute},
|
||||
error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
|
||||
lexing::TokenSpan,
|
||||
parsing::unchecked::error::SpannedParsingError as OldSpannedParsingError,
|
||||
|
@ -63,6 +63,11 @@ pub enum ParsingError {
|
|||
r#type: Identifier,
|
||||
span: TokenSpan,
|
||||
},
|
||||
#[error("The {specified} attribute can't be used here!")]
|
||||
WrongAttributeInPosition {
|
||||
specified: Attribute,
|
||||
span: TokenSpan,
|
||||
},
|
||||
}
|
||||
|
||||
impl ParsingError {
|
||||
|
@ -74,6 +79,7 @@ impl ParsingError {
|
|||
ParsingError::EnumWithNamespaceNamePascal { enum_span, .. } => enum_span,
|
||||
ParsingError::NotEnoughGenericArgs { span, .. } => span,
|
||||
ParsingError::TooManyGenericArgs { span, .. } => span,
|
||||
ParsingError::WrongAttributeInPosition { span, .. } => span,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -87,6 +93,7 @@ impl AdditionalHelp for ParsingError {
|
|||
| ParsingError::EnumWithNamespaceName {..} => "Change the name of this Enumeration as the generation process in trixy-macros needs to use this name".to_owned(),
|
||||
ParsingError::NotEnoughGenericArgs { got, expected_min, .. } => format!("Add generic args until you have gone from {} to {}", got, expected_min),
|
||||
ParsingError::TooManyGenericArgs { got, expected_max, .. } => format!("Remove generic args until you have gone from {} to {}", got, expected_max),
|
||||
ParsingError::WrongAttributeInPosition { .. } => format!("Remove this attribute"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,7 @@
|
|||
* If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use std::mem;
|
||||
use std::{iter, mem};
|
||||
|
||||
use convert_case::{Case, Casing};
|
||||
use trixy_types::BASE_TYPES;
|
||||
|
@ -27,8 +27,8 @@ use trixy_types::BASE_TYPES;
|
|||
use crate::{
|
||||
command_spec::{
|
||||
checked::{
|
||||
CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier, NamedType,
|
||||
Namespace, Structure, Type,
|
||||
self, CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier,
|
||||
NamedType, Namespace, Structure, Type,
|
||||
},
|
||||
unchecked::{
|
||||
CommandSpec as UncheckedCommandSpec, DocNamedType as UncheckedDocNamedType,
|
||||
|
@ -39,7 +39,7 @@ use crate::{
|
|||
Variant,
|
||||
},
|
||||
error::ErrorContext,
|
||||
lexing::{TokenKind, TokenSpan},
|
||||
lexing::{Token, TokenKind, TokenSpan, TokenStream},
|
||||
};
|
||||
|
||||
use self::error::{ParsingError, SpannedParsingError};
|
||||
|
@ -48,6 +48,39 @@ pub mod error;
|
|||
#[cfg(test)]
|
||||
mod test;
|
||||
|
||||
/// Converts the `attributes` of an unchecked item, accepting only the listed attribute
/// kinds.
///
/// `take_attrs! {item, doc, msg}` consumes `item.attributes` and turns every attribute
/// whose variant is `doc` or `msg` into the target type via `.into()`. The first attribute
/// of any other kind aborts the conversion with
/// `ParsingError::WrongAttributeInPosition`, which the trailing `?` propagates out of the
/// surrounding function.
///
/// The `@process_val` and `@process_val_last` arms are internal; they expand inside the
/// closure of the first arm, so their `return`s leave that closure only.
macro_rules! take_attrs {
    ($item:expr, $($allowed:ident),*) => {
        $item
            .attributes
            .into_iter()
            .map(|attribute| {
                take_attrs!{@process_val attribute, $($allowed),*}
            })
            .collect::<Result<Vec<_>, _>>()?
    };
    // No allowed kind matched: reject the attribute.
    (@process_val_last $attr:ident) => {
        {
            let span = $attr.span();
            return Err(ParsingError::WrongAttributeInPosition {
                specified: $attr,
                span,
            });
        }
    };
    // Exactly one allowed kind is left to test.
    (@process_val $attr:ident, $kind:ident) => {
        if let $crate::command_spec::unchecked::Attribute::$kind{..} = $attr {
            return Ok($attr.into());
        };
        take_attrs!{@process_val_last $attr}
    };
    // Test the first allowed kind, then recurse on the remaining ones.
    (@process_val $attr:ident, $kind:ident, $($rest:tt),+ $(,)*) => {
        if let $crate::command_spec::unchecked::Attribute::$kind{..} = $attr {
            return Ok($attr.into());
        };
        take_attrs!{@process_val $attr, $($rest),*}
    };
}
|
||||
|
||||
struct Parser {
|
||||
command_spec: UncheckedCommandSpec,
|
||||
structures: Vec<UncheckedStructure>,
|
||||
|
@ -57,6 +90,7 @@ struct Parser {
|
|||
|
||||
impl UncheckedCommandSpec {
|
||||
pub fn process(self, original_file: String) -> Result<CommandSpec, SpannedParsingError> {
|
||||
let original_file = TokenStream::replace(&original_file).to_string();
|
||||
let checked = Parser {
|
||||
command_spec: self,
|
||||
structures: vec![],
|
||||
|
@ -68,12 +102,6 @@ impl UncheckedCommandSpec {
|
|||
}
|
||||
}
|
||||
|
||||
macro_rules! pass_attrs_along {
|
||||
($name:ident) => {
|
||||
$name.attributes.into_iter().map(|a| a.into()).collect()
|
||||
};
|
||||
}
|
||||
|
||||
impl Parser {
|
||||
fn parse(mut self) -> Result<CommandSpec, SpannedParsingError> {
|
||||
let namespace: UncheckedNamespace =
|
||||
|
@ -142,7 +170,7 @@ impl Parser {
|
|||
structures,
|
||||
enumerations,
|
||||
namespaces,
|
||||
attributes: pass_attrs_along!(namespace),
|
||||
attributes: take_attrs! {namespace, doc},
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -165,7 +193,7 @@ impl Parser {
|
|||
identifier,
|
||||
inputs,
|
||||
output,
|
||||
attributes: pass_attrs_along!(function),
|
||||
attributes: take_attrs! {function, doc},
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -202,7 +230,7 @@ impl Parser {
|
|||
mem::take(&mut state.token.kind).to_identifier(Variant::DocNamedType);
|
||||
DocIdentifier {
|
||||
name: ident.name,
|
||||
attributes: pass_attrs_along!(state),
|
||||
attributes: take_attrs! {state, doc, msg},
|
||||
variant: Variant::DocNamedType,
|
||||
}
|
||||
})
|
||||
|
@ -211,7 +239,7 @@ impl Parser {
|
|||
Ok(Enumeration {
|
||||
identifier,
|
||||
states,
|
||||
attributes: pass_attrs_along!(enumeration),
|
||||
attributes: take_attrs! {enumeration, doc, derive, error},
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -231,7 +259,7 @@ impl Parser {
|
|||
Ok(Structure {
|
||||
identifier,
|
||||
contents,
|
||||
attributes: pass_attrs_along!(structure),
|
||||
attributes: take_attrs! {structure, doc, derive},
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -254,7 +282,7 @@ impl Parser {
|
|||
Ok(DocNamedType {
|
||||
name,
|
||||
r#type,
|
||||
attributes: pass_attrs_along!(doc_named_type),
|
||||
attributes: take_attrs! {doc_named_type, doc},
|
||||
})
|
||||
}
|
||||
|
||||
|
|
|
@ -23,7 +23,7 @@ use std::{error::Error, fmt::Display};
|
|||
use thiserror::Error;
|
||||
|
||||
use crate::{
|
||||
command_spec::unchecked::Attribute,
|
||||
command_spec::unchecked::{Attribute, StringLiteral},
|
||||
error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
|
||||
lexing::{TokenKind, TokenSpan},
|
||||
};
|
||||
|
@ -46,19 +46,23 @@ pub enum ParsingError {
|
|||
#[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
|
||||
ExpectedKeyword { actual: TokenKind, span: TokenSpan },
|
||||
|
||||
#[error("DocComment does not have target")]
|
||||
TrailingDocComment {
|
||||
#[error("Attribute does not have target")]
|
||||
TrailingAttribute {
|
||||
comments: Vec<Attribute>,
|
||||
span: TokenSpan,
|
||||
},
|
||||
|
||||
#[error("Derive value is not known")]
|
||||
WrongDeriveValue { specified: StringLiteral },
|
||||
}
|
||||
impl ParsingError {
|
||||
pub fn span(&self) -> &TokenSpan {
|
||||
match self {
|
||||
ParsingError::ExpectedDifferentToken { span, .. } => span,
|
||||
ParsingError::ExpectedKeyword { span, .. } => span,
|
||||
ParsingError::TrailingDocComment { span, .. } => span,
|
||||
ParsingError::TrailingAttribute { span, .. } => span,
|
||||
ParsingError::UnexpectedEOF { span, .. } => span,
|
||||
ParsingError::WrongDeriveValue { specified } => &specified.span,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -81,8 +85,9 @@ impl AdditionalHelp for ParsingError {
|
|||
ParsingError::ExpectedKeyword { actual, .. } => format!(
|
||||
"I expected a keyword (that is something like 'fn' or 'mod') but you put a '{}' there!",
|
||||
actual),
|
||||
ParsingError::TrailingDocComment { .. } => "I expected some target (a function, namespace, enum, or something like this) which this doc comment annotates, but you put nothing there".to_owned(),
|
||||
ParsingError::TrailingAttribute { .. } => "I expected some target (a function, namespace, enum, or something like this) which this attribute annotates, but you put nothing there".to_owned(),
|
||||
ParsingError::UnexpectedEOF { expected, .. } => format!("Put the expected token ('{expected}') here."),
|
||||
ParsingError::WrongDeriveValue { specified } => format!("'{}' is not a valid derive value! Take a look a the grammar file", specified.content),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,15 +19,15 @@
|
|||
* If not, see <https://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
use std::mem;
|
||||
use std::{iter::once, mem};
|
||||
|
||||
use crate::{
|
||||
command_spec::unchecked::{
|
||||
Attribute, CommandSpec, Declaration, DocNamedType, DocToken, Enumeration, Function,
|
||||
NamedType, Namespace, Structure, Type,
|
||||
Attribute, CommandSpec, Declaration, DeriveValue, DocNamedType, DocToken, Enumeration,
|
||||
Function, NamedType, Namespace, StringLiteral, Structure, Type,
|
||||
},
|
||||
error::ErrorContext,
|
||||
lexing::{Token, TokenKind, TokenSpan, TokenStream},
|
||||
lexing::{AttributeKeyword, Token, TokenKind, TokenSpan, TokenStream},
|
||||
token,
|
||||
};
|
||||
|
||||
|
@ -46,7 +46,7 @@ impl TokenStream {
|
|||
|
||||
pub(super) struct Parser {
|
||||
token_stream: TokenStream,
|
||||
active_doc_comments: Vec<Attribute>,
|
||||
current_attributes: Vec<Attribute>,
|
||||
last_span: TokenSpan,
|
||||
}
|
||||
|
||||
|
@ -55,7 +55,7 @@ impl Parser {
|
|||
token_stream.reverse();
|
||||
Self {
|
||||
token_stream,
|
||||
active_doc_comments: vec![],
|
||||
current_attributes: vec![],
|
||||
last_span: TokenSpan::default(),
|
||||
}
|
||||
}
|
||||
|
@ -89,38 +89,21 @@ impl Parser {
|
|||
token![fn] => Ok(Declaration::Function(self.parse_function()?)),
|
||||
token![struct] => Ok(Declaration::Structure(self.parse_structure()?)),
|
||||
token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)),
|
||||
token![DocCommentMatch] => {
|
||||
while self.expect_peek(token![DocComment]) {
|
||||
let comment_to_push = {
|
||||
let doc_comment = self.expect(token![DocComment])?;
|
||||
let span = *doc_comment.span();
|
||||
let name = if let TokenKind::DocComment(content) = doc_comment.kind {
|
||||
content
|
||||
} else {
|
||||
unreachable!("The expect should have accounted for that case");
|
||||
};
|
||||
|
||||
Attribute::doc {
|
||||
content: name,
|
||||
span,
|
||||
}
|
||||
};
|
||||
self.active_doc_comments.push(comment_to_push);
|
||||
}
|
||||
token![#] => {
|
||||
let attributes = self.parse_attributes()?;
|
||||
self.current_attributes.extend(attributes);
|
||||
|
||||
if self.token_stream.is_empty() {
|
||||
fn get_span(attr: Option<&Attribute>) -> TokenSpan {
|
||||
match attr.expect("Something should be here") {
|
||||
Attribute::doc { span, .. } => *span,
|
||||
}
|
||||
attr.expect("Something should be here").span()
|
||||
}
|
||||
|
||||
let span = TokenSpan::from_range(
|
||||
get_span(self.active_doc_comments.first()),
|
||||
get_span(self.active_doc_comments.last()),
|
||||
get_span(self.current_attributes.first()),
|
||||
get_span(self.current_attributes.last()),
|
||||
);
|
||||
Err(ParsingError::TrailingDocComment {
|
||||
comments: mem::take(&mut self.active_doc_comments),
|
||||
Err(ParsingError::TrailingAttribute {
|
||||
comments: mem::take(&mut self.current_attributes),
|
||||
span,
|
||||
})
|
||||
} else {
|
||||
|
@ -159,29 +142,78 @@ impl Parser {
|
|||
})
|
||||
}
|
||||
|
||||
fn parse_doc_comments(&mut self) -> Result<Vec<Attribute>, ParsingError> {
|
||||
let mut attrs = mem::take(&mut self.active_doc_comments);
|
||||
fn parse_bracket_string_literal(&mut self) -> Result<StringLiteral, ParsingError> {
|
||||
self.expect(token![CurvedBracketOpen])?;
|
||||
let string_literal = self.expect(token![StringLiteral])?;
|
||||
self.expect(token![CurvedBracketClose])?;
|
||||
let string_literal = Into::<StringLiteral>::into(string_literal);
|
||||
Ok(string_literal)
|
||||
}
|
||||
|
||||
while self.expect_peek(token![DocComment]) {
|
||||
attrs.push({
|
||||
let doc_comment = self.expect(token![DocComment])?;
|
||||
let span = *doc_comment.span();
|
||||
let name = if let TokenKind::DocComment(content) = doc_comment.kind {
|
||||
content
|
||||
} else {
|
||||
unreachable!("The expect should have accounted for that case");
|
||||
};
|
||||
Attribute::doc {
|
||||
content: name,
|
||||
span,
|
||||
fn parse_attribute_value(&mut self) -> Result<Attribute, ParsingError> {
|
||||
let ident = self.expect(token![AttributeKeyword])?;
|
||||
let span = *ident.span();
|
||||
let TokenKind::AttributeKeyword(keyword) = ident.kind() else {
|
||||
unreachable!("This is checked in the `expect` above")
|
||||
};
|
||||
|
||||
let attribute = match keyword {
|
||||
AttributeKeyword::derive => {
|
||||
let string_literal = self.parse_bracket_string_literal()?;
|
||||
match string_literal.content.as_str() {
|
||||
"Error" => Ok(Attribute::derive {
|
||||
value: DeriveValue::Error,
|
||||
span,
|
||||
}),
|
||||
_ => Err(error::ParsingError::WrongDeriveValue {
|
||||
specified: string_literal,
|
||||
}),
|
||||
}
|
||||
}
|
||||
AttributeKeyword::doc => {
|
||||
self.expect(token![=])?;
|
||||
let string_literal = self.expect(token![StringLiteral])?;
|
||||
let string_literal = Into::<StringLiteral>::into(string_literal);
|
||||
if self.expect_peek(token![PoundSign]) {
|
||||
dbg!(&self.token_stream);
|
||||
}
|
||||
Ok(Attribute::doc {
|
||||
content: string_literal,
|
||||
span,
|
||||
})
|
||||
}
|
||||
AttributeKeyword::error => Ok(Attribute::error { span }),
|
||||
AttributeKeyword::msg => {
|
||||
let string_literal = self.parse_bracket_string_literal()?;
|
||||
Ok(Attribute::msg {
|
||||
content: string_literal,
|
||||
span,
|
||||
})
|
||||
}
|
||||
}?;
|
||||
|
||||
Ok(attribute)
|
||||
}
|
||||
|
||||
fn parse_attributes(&mut self) -> Result<Vec<Attribute>, ParsingError> {
|
||||
let mut attrs = mem::take(&mut self.current_attributes);
|
||||
|
||||
while self.expect_peek(token![#]) {
|
||||
attrs.push({
|
||||
self.expect(token![#])?;
|
||||
self.expect(token![SquareBracketOpen])?;
|
||||
|
||||
let attribue = self.parse_attribute_value()?;
|
||||
|
||||
self.expect(token![SquareBracketClose])?;
|
||||
attribue
|
||||
});
|
||||
}
|
||||
Ok(attrs)
|
||||
}
|
||||
|
||||
fn parse_namespace(&mut self) -> Result<Namespace, ParsingError> {
|
||||
let attributes = self.parse_doc_comments()?;
|
||||
let attributes = self.parse_attributes()?;
|
||||
self.expect(token![mod])?;
|
||||
|
||||
let mut namespace = Namespace {
|
||||
|
@ -209,29 +241,30 @@ impl Parser {
|
|||
}
|
||||
|
||||
fn parse_enumeration(&mut self) -> Result<Enumeration, ParsingError> {
|
||||
let attributes = self.parse_doc_comments()?;
|
||||
let attributes = self.parse_attributes()?;
|
||||
self.expect(token![enum])?;
|
||||
let identifier = self.expect(token![Ident])?;
|
||||
self.expect(token![BraceOpen])?;
|
||||
|
||||
let mut states = vec![];
|
||||
if self.expect_peek(token![Ident]) {
|
||||
let attributes = self.parse_doc_comments()?;
|
||||
if self.expect_peek(token![Ident]) || self.expect_peek(token![#]) {
|
||||
let attributes = self.parse_attributes()?;
|
||||
states.push(DocToken {
|
||||
token: self.expect(token![Ident])?,
|
||||
attributes,
|
||||
});
|
||||
}
|
||||
while self.expect_peek(token![Comma]) {
|
||||
self.expect(token![Comma])?;
|
||||
if self.expect_peek(token![Ident]) {
|
||||
let attributes = self.parse_doc_comments()?;
|
||||
states.push(DocToken {
|
||||
token: self.expect(token![Ident])?,
|
||||
attributes,
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
|
||||
while self.expect_peek(token![Comma]) {
|
||||
self.expect(token![Comma])?;
|
||||
if self.expect_peek(token![Ident]) || self.expect_peek(token![#]) {
|
||||
let attributes = self.parse_attributes()?;
|
||||
states.push(DocToken {
|
||||
token: self.expect(token![Ident])?,
|
||||
attributes,
|
||||
});
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
self.expect(token![BraceClose])?;
|
||||
|
@ -243,7 +276,7 @@ impl Parser {
|
|||
}
|
||||
|
||||
fn parse_structure(&mut self) -> Result<Structure, ParsingError> {
|
||||
let attributes = self.parse_doc_comments()?;
|
||||
let attributes = self.parse_attributes()?;
|
||||
self.expect(token![struct])?;
|
||||
let name = self.expect(token![Ident])?;
|
||||
self.expect(token![BraceOpen])?;
|
||||
|
@ -254,7 +287,7 @@ impl Parser {
|
|||
}
|
||||
while self.expect_peek(token![Comma]) {
|
||||
self.expect(token![Comma])?;
|
||||
if self.expect_peek(token![Ident]) || self.expect_peek(token![DocComment]) {
|
||||
if self.expect_peek(token![Ident]) || self.expect_peek(token![PoundSign]) {
|
||||
contents.push(self.parse_doc_named_type()?);
|
||||
} else {
|
||||
break;
|
||||
|
@ -277,7 +310,7 @@ impl Parser {
|
|||
}
|
||||
|
||||
fn parse_doc_named_type(&mut self) -> Result<DocNamedType, ParsingError> {
|
||||
let attributes = self.parse_doc_comments()?;
|
||||
let attributes = self.parse_attributes()?;
|
||||
let name = self.expect(token![Ident])?;
|
||||
self.expect(token![Colon])?;
|
||||
let r#type = self.parse_type()?;
|
||||
|
@ -289,7 +322,7 @@ impl Parser {
|
|||
}
|
||||
|
||||
fn parse_function(&mut self) -> Result<Function, ParsingError> {
|
||||
let attributes = self.parse_doc_comments()?;
|
||||
let attributes = self.parse_attributes()?;
|
||||
self.expect(token![fn])?;
|
||||
let name = self.expect(token![Ident])?;
|
||||
self.expect(token![ParenOpen])?;
|
||||
|
@ -322,20 +355,20 @@ impl Parser {
|
|||
/// For example:
|
||||
///
|
||||
/// ```dont_run
|
||||
/// use trixy_lang_parser::{
|
||||
/// lexing::{Keyword, TokenKind, TokenStream},
|
||||
/// parsing::unchecked::Parser,
|
||||
/// token,
|
||||
/// };
|
||||
///#use trixy_lang_parser::{
|
||||
///# lexing::{Keyword, TokenKind, TokenStream},
|
||||
///# parsing::unchecked::Parser,
|
||||
///# token,
|
||||
///#};
|
||||
///
|
||||
/// # fn main() {
|
||||
/// let token_stream = TokenStream::lex("mod {}").unwrap();
|
||||
/// let parser = Parser::new(token_stream);
|
||||
/// assert_eq!(parser.expect(token![mod]).unwrap(), TokenKind::Keyword(Keyword::mod));
|
||||
/// assert_eq!(parser.expect(token![BraceOpen]).unwrap(), TokenKind::BraceOpen);
|
||||
/// assert_eq!(parser.expect(token![BraceClose]).unwrap(), TokenKind::BraceClose);
|
||||
/// assert!(parser.expect(token![BraceClose]).is_err());
|
||||
/// # }
|
||||
///# fn main() {
|
||||
/// let token_stream = TokenStream::lex("mod {}").unwrap();
|
||||
/// let parser = Parser::new(token_stream);
|
||||
/// assert_eq!(parser.expect(token![mod]).unwrap(), TokenKind::Keyword(Keyword::mod));
|
||||
/// assert_eq!(parser.expect(token![CurlyBracketOpen]).unwrap(), TokenKind::BraceOpen);
|
||||
/// assert_eq!(parser.expect(token![CurlyBracketClose]).unwrap(), TokenKind::BraceClose);
|
||||
/// assert!(parser.expect(token![CurlyBracketClose]).is_err());
|
||||
///# }
|
||||
/// ```
|
||||
///
|
||||
pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {
|
||||
|
|
Reference in New Issue