feat(parser): Add support for parsing attributes

This commit is contained in:
Benedikt Peetz 2024-03-24 19:16:52 +01:00
parent f1e9087f40
commit add0d170eb
Signed by: bpeetz
GPG Key ID: A5E94010C3A642AD
9 changed files with 444 additions and 144 deletions

View File

@ -20,7 +20,6 @@
#*)
# (*
# Trixy is fully whitespace independent, this means that you can
# interleave whitespace in the definitions.
@ -31,22 +30,31 @@
CommandSpec = {Function | Namespace | Enumeration | Structure } ;
Function = {DocComment} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
Namespace = {DocComment} "mod" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
Structure = {DocComment} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}";
Enumeration = {DocComment} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}";
Function = {DocComment} {Attribute} "fn" Identifier "(" [NamedType {"," NamedType }] ")" [ "->" Type ] ";" ;
Namespace = {DocComment} {Attribute} "mod" Identifier "{" {Function | Namespace | Enumeration | Structure} "}" ;
Structure = {DocComment} {Attribute} "struct" Identifier "{" [DocNamedType {"," DocNamedType } [","]] "}";
Enumeration = {DocComment} {Attribute} "enum" Identifier "{" [DocIdentifier {"," DocIdentifier} [","]] "}";
Type = Identifier ["<" Type {"," Type} ">"];
StringLiteral = ["r"] "\"" {ANYTHING} "\"" | "r" "#" {"#"} "\"" {ANYTHING} "#" {"#"} "\"";
Identifier = (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
DocIdentifier = {DocComment} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
DocIdentifier = {DocComment} {Attribute} (CHARACTER | "_") { NUMBER | CHARACTER | "_" } ;
NamedType = Identifier ":" Type;
DocNamedType = {DocComment} Identifier ":" Type;
DocNamedType = {DocComment} {Attribute} Identifier ":" Type;
# (* This is syntax sugar for a `DocAttribute` *)
DocComment = "///" {ANYTHING} LineEnding;
Attribute = "#" "[" AttributeValue "]" LineEnding;
AttributeValue = DeriveAttribute | DocAttribute | ErrorAttribute | MsgAttribute;
ErrorAttribute = "error";
MsgAttribute = "msg" "(" StringLiteral ")";
DeriveAttribute = "derive" "(" "Error" ")";
DocAttribute = "doc" "=" StringLiteral;
Comment = "//" [ NOT ("/" {ANYTHING} LineEnding) | "//"] {ANYTHING} LineEnding;
LineEnding = "\\n" | "\\r" | "\\r\\n";

View File

@ -176,11 +176,20 @@ impl TokenKind {
/// A checked/validated attribute attached to an item.
///
/// Compared to the unchecked form, span information has been dropped and
/// string-literal payloads have been reduced to their bare text (see the
/// `From<unchecked::Attribute>` conversion).
pub enum Attribute {
/// `#[doc = "..."]` — documentation text.
#[allow(non_camel_case_types)]
doc(String),
/// `#[derive(...)]` — derive the given value.
#[allow(non_camel_case_types)]
derive(DeriveValue),
/// `#[error]` — marks the beginning of an error (see the grammar).
#[allow(non_camel_case_types)]
error,
/// `#[msg("...")]` — an error message.
#[allow(non_camel_case_types)]
msg(String),
}
/// Convert an unchecked attribute into its checked form.
///
/// Span information is discarded and string-literal payloads are reduced
/// to their text content.
impl From<unchecked::Attribute> for Attribute {
    fn from(value: unchecked::Attribute) -> Self {
        match value {
            // `content` is a `StringLiteral`; only its text survives checking.
            unchecked::Attribute::doc { content: name, .. } => Self::doc(name.content),
            unchecked::Attribute::derive { value, .. } => Self::derive(value),
            unchecked::Attribute::error { .. } => Self::error,
            unchecked::Attribute::msg { content, .. } => Self::msg(content.content),
        }
    }
}

View File

@ -71,7 +71,64 @@ pub enum Declaration {
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum Attribute {
#[allow(non_camel_case_types)]
doc { content: String, span: TokenSpan },
doc {
content: StringLiteral,
span: TokenSpan,
},
#[allow(non_camel_case_types)]
derive { value: DeriveValue, span: TokenSpan },
#[allow(non_camel_case_types)]
error { span: TokenSpan },
#[allow(non_camel_case_types)]
msg {
content: StringLiteral,
span: TokenSpan,
},
}
impl Display for Attribute {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Attribute::doc { .. } => f.write_str("doc"),
Attribute::derive { .. } => f.write_str("derive"),
Attribute::error { .. } => f.write_str("error"),
Attribute::msg { .. } => f.write_str("msg"),
}
}
}
impl Attribute {
pub fn span(&self) -> TokenSpan {
match self {
Attribute::doc { span, .. } => *span,
Attribute::derive { span, .. } => *span,
Attribute::error { span, .. } => *span,
Attribute::msg { span, .. } => *span,
}
}
}
/// The contents of a lexed string-literal token together with its span.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub struct StringLiteral {
/// The literal's text, as extracted from `TokenKind::StringLiteral`.
pub(crate) content: String,
/// Source location of the token this literal came from.
pub(crate) span: TokenSpan,
}
/// Build a [`StringLiteral`] from a lexed token.
///
/// The token must be a `TokenKind::StringLiteral`; any other kind hits the
/// `unreachable!`, so callers are expected to check the kind first.
impl From<Token> for StringLiteral {
    fn from(value: Token) -> Self {
        let span = *value.span();
        let TokenKind::StringLiteral(content) = value.kind else {
            unreachable!("A string literal was expected")
        };
        Self { content, span }
    }
}
/// Values accepted inside a `#[derive(...)]` attribute.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum DeriveValue {
/// The `Error` derive (`#[derive(Error)]` — the only value the grammar allows).
Error,
}
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord, Clone)]

View File

@ -163,6 +163,7 @@ impl Token {
#[derive(Debug, Default, PartialEq, Eq, PartialOrd, Ord, Clone)]
pub enum TokenKind {
Keyword(Keyword),
AttributeKeyword(AttributeKeyword),
Identifier(String),
Colon,
Semicolon,
@ -174,8 +175,10 @@ pub enum TokenKind {
ParenClose,
SquareOpen,
SquareClose,
PoundSign,
EqualsSign,
StringLiteral(String),
DocComment(String),
Comment(String),
/// This is not a real TokenKind, but only used for error handling
@ -190,13 +193,18 @@ impl TokenKind {
return true;
}
}
if let TokenKind::Comment(_) = self {
if let TokenKind::Comment(_) = other {
if let TokenKind::AttributeKeyword(_) = self {
if let TokenKind::AttributeKeyword(_) = other {
return true;
}
}
if let TokenKind::DocComment(_) = self {
if let TokenKind::DocComment(_) = other {
if let TokenKind::StringLiteral(_) = self {
if let TokenKind::StringLiteral(_) = other {
return true;
}
}
if let TokenKind::Comment(_) = self {
if let TokenKind::Comment(_) = other {
return true;
}
}
@ -208,6 +216,7 @@ impl Display for TokenKind {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
TokenKind::Keyword(word) => write!(f, "KEYWORD({})", word),
TokenKind::AttributeKeyword(word) => write!(f, "ATTRIBUTE_KEYWORD({})", word),
TokenKind::Identifier(ident) => {
if ident.is_empty() {
write!(f, "IDENTIFIER")
@ -215,6 +224,8 @@ impl Display for TokenKind {
write!(f, "IDENTIFIER({})", ident)
}
}
TokenKind::EqualsSign => f.write_str("EQUALS_SIGN"),
TokenKind::PoundSign => f.write_str("POUND_SIGN"),
TokenKind::Colon => f.write_str("COLON"),
TokenKind::Semicolon => f.write_str("SEMICOLON"),
TokenKind::Comma => f.write_str("COMMA"),
@ -226,7 +237,7 @@ impl Display for TokenKind {
TokenKind::Dummy => f.write_str("DUMMY"),
TokenKind::SquareOpen => f.write_str("SQUAREOPEN"),
TokenKind::SquareClose => f.write_str("SQUARECLOSE"),
TokenKind::DocComment(text) => write!(f, "DOCCOMMENT({})", text),
TokenKind::StringLiteral(text) => write!(f, r#"STRING_LITERAL("{}")"#, text),
TokenKind::Comment(text) => write!(f, "COMMENT({})", text),
}
}
@ -249,6 +260,26 @@ pub enum Keyword {
r#enum,
}
/// Keywords used in attributes: (#[<keyword>(<value>)])
///
/// The lexer maps the bare identifiers `derive`, `doc`, `error` and `msg`
/// directly to these variants (see `tokenize_ident`).
#[derive(Debug, PartialEq, PartialOrd, Ord, Eq, Clone, Copy)]
pub enum AttributeKeyword {
/// Derive a trait
#[allow(non_camel_case_types)]
derive,
/// Document the attached item
#[allow(non_camel_case_types)]
doc,
/// Mark the beginning of an error
#[allow(non_camel_case_types)]
error,
/// Encompass an error message
#[allow(non_camel_case_types)]
msg,
}
impl Display for Keyword {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
@ -260,6 +291,17 @@ impl Display for Keyword {
}
}
impl Display for AttributeKeyword {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
AttributeKeyword::derive => f.write_str("derive"),
AttributeKeyword::doc => f.write_str("doc"),
AttributeKeyword::error => f.write_str("error"),
AttributeKeyword::msg => f.write_str("msg"),
}
}
}
/// Shorthand macro for generating a token from *anything* which can be
/// converted into a `TokenKind`, or any of the `TokenKind` variants.
///
@ -267,11 +309,11 @@ impl Display for Keyword {
///
/// ```
/// use trixy_parser::token;
/// # fn main() {
///# fn main() {
/// token![mod];
/// token![;];
/// token![Arrow];
/// # }
///# }
/// ```
#[macro_export]
macro_rules! token {
@ -292,6 +334,10 @@ macro_rules! token {
[BraceClose] => { $crate::lexing::TokenKind::BraceClose };
// [}] => { $crate::lexing::TokenKind::BraceClose };
[ParenOpen] => { $crate::lexing::TokenKind::ParenOpen };
[PoundSign] => { $crate::lexing::TokenKind::PoundSign };
[#] => { $crate::lexing::TokenKind::PoundSign };
[EqualsSign] => { $crate::lexing::TokenKind::EqualsSign };
[=] => { $crate::lexing::TokenKind::EqualsSign };
// [(] => { $crate::lexing::TokenKind::ParenthesisOpen };
[ParenClose] => { $crate::lexing::TokenKind::ParenClose };
// [)] => { $crate::lexing::TokenKind::ParenthesisClose };
@ -301,13 +347,16 @@ macro_rules! token {
[struct] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#struct) };
[enum] => { $crate::lexing::TokenKind::Keyword($crate::lexing::Keyword::r#enum) };
// The `derive` here is completely arbitrary. It is only for comparisons (see `same_kind`)
[AttributeKeyword] => { $crate::lexing::TokenKind::AttributeKeyword($crate::lexing::AttributeKeyword::derive) };
// This is only works for checking for a identifier or comment
// see the `same_kind` method on TokenKind
[Ident] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
[Identifier] => { $crate::lexing::TokenKind::Identifier("".to_owned()) };
[DocComment] => { $crate::lexing::TokenKind::DocComment("".to_owned()) };
[DocCommentMatch] => { $crate::lexing::TokenKind::DocComment(_doc_comment) };
[StringLiteral] => { $crate::lexing::TokenKind::StringLiteral("".to_owned()) };
[Comment] => { $crate::lexing::TokenKind::Comment("".to_owned()) };
}

View File

@ -28,7 +28,7 @@ use crate::{
use super::{
error::{LexingError, SpannedLexingError},
Token, TokenKind,
AttributeKeyword, Token, TokenKind,
};
pub(super) struct Tokenizer<'a> {
@ -57,6 +57,20 @@ impl<'a> Tokenizer<'a> {
SpannedLexingError { source: e, context }
})?;
// if let TokenKind::StringLiteral(string) = &token_kind {
// if string == "" {
// eprintln!(
// "Got an empty StringLiteral '{}', with span: {}..{}",
// string,
// start,
// start + index
// );
// eprintln!(
// "Removing following text: '{}'\n",
// &self.remaining_text[..index],
// );
// }
// }
self.chomp(index); // end - start
let end = self.current_index;
@ -83,6 +97,10 @@ impl<'a> Tokenizer<'a> {
',' => (TokenKind::Comma, 1),
'<' => (TokenKind::SquareOpen, 1),
'>' => (TokenKind::SquareClose, 1),
'#' => (TokenKind::PoundSign, 1),
'=' => (TokenKind::EqualsSign, 1),
'"' => tokenize_literal_string(self.remaining_text, "\"")?,
'r' => try_to_tokenize_raw_literal_string(self.remaining_text)?,
'-' => tokenize_arrow(self.remaining_text)?,
'/' => tokenize_comment(self.remaining_text)?,
@ -167,38 +185,76 @@ fn tokenize_comment(text: &str) -> Result<(TokenKind, usize), LexingError> {
Err(LexingError::ExpectedComment)
} else {
let text: &str = &text[2..];
if let Some('/') = text.chars().next() {
let text = &text[1..];
if end_of_line(&text) {
Ok((TokenKind::DocComment("".to_owned()), 1 + 3))
} else {
let (doc_comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
// trim whitespace
let doc_comment = doc_comment.trim_start();
let doc_comment = doc_comment.trim_end();
Ok((
TokenKind::DocComment(doc_comment.to_owned()),
chars_read + 3,
))
}
if end_of_line(&text) {
Ok((TokenKind::Comment("".to_owned()), 1 + 2))
} else {
if end_of_line(&text) {
Ok((TokenKind::Comment("".to_owned()), 1 + 2))
} else {
let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
let (comment, chars_read) = take_while(text, |ch| ch != '\n' && ch != '\r')?;
// trim whitespace
let comment = comment.trim_start();
let comment = comment.trim_end();
// trim trailing whitespace (only at the end to avoid removing wanted whitespace)
let comment = comment.trim_end();
Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2))
}
Ok((TokenKind::Comment(comment.to_owned()), chars_read + 2))
}
}
}
/// We check if the `r` is the beginning of a raw literal string
/// (`r"…"` or `r#"…"#`); otherwise we tokenize an identifier.
///
/// The caller has already seen the leading `r`; `text` still contains it.
/// Returns the lexed token and the total number of bytes it covers in `text`.
fn try_to_tokenize_raw_literal_string(text: &str) -> Result<(TokenKind, usize), LexingError> {
    // remove the 'r' at the beginning
    let text_without_r = &text[1..];
    // `chars().next()` instead of slicing avoids a panic when the 'r' is the
    // last character of the input.
    match text_without_r.chars().next() {
        Some('#') => {
            // The string is also escaped: count the hashtags so the closing
            // delimiter is a '"' followed by the same run of '#'s.
            let (hashes, chars_read) = take_while(text_without_r, |ch| ch == '#')?;
            let delimeter = format!("\"{}", hashes);
            let (token, length) =
                tokenize_literal_string(&text_without_r[chars_read..], &delimeter)?;
            // +1 for the leading 'r' and +chars_read for the opening '#'s,
            // neither of which `tokenize_literal_string` saw. (The original
            // code dropped `chars_read`, leaving the opening hashes to be
            // re-lexed as PoundSign tokens.)
            Ok((token, length + 1 + chars_read))
        }
        Some('"') => {
            // regular raw string literal
            let (token, length) = tokenize_literal_string(text_without_r, "\"")?;
            // The 1 is the size of the extra 'r'
            Ok((token, length + 1))
        }
        // if the 'r' is not followed by either an '#' or a '"', it must be
        // part of an identifier (lexed from the full text, 'r' included)
        _ => tokenize_ident(text),
    }
}
/// Lex a (possibly raw) string literal.
///
/// `text` must start at the opening quote; `delimeter` is the closing
/// delimiter to search for (`"` for normal strings, `"#...` for raw ones).
/// Returns the literal's content token and the number of bytes consumed.
fn tokenize_literal_string(text: &str, delimeter: &str) -> Result<(TokenKind, usize), LexingError> {
// The first char is always a quote (")
assert_eq!(&text[..1], "\"");
let text_without_quote = &text[1..];
// NOTE(review): this slice panics when fewer than `delimeter.len()` bytes
// remain (e.g. an unterminated literal at EOF) — confirm the caller
// guarantees enough input, or switch to a checked `get`.
if &text_without_quote[0..delimeter.len()] == delimeter {
// eprintln!(
// "Got a direct delimeter, removing: '{}'",
// &text[..1 + delimeter.len()]
// );
// eprintln!("Next up to parse: '{}'\n", &text[1 + delimeter.len()..20]);
// The literal string does not contain anything
Ok((TokenKind::StringLiteral("".to_owned()), 1 + delimeter.len()))
} else {
// One predicate per delimiter char: the literal ends where these all
// match in sequence.
let mut predicates: Vec<_> = delimeter
.chars()
.map(|ch| {
// eprintln!("Condition, which needs to match: |ch| ch == '{}'", ch);
move |ch2| ch2 == ch
})
.collect();
let (literal, chars_read) =
take_until_succesive_match(text_without_quote, &mut predicates)?;
// The second number read here is the last quote
// NOTE(review): `take_until_succesive_match` appears to advance past the
// char matched by its first predicate, so the closing quote may already
// be counted inside `chars_read`; verify the total below is not off by
// one and that `literal` excludes the delimiter.
Ok((
TokenKind::StringLiteral(literal.to_owned()),
chars_read + 1 + delimeter.len(),
))
}
}
fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
let (got, chars_read) = take_while(text, |ch| ch == '_' || ch.is_alphanumeric())?;
@ -208,6 +264,12 @@ fn tokenize_ident(text: &str) -> Result<(TokenKind, usize), LexingError> {
"fn" => TokenKind::Keyword(Keyword::r#fn),
"struct" => TokenKind::Keyword(Keyword::r#struct),
"enum" => TokenKind::Keyword(Keyword::r#enum),
"derive" => TokenKind::AttributeKeyword(AttributeKeyword::derive),
"doc" => TokenKind::AttributeKeyword(AttributeKeyword::doc),
"error" => TokenKind::AttributeKeyword(AttributeKeyword::error),
"msg" => TokenKind::AttributeKeyword(AttributeKeyword::msg),
other => TokenKind::Identifier(other.to_string()),
};
@ -252,6 +314,48 @@ where
Ok((&data[..current_index], current_index))
}
}
/// Consume bytes until all the predicates match in successive ways
///
/// Returns the consumed prefix of `data` together with its length in bytes,
/// or `LexingError::NoMatchesTaken` when nothing was consumed at all.
fn take_until_succesive_match<'a, F>(
data: &'a str,
preds: &mut [F],
) -> Result<(&'a str, usize), LexingError>
where
F: FnMut(char) -> bool,
{
assert!(!preds.is_empty(), "Predicates need to be provided");
let mut current_index = 0;
'outer: for ch in data.chars() {
let should_stop = preds[0](ch);
// NOTE(review): the index is advanced *before* `should_stop` is acted
// on, so the char matched by `preds[0]` ends up inside the returned
// slice — confirm callers expect the match to be included.
current_index += ch.len_utf8();
if should_stop {
// eprintln!("First predicate did match char: {:#?}", ch);
if preds.len() == 1 {
// eprintln!("Only one predicate provided, which matched: {:#?}\n", ch);
break 'outer;
}
'inner: for pred in &mut preds[1..] {
// NOTE(review): `current_index` is a *byte* offset but `nth`
// counts *chars*; these only coincide for ASCII input — TODO
// confirm non-ASCII literals are handled elsewhere.
let ch = &data.chars().nth(current_index).expect("This should exists");
// eprintln!("Checking pred with char: {:#?}", ch);
if pred(*ch) {
// eprintln!("Predicate did match char: {:#?}\n", ch);
break 'outer;
}
// eprintln!("Predicate did not match char: {:#?}\n", ch);
current_index += ch.len_utf8();
// NOTE(review): this unconditional break means only `preds[1]` is
// ever consulted, so delimiters built from three or more
// predicates (e.g. `r##"…"##`) can never match fully — confirm
// whether multi-hash raw strings are supported.
break 'inner;
}
}
}
if current_index == 0 {
Err(LexingError::NoMatchesTaken)
} else {
Ok((&data[..current_index], current_index))
}
}
/// Skips input until the remaining string pattern starts with the pattern
fn skip_until<'a>(mut src: &'a str, pattern: &str) -> &'a str {

View File

@ -24,7 +24,7 @@ use thiserror::Error;
use std::{error::Error, fmt::Display};
use crate::{
command_spec::checked::Identifier,
command_spec::{checked::Identifier, unchecked::Attribute},
error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
lexing::TokenSpan,
parsing::unchecked::error::SpannedParsingError as OldSpannedParsingError,
@ -63,6 +63,11 @@ pub enum ParsingError {
r#type: Identifier,
span: TokenSpan,
},
#[error("The {specified} attribute can't be used here!")]
WrongAttributeInPosition {
specified: Attribute,
span: TokenSpan,
},
}
impl ParsingError {
@ -74,6 +79,7 @@ impl ParsingError {
ParsingError::EnumWithNamespaceNamePascal { enum_span, .. } => enum_span,
ParsingError::NotEnoughGenericArgs { span, .. } => span,
ParsingError::TooManyGenericArgs { span, .. } => span,
ParsingError::WrongAttributeInPosition { span, .. } => span,
}
}
}
@ -87,6 +93,7 @@ impl AdditionalHelp for ParsingError {
| ParsingError::EnumWithNamespaceName {..} => "Change the name of this Enumeration as the generation process in trixy-macros needs to use this name".to_owned(),
ParsingError::NotEnoughGenericArgs { got, expected_min, .. } => format!("Add generic args until you have gone from {} to {}", got, expected_min),
ParsingError::TooManyGenericArgs { got, expected_max, .. } => format!("Remove generic args until you have gone from {} to {}", got, expected_max),
ParsingError::WrongAttributeInPosition { .. } => format!("Remove this attribute"),
}
}
}

View File

@ -19,7 +19,7 @@
* If not, see <https://www.gnu.org/licenses/>.
*/
use std::mem;
use std::{iter, mem};
use convert_case::{Case, Casing};
use trixy_types::BASE_TYPES;
@ -27,8 +27,8 @@ use trixy_types::BASE_TYPES;
use crate::{
command_spec::{
checked::{
CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier, NamedType,
Namespace, Structure, Type,
self, CommandSpec, DocIdentifier, DocNamedType, Enumeration, Function, Identifier,
NamedType, Namespace, Structure, Type,
},
unchecked::{
CommandSpec as UncheckedCommandSpec, DocNamedType as UncheckedDocNamedType,
@ -39,7 +39,7 @@ use crate::{
Variant,
},
error::ErrorContext,
lexing::{TokenKind, TokenSpan},
lexing::{Token, TokenKind, TokenSpan, TokenStream},
};
use self::error::{ParsingError, SpannedParsingError};
@ -48,6 +48,39 @@ pub mod error;
#[cfg(test)]
mod test;
/// Validate and convert the attributes of an unchecked item.
///
/// `take_attrs! {item, doc, msg}` expands to an expression that maps every
/// attribute in `item.attributes` through the allow-list (`doc`, `msg`, …):
/// listed variants are converted with `.into()`, any other variant produces
/// a `ParsingError::WrongAttributeInPosition`. The trailing `?` propagates
/// that error from the surrounding function.
macro_rules! take_attrs {
($name:expr, $($types:ident),*) => {
$name
.attributes
.into_iter()
.map(|val| {
take_attrs!{@process_val val, $($types),*}
})
.collect::<Result<Vec<_>, _>>()?
};
// Fallback: the attribute matched none of the allowed variants.
// (The `return` exits the `map` closure above, not the enclosing fn.)
(@process_val_last $iden:ident) => {
{
let span = $iden.span();
return Err(ParsingError::WrongAttributeInPosition {
specified: $iden,
span,
});
}
};
// One remaining allowed variant: accept it or fall through to the error.
(@process_val $iden:ident, $val:ident) => {
if let $crate::command_spec::unchecked::Attribute::$val{..} = $iden {
return Ok($iden.into());
};
take_attrs!{@process_val_last $iden}
};
// Two or more remaining variants: test the first, recurse on the rest.
(@process_val $iden:ident, $val:ident, $($other:tt),+ $(,)*) => {
if let $crate::command_spec::unchecked::Attribute::$val{..} = $iden {
return Ok($iden.into());
};
take_attrs!{@process_val $iden, $($other),*}
};
}
struct Parser {
command_spec: UncheckedCommandSpec,
structures: Vec<UncheckedStructure>,
@ -57,6 +90,7 @@ struct Parser {
impl UncheckedCommandSpec {
pub fn process(self, original_file: String) -> Result<CommandSpec, SpannedParsingError> {
let original_file = TokenStream::replace(&original_file).to_string();
let checked = Parser {
command_spec: self,
structures: vec![],
@ -68,12 +102,6 @@ impl UncheckedCommandSpec {
}
}
/// Convert every attribute of `$name` with `.into()`, without validating
/// which attribute kinds are allowed in this position.
macro_rules! pass_attrs_along {
($name:ident) => {
$name.attributes.into_iter().map(|a| a.into()).collect()
};
}
impl Parser {
fn parse(mut self) -> Result<CommandSpec, SpannedParsingError> {
let namespace: UncheckedNamespace =
@ -142,7 +170,7 @@ impl Parser {
structures,
enumerations,
namespaces,
attributes: pass_attrs_along!(namespace),
attributes: take_attrs! {namespace, doc},
})
}
@ -165,7 +193,7 @@ impl Parser {
identifier,
inputs,
output,
attributes: pass_attrs_along!(function),
attributes: take_attrs! {function, doc},
})
}
@ -202,7 +230,7 @@ impl Parser {
mem::take(&mut state.token.kind).to_identifier(Variant::DocNamedType);
DocIdentifier {
name: ident.name,
attributes: pass_attrs_along!(state),
attributes: take_attrs! {state, doc, msg},
variant: Variant::DocNamedType,
}
})
@ -211,7 +239,7 @@ impl Parser {
Ok(Enumeration {
identifier,
states,
attributes: pass_attrs_along!(enumeration),
attributes: take_attrs! {enumeration, doc, derive, error},
})
}
@ -231,7 +259,7 @@ impl Parser {
Ok(Structure {
identifier,
contents,
attributes: pass_attrs_along!(structure),
attributes: take_attrs! {structure, doc, derive},
})
}
@ -254,7 +282,7 @@ impl Parser {
Ok(DocNamedType {
name,
r#type,
attributes: pass_attrs_along!(doc_named_type),
attributes: take_attrs! {doc_named_type, doc},
})
}

View File

@ -23,7 +23,7 @@ use std::{error::Error, fmt::Display};
use thiserror::Error;
use crate::{
command_spec::unchecked::Attribute,
command_spec::unchecked::{Attribute, StringLiteral},
error::{AdditionalHelp, ErrorContext, ErrorContextDisplay},
lexing::{TokenKind, TokenSpan},
};
@ -46,19 +46,23 @@ pub enum ParsingError {
#[error("Expected a Keyword to start a new declaration, but found: '{actual}'")]
ExpectedKeyword { actual: TokenKind, span: TokenSpan },
#[error("DocComment does not have target")]
TrailingDocComment {
#[error("Attribute does not have target")]
TrailingAttribute {
comments: Vec<Attribute>,
span: TokenSpan,
},
#[error("Derive value is not known")]
WrongDeriveValue { specified: StringLiteral },
}
impl ParsingError {
pub fn span(&self) -> &TokenSpan {
match self {
ParsingError::ExpectedDifferentToken { span, .. } => span,
ParsingError::ExpectedKeyword { span, .. } => span,
ParsingError::TrailingDocComment { span, .. } => span,
ParsingError::TrailingAttribute { span, .. } => span,
ParsingError::UnexpectedEOF { span, .. } => span,
ParsingError::WrongDeriveValue { specified } => &specified.span,
}
}
@ -81,8 +85,9 @@ impl AdditionalHelp for ParsingError {
ParsingError::ExpectedKeyword { actual, .. } => format!(
"I expected a keyword (that is something like 'fn' or 'mod') but you put a '{}' there!",
actual),
ParsingError::TrailingDocComment { .. } => "I expected some target (a function, namespace, enum, or something like this) which this doc comment annotates, but you put nothing there".to_owned(),
ParsingError::TrailingAttribute { .. } => "I expected some target (a function, namespace, enum, or something like this) which this attribute annotates, but you put nothing there".to_owned(),
ParsingError::UnexpectedEOF { expected, .. } => format!("Put the expected token ('{expected}') here."),
ParsingError::WrongDeriveValue { specified } => format!("'{}' is not a valid derive value! Take a look a the grammar file", specified.content),
}
}
}

View File

@ -19,15 +19,15 @@
* If not, see <https://www.gnu.org/licenses/>.
*/
use std::mem;
use std::{iter::once, mem};
use crate::{
command_spec::unchecked::{
Attribute, CommandSpec, Declaration, DocNamedType, DocToken, Enumeration, Function,
NamedType, Namespace, Structure, Type,
Attribute, CommandSpec, Declaration, DeriveValue, DocNamedType, DocToken, Enumeration,
Function, NamedType, Namespace, StringLiteral, Structure, Type,
},
error::ErrorContext,
lexing::{Token, TokenKind, TokenSpan, TokenStream},
lexing::{AttributeKeyword, Token, TokenKind, TokenSpan, TokenStream},
token,
};
@ -46,7 +46,7 @@ impl TokenStream {
pub(super) struct Parser {
token_stream: TokenStream,
active_doc_comments: Vec<Attribute>,
current_attributes: Vec<Attribute>,
last_span: TokenSpan,
}
@ -55,7 +55,7 @@ impl Parser {
token_stream.reverse();
Self {
token_stream,
active_doc_comments: vec![],
current_attributes: vec![],
last_span: TokenSpan::default(),
}
}
@ -89,38 +89,21 @@ impl Parser {
token![fn] => Ok(Declaration::Function(self.parse_function()?)),
token![struct] => Ok(Declaration::Structure(self.parse_structure()?)),
token![enum] => Ok(Declaration::Enumeration(self.parse_enumeration()?)),
token![DocCommentMatch] => {
while self.expect_peek(token![DocComment]) {
let comment_to_push = {
let doc_comment = self.expect(token![DocComment])?;
let span = *doc_comment.span();
let name = if let TokenKind::DocComment(content) = doc_comment.kind {
content
} else {
unreachable!("The expect should have accounted for that case");
};
Attribute::doc {
content: name,
span,
}
};
self.active_doc_comments.push(comment_to_push);
}
token![#] => {
let attributes = self.parse_attributes()?;
self.current_attributes.extend(attributes);
if self.token_stream.is_empty() {
fn get_span(attr: Option<&Attribute>) -> TokenSpan {
match attr.expect("Something should be here") {
Attribute::doc { span, .. } => *span,
}
attr.expect("Something should be here").span()
}
let span = TokenSpan::from_range(
get_span(self.active_doc_comments.first()),
get_span(self.active_doc_comments.last()),
get_span(self.current_attributes.first()),
get_span(self.current_attributes.last()),
);
Err(ParsingError::TrailingDocComment {
comments: mem::take(&mut self.active_doc_comments),
Err(ParsingError::TrailingAttribute {
comments: mem::take(&mut self.current_attributes),
span,
})
} else {
@ -159,29 +142,78 @@ impl Parser {
})
}
fn parse_doc_comments(&mut self) -> Result<Vec<Attribute>, ParsingError> {
let mut attrs = mem::take(&mut self.active_doc_comments);
fn parse_bracket_string_literal(&mut self) -> Result<StringLiteral, ParsingError> {
self.expect(token![CurvedBracketOpen])?;
let string_literal = self.expect(token![StringLiteral])?;
self.expect(token![CurvedBracketClose])?;
let string_literal = Into::<StringLiteral>::into(string_literal);
Ok(string_literal)
}
while self.expect_peek(token![DocComment]) {
attrs.push({
let doc_comment = self.expect(token![DocComment])?;
let span = *doc_comment.span();
let name = if let TokenKind::DocComment(content) = doc_comment.kind {
content
} else {
unreachable!("The expect should have accounted for that case");
};
Attribute::doc {
content: name,
span,
/// Parse the keyword-specific payload between `#[` and `]`.
///
/// Dispatches on the attribute keyword:
/// * `derive` — expects `("Error")`; any other literal yields
///   `ParsingError::WrongDeriveValue`.
/// * `doc`    — expects `= "..."`.
/// * `error`  — takes no payload.
/// * `msg`    — expects `("...")`.
///
/// The returned attribute carries the span of the keyword token.
fn parse_attribute_value(&mut self) -> Result<Attribute, ParsingError> {
    let ident = self.expect(token![AttributeKeyword])?;
    let span = *ident.span();
    let TokenKind::AttributeKeyword(keyword) = ident.kind() else {
        unreachable!("This is checked in the `expect` above")
    };
    // Each arm yields a `Result`; returning the match directly replaces the
    // original `let … = match { … }?; Ok(…)` dance. The leftover `dbg!`
    // probe behind an `expect_peek(PoundSign)` check has been removed.
    match keyword {
        AttributeKeyword::derive => {
            let string_literal = self.parse_bracket_string_literal()?;
            match string_literal.content.as_str() {
                "Error" => Ok(Attribute::derive {
                    value: DeriveValue::Error,
                    span,
                }),
                _ => Err(error::ParsingError::WrongDeriveValue {
                    specified: string_literal,
                }),
            }
        }
        AttributeKeyword::doc => {
            self.expect(token![=])?;
            let string_literal = self.expect(token![StringLiteral])?;
            let string_literal = Into::<StringLiteral>::into(string_literal);
            Ok(Attribute::doc {
                content: string_literal,
                span,
            })
        }
        AttributeKeyword::error => Ok(Attribute::error { span }),
        AttributeKeyword::msg => {
            let string_literal = self.parse_bracket_string_literal()?;
            Ok(Attribute::msg {
                content: string_literal,
                span,
            })
        }
    }
}
/// Collect every `#[...]` attribute at the current position, prepended with
/// any attributes already buffered in `self.current_attributes`.
fn parse_attributes(&mut self) -> Result<Vec<Attribute>, ParsingError> {
    // Drain the buffer so the attributes are not reported twice.
    let mut attributes = mem::take(&mut self.current_attributes);
    while self.expect_peek(token![#]) {
        self.expect(token![#])?;
        self.expect(token![SquareBracketOpen])?;
        let attribute = self.parse_attribute_value()?;
        self.expect(token![SquareBracketClose])?;
        attributes.push(attribute);
    }
    Ok(attributes)
}
fn parse_namespace(&mut self) -> Result<Namespace, ParsingError> {
let attributes = self.parse_doc_comments()?;
let attributes = self.parse_attributes()?;
self.expect(token![mod])?;
let mut namespace = Namespace {
@ -209,29 +241,30 @@ impl Parser {
}
fn parse_enumeration(&mut self) -> Result<Enumeration, ParsingError> {
let attributes = self.parse_doc_comments()?;
let attributes = self.parse_attributes()?;
self.expect(token![enum])?;
let identifier = self.expect(token![Ident])?;
self.expect(token![BraceOpen])?;
let mut states = vec![];
if self.expect_peek(token![Ident]) {
let attributes = self.parse_doc_comments()?;
if self.expect_peek(token![Ident]) || self.expect_peek(token![#]) {
let attributes = self.parse_attributes()?;
states.push(DocToken {
token: self.expect(token![Ident])?,
attributes,
});
}
while self.expect_peek(token![Comma]) {
self.expect(token![Comma])?;
if self.expect_peek(token![Ident]) {
let attributes = self.parse_doc_comments()?;
states.push(DocToken {
token: self.expect(token![Ident])?,
attributes,
});
} else {
break;
while self.expect_peek(token![Comma]) {
self.expect(token![Comma])?;
if self.expect_peek(token![Ident]) || self.expect_peek(token![#]) {
let attributes = self.parse_attributes()?;
states.push(DocToken {
token: self.expect(token![Ident])?,
attributes,
});
} else {
break;
}
}
}
self.expect(token![BraceClose])?;
@ -243,7 +276,7 @@ impl Parser {
}
fn parse_structure(&mut self) -> Result<Structure, ParsingError> {
let attributes = self.parse_doc_comments()?;
let attributes = self.parse_attributes()?;
self.expect(token![struct])?;
let name = self.expect(token![Ident])?;
self.expect(token![BraceOpen])?;
@ -254,7 +287,7 @@ impl Parser {
}
while self.expect_peek(token![Comma]) {
self.expect(token![Comma])?;
if self.expect_peek(token![Ident]) || self.expect_peek(token![DocComment]) {
if self.expect_peek(token![Ident]) || self.expect_peek(token![PoundSign]) {
contents.push(self.parse_doc_named_type()?);
} else {
break;
@ -277,7 +310,7 @@ impl Parser {
}
fn parse_doc_named_type(&mut self) -> Result<DocNamedType, ParsingError> {
let attributes = self.parse_doc_comments()?;
let attributes = self.parse_attributes()?;
let name = self.expect(token![Ident])?;
self.expect(token![Colon])?;
let r#type = self.parse_type()?;
@ -289,7 +322,7 @@ impl Parser {
}
fn parse_function(&mut self) -> Result<Function, ParsingError> {
let attributes = self.parse_doc_comments()?;
let attributes = self.parse_attributes()?;
self.expect(token![fn])?;
let name = self.expect(token![Ident])?;
self.expect(token![ParenOpen])?;
@ -322,20 +355,20 @@ impl Parser {
/// For example:
///
/// ```dont_run
/// use trixy_lang_parser::{
/// lexing::{Keyword, TokenKind, TokenStream},
/// parsing::unchecked::Parser,
/// token,
/// };
///#use trixy_lang_parser::{
///# lexing::{Keyword, TokenKind, TokenStream},
///# parsing::unchecked::Parser,
///# token,
///#};
///
/// # fn main() {
/// let token_stream = TokenStream::lex("mod {}").unwrap();
/// let parser = Parser::new(token_stream);
/// assert_eq!(parser.expect(token![mod]).unwrap(), TokenKind::Keyword(Keyword::mod));
/// assert_eq!(parser.expect(token![BraceOpen]).unwrap(), TokenKind::BraceOpen);
/// assert_eq!(parser.expect(token![BraceClose]).unwrap(), TokenKind::BraceClose);
/// assert!(parser.expect(token![BraceClose]).is_err());
/// # }
///# fn main() {
/// let token_stream = TokenStream::lex("mod {}").unwrap();
/// let parser = Parser::new(token_stream);
/// assert_eq!(parser.expect(token![mod]).unwrap(), TokenKind::Keyword(Keyword::mod));
/// assert_eq!(parser.expect(token![CurlyBracketOpen]).unwrap(), TokenKind::BraceOpen);
/// assert_eq!(parser.expect(token![CurlyBracketClose]).unwrap(), TokenKind::BraceClose);
/// assert!(parser.expect(token![CurlyBracketClose]).is_err());
///# }
/// ```
///
pub(super) fn expect(&mut self, token: TokenKind) -> Result<Token, ParsingError> {