From 3c59e4fb9321a99982fe6adaccb056672667ab0d Mon Sep 17 00:00:00 2001
From: Eric-Paul Ickhorn
Date: Tue, 28 Nov 2023 22:21:23 +0100
Subject: [PATCH] Started proper error recovery

---
Review notes on this copy of the patch:
 * Every `#include` line below is missing its header name; the names have to
   be restored from the repository before the patch can be applied.
 * Three removed lines that were missing from this copy (the literal length
   assignment in pac_grow_item and the two `CURRENT.type` operands in
   pac_grow_variant) are restored from their `+` counterparts; the hunk line
   counts and the diffstat totals confirm them.
 * Only added (`+`) lines were edited and no hunk changes its line count:
   name_in_source in pac_grow_rule is a `char *` again, the token macros in
   ast.c and CURSTR in ast_errors.c are fully parenthesized, and ast.h
   declares pac_ast_handle_unknown_item_type.

 code/inc/ast.h        |  15 +++++-
 code/inc/logger.h     |   2 +-
 code/src/ast.c        | 105 +++++++++++++++++++++-----------------
 code/src/ast_errors.c | 115 ++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 189 insertions(+), 48 deletions(-)
 create mode 100644 code/src/ast_errors.c

diff --git a/code/inc/ast.h b/code/inc/ast.h
index 813a27c..1a75d8d 100644
--- a/code/inc/ast.h
+++ b/code/inc/ast.h
@@ -3,6 +3,7 @@
 #define PARCEL_AST_H
 
 #include
+#include
 #include
 
 typedef struct pac_ast pac_ast_s;
@@ -79,6 +80,18 @@ struct pac_ast_item
     } data;
 };
 
-pac_ast_s pac_grow_ast (pac_tlist_s tokens);
+typedef struct pac_ast_grower
+{
+    pac_tlist_s *token_list; // token list whose cursor the grow functions advance
+    pac_logger_s logger;     // logger the pac_ast_handle_* functions report to
+    bool_t failed;           // FALSE at creation; set by pac_ast_handle_invalid_reference_name_token
+} pac_ast_grower_s;
+
+pac_ast_s pac_grow_ast (pac_tlist_s tokens);
+
+bool_t pac_ast_handle_invalid_reference_name_token (pac_ast_grower_s *grower);
+bool_t pac_ast_handle_missing_reference_close_tag (pac_ast_grower_s *grower);
+bool_t pac_ast_handle_reference_with_equals_sign (pac_ast_grower_s *grower);
+bool_t pac_ast_handle_unknown_item_type (pac_ast_grower_s *grower);
 
 #endif // PARCEL_AST_H
diff --git a/code/inc/logger.h b/code/inc/logger.h
index 51ce7e0..ea59cfe 100644
--- a/code/inc/logger.h
+++ b/code/inc/logger.h
@@ -23,7 +23,7 @@ typedef enum
 
 typedef enum
 {
-    PAC_RESTRICTED_ERROR_INVALID_RULE_NAME
+    PAC_NAMING_ERROR_INVALID_RULE_NAME
 
 } pac_naming_error_e;
 
diff --git a/code/src/ast.c b/code/src/ast.c
index d99c07e..45755ad 100644
--- a/code/src/ast.c
+++ b/code/src/ast.c
@@ -1,40 +1,48 @@
-#include
 #include
+#include
 #include
 #include
 #include
 #include
 
-#define CURRENT_STRING &tlist->source[tlist->tokens[tlist->cursor].offset]
-#define CURRENT (tlist->tokens[tlist->cursor])
-#define SKIP_TOKEN ++tlist->cursor
-#define TOKEN_AT(index) (tlist->tokens[index])
-#define END_REACHED (tlist->cursor >= tlist->num_tokens)
+#define CURSTR (&grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset])
+#define CURTOK (grower->token_list->tokens[grower->token_list->cursor])
+#define CURSOR (grower->token_list->cursor)
+#define SKIP_TOKEN (++grower->token_list->cursor)
+#define REWIND_TOKEN (--grower->token_list->cursor)
+#define TOKEN_AT(index) (grower->token_list->tokens[(index)])
+#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens)
 
 
-i32_t pac_grow_reference(pac_tlist_s *tlist, pac_ast_reference_s *reference)
+// NOTE(review): pac_ast_handle_* results (bool_t) are returned as the i32_t result; callers only test `< 0`, so confirm FALSE is meant to pass as success.
+i32_t pac_grow_reference(pac_ast_grower_s *grower, pac_ast_reference_s *reference)
 {
-    if(CURRENT.type != PAC_TOKEN_SIGN_OPEN_TAG)
+    if(CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG)
     {
+        // This is only possible with malformed state.
         return -1;
     }
     SKIP_TOKEN;
-
-    if(CURRENT.type != PAC_TOKEN_WORD)
+    if(CURTOK.type != PAC_TOKEN_WORD)
     {
-        return -1;
+        return pac_ast_handle_invalid_reference_name_token(grower);
     }
 
-    usz_t len_name = CURRENT.length;
-    char *name = CURRENT_STRING;
+    usz_t len_name = CURTOK.length;
+    char *name = CURSTR;
     SKIP_TOKEN;
 
-    if(CURRENT.type != PAC_TOKEN_SIGN_CLOSE_TAG)
+    if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG)
     {
-        return -1;
+        return pac_ast_handle_missing_reference_close_tag(grower);
     }
     SKIP_TOKEN;
 
+    if(CURTOK.type == PAC_TOKEN_SIGN_EQUALS)
+    {
+        return pac_ast_handle_reference_with_equals_sign(grower);
+    }
+
     reference->len_name = len_name;
     reference->name = malloc(len_name+1);
     pac_memory_copy(reference->name, name, len_name);
@@ -43,28 +51,28 @@ i32_t pac_grow_reference(pac_tlist_s *tlist, pac_ast_reference_s *reference)
     return 3;
 }
 
-i32_t pac_grow_item(pac_tlist_s *tlist, pac_ast_item_s *item)
+i32_t pac_grow_item(pac_ast_grower_s *grower, pac_ast_item_s *item)
 {
     item->type = PAC_AST_ITEM_INVALID;
     pac_memory_zero(item, sizeof(pac_ast_item_s));
-    if(CURRENT.type == PAC_TOKEN_LIT_STRING)
+    if(CURTOK.type == PAC_TOKEN_LIT_STRING)
     {
         item->type = PAC_AST_ITEM_LITERAL;
-        item->data.literal.length = CURRENT.length;
+        item->data.literal.length = CURTOK.length;
         item->data.literal.string = malloc(item->data.literal.length + 1);
-        pac_memory_copy(item->data.literal.string, CURRENT_STRING, CURRENT.length);
+        pac_memory_copy(item->data.literal.string, CURSTR, CURTOK.length);
         item->data.literal.string[item->data.literal.length] = 0x00;
         SKIP_TOKEN;
         return 1;
     }
 
-    if(CURRENT.type == PAC_TOKEN_SIGN_OPEN_TAG)
+    if(CURTOK.type == PAC_TOKEN_SIGN_OPEN_TAG)
     {
         item->type = PAC_AST_ITEM_REFERENCE;
-        return pac_grow_reference(tlist, &item->data.reference);
+        return pac_grow_reference(grower, &item->data.reference);
     }
 
-    if(CURRENT.type == PAC_TOKEN_KEYWORD_WORD)
+    if(CURTOK.type == PAC_TOKEN_KEYWORD_WORD)
     {
         item->type = PAC_AST_ITEM_SET;
         item->data.set = PAC_AST_SET_WORD;
@@ -72,7 +80,7 @@ i32_t pac_grow_item(pac_tlist_s *tlist, pac_ast_item_s *item)
         return 1;
     }
 
-    if(CURRENT.type == PAC_TOKEN_KEYWORD_INTEGER)
+    if(CURTOK.type == PAC_TOKEN_KEYWORD_INTEGER)
     {
         item->type = PAC_AST_ITEM_SET;
         item->data.set = PAC_AST_SET_INTEGER;
@@ -82,9 +90,9 @@ i32_t pac_grow_item(pac_tlist_s *tlist, pac_ast_item_s *item)
     return -1;
 }
 
-i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant)
+i32_t pac_grow_variant(pac_ast_grower_s *grower, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index)
 {
-    usz_t start_index = tlist->cursor;
+    usz_t start_index = CURSOR;
 
     pac_memory_zero(variant, sizeof(pac_ast_variant_s));
 
@@ -97,7 +105,7 @@ i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant)
             items_capacity *= 2;
             variant->items = calloc(sizeof(pac_ast_item_s), items_capacity);
         }
-        i32_t success = pac_grow_item(tlist, &variant->items[variant->num_items]);
+        i32_t success = pac_grow_item(grower, &variant->items[variant->num_items]);
         ++variant->num_items;
 
         if(success < 0)
@@ -105,13 +113,13 @@ i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant)
             return success - 1;
         }
         if(
-            (CURRENT.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
-            || (CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON)
+            (CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
+            || (CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
         )
         {
-            return tlist->cursor - start_index;
+            return CURSOR - start_index;
         }
-        if(CURRENT.type != PAC_TOKEN_SIGN_COMMA)
+        if(CURTOK.type != PAC_TOKEN_SIGN_COMMA)
         {
             return -1;
         }
@@ -120,37 +128,37 @@ i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant)
     return -1;
 }
 
-i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
+i32_t pac_grow_rule(pac_ast_grower_s *grower, pac_ast_rule_s *rule)
 {
     pac_memory_zero(rule, sizeof(pac_ast_rule_s));
 
     // Parse the header
-    usz_t start_index = tlist->cursor;
+    usz_t start_index = CURSOR;
 
-    if(CURRENT.type != PAC_TOKEN_SIGN_OPEN_TAG)
+    if(CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG)
     {
         return -1;
     }
     SKIP_TOKEN;
 
-    if(CURRENT.type != PAC_TOKEN_WORD)
+    if(CURTOK.type != PAC_TOKEN_WORD)
     {
         puts("A rule name must be a single word!");
         return -1;
     }
 
-    usz_t len_name = CURRENT.length;
-    usz_t name_start = CURRENT.offset;
+    usz_t len_name = CURTOK.length;
+    char *name_in_source = CURSTR; // CURSTR is a pointer into the token source, so it must not be stored in a usz_t
     SKIP_TOKEN;
 
-    if(CURRENT.type != PAC_TOKEN_SIGN_CLOSE_TAG)
+    if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG)
     {
         puts("Missing Tag closing sign!");
         return -1;
     }
     SKIP_TOKEN;
 
-    if(CURRENT.type != PAC_TOKEN_SIGN_EQUALS)
+    if(CURTOK.type != PAC_TOKEN_SIGN_EQUALS)
         return -1;
     SKIP_TOKEN;
 
@@ -158,7 +166,7 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
     // Parse all variants
 
     rule->name = malloc(len_name + 1);
-    pac_memory_copy(rule->name, &tlist->source[name_start], len_name);
+    pac_memory_copy(rule->name, name_in_source, len_name);
     rule->name[len_name] = 0;
 
     usz_t variants_capacity = 4;
@@ -170,7 +178,7 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
             variants_capacity *= 2;
             rule->variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * variants_capacity);
         }
-        i32_t success = pac_grow_variant(tlist, &rule->variants[rule->num_variants]);
+        i32_t success = pac_grow_variant(grower, &rule->variants[rule->num_variants], rule->name, rule->num_variants);
         ++rule->num_variants;
 
         if(success < 0)
@@ -178,12 +186,12 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
             printf("Failed parsing a rule's variant!");
             while(!END_REACHED)
             {
-                if(CURRENT.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
+                if(CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
                 {
                     printf("Continuing with next variant.\n");
                     break;
                 }
-                if(CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON)
+                if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
                 {
                     printf("Continuing with next rule.\n");
                     SKIP_TOKEN;
@@ -194,13 +202,13 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
             continue;
         }
 
-        if(CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON)
+        if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
         {
             SKIP_TOKEN;
-            return tlist->cursor - start_index;
+            return CURSOR - start_index;
         }
 
-        if(CURRENT.type != PAC_TOKEN_SIGN_VERTICAL_BAR)
+        if(CURTOK.type != PAC_TOKEN_SIGN_VERTICAL_BAR)
         {
             return -1;
         }
@@ -216,6 +224,11 @@ pac_ast_s pac_grow_ast(pac_tlist_s tokens)
     ast.num_rules = 0;
     ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity);
 
+    pac_ast_grower_s grower;
+    grower.logger = pac_create_logger();
+    grower.token_list = &tokens;
+    grower.failed = FALSE;
+
     while(tokens.cursor < tokens.num_tokens)
     {
         if(ast.num_rules >= rules_capacity)
@@ -223,7 +236,7 @@ pac_ast_s pac_grow_ast(pac_tlist_s tokens)
             rules_capacity *= 2;
             ast.rules = realloc(ast.rules, sizeof(pac_ast_rule_s) * rules_capacity);
         }
-        int success = pac_grow_rule(&tokens, &ast.rules[ast.num_rules]);
+        int success = pac_grow_rule(&grower, &ast.rules[ast.num_rules]);
         if(success < 0)
         {
             printf("Failed parsing a rule at index: %lu!\n", tokens.cursor);
diff --git a/code/src/ast_errors.c b/code/src/ast_errors.c
new file mode 100644
index 0000000..a1b34ea
--- /dev/null
+++ b/code/src/ast_errors.c
@@ -0,0 +1,115 @@
+#include
+#include
+#include
+
+#define CURSTR (&grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset])
+#define CURTOK (grower->token_list->tokens[grower->token_list->cursor])
+#define CURSOR (grower->token_list->cursor)
+#define SKIP_TOKEN (++grower->token_list->cursor)
+#define REWIND_TOKEN (--grower->token_list->cursor)
+#define TOKEN_AT(index) (grower->token_list->tokens[(index)])
+#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens)
+
+
+// pac_ast_builder_is_at_item_start: A utility function which returns whether
+// the parser is at the start of an item in the current state. It reads the
+// current token without a bounds check, so call it only while !END_REACHED.
+bool_t pac_ast_builder_is_at_item_start(pac_ast_grower_s *grower)
+{
+    switch(CURTOK.type)
+    {
+    case PAC_TOKEN_LIT_STRING:
+    case PAC_TOKEN_SIGN_OPEN_TAG:
+    case PAC_TOKEN_KEYWORD_WORD:
+    case PAC_TOKEN_KEYWORD_INTEGER:
+        return TRUE;
+    default: break;
+    }
+    return FALSE;
+}
+
+// pac_ast_handle_invalid_reference_name_token: Logs a naming error that quotes
+// the source from the opening tag to a nearby closing tag and sets grower->failed.
+// Returns whether the parser can continue to catch errors
+bool_t pac_ast_handle_invalid_reference_name_token(pac_ast_grower_s *grower)
+{
+    usz_t open_tag_src_offset = TOKEN_AT(CURSOR-1).offset;
+    usz_t len_reference_name = 0;
+
+    // Find closing tag for getting the name of the word for the error message.
+    usz_t tried_tokens = 0;
+    while((tried_tokens < 3) && ((CURSOR + tried_tokens) < grower->token_list->num_tokens)) // never look past the last token
+    {
+        if(TOKEN_AT(CURSOR + tried_tokens).type == PAC_TOKEN_SIGN_CLOSE_TAG)
+        {
+            len_reference_name = TOKEN_AT(CURSOR + tried_tokens).offset - open_tag_src_offset;
+            ++len_reference_name; // Take the closing tag into the name
+            break;
+        }
+        ++tried_tokens;
+    }
+    // If no closing tag could be found, use the token after the opening tag.
+    if(len_reference_name == 0)
+        len_reference_name = ((CURTOK.offset + CURTOK.length) - open_tag_src_offset);
+
+    pac_naming_error_s error;
+    error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME;
+    error.line = CURTOK.line;
+    error.column = CURTOK.column;
+    error.specifics.invalid_rule_name.given_rule_name = pac_log_alloc(&grower->logger, len_reference_name + 1);
+    pac_memory_copy(error.specifics.invalid_rule_name.given_rule_name, &grower->token_list->source[open_tag_src_offset], len_reference_name);
+    error.specifics.invalid_rule_name.given_rule_name[len_reference_name] = 0x00; // len + 1 bytes were requested but only len copied; terminate explicitly
+    pac_log_naming_error(&grower->logger, error);
+
+    grower->failed = TRUE;
+    return TRUE;
+}
+
+// Returns whether the parser can continue to catch errors
+bool_t pac_ast_handle_missing_reference_close_tag(pac_ast_grower_s *grower)
+{
+    pac_syntax_error_s error;
+    error.type = PAC_SYNTAX_ERROR_MISSING_TOKEN;
+    error.line = CURTOK.line;
+    error.column = CURTOK.column;
+    error.specifics.missing_token.hint = NULL;
+    error.specifics.missing_token.wanted_token = "Tag Closer (>)";
+    pac_log_syntax_error(&grower->logger, error);
+
+    return TRUE;
+}
+
+// Returns whether the parser can continue to catch errors
+bool_t pac_ast_handle_reference_with_equals_sign(pac_ast_grower_s *grower)
+{
+    pac_syntax_error_s error;
+    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
+    error.line = CURTOK.line;
+    error.column = CURTOK.column;
+    error.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago.";
+    error.specifics.odd_token.num_valid_options = 1;
+    error.specifics.odd_token.valid_options[0] = "Any Rule Item";
+    pac_log_syntax_error(&grower->logger, error);
+
+    // TODO: Handle the tokens following this as a new rule.
+
+    return FALSE;
+}
+
+// pac_ast_handle_unknown_item_type: Logs an odd-token error for the current
+// token, then skips tokens until one that can start an item or the list ends.
+// Returns whether the parser can continue to catch errors
+bool_t pac_ast_handle_unknown_item_type(pac_ast_grower_s *grower)
+{
+    pac_syntax_error_s error;
+    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
+    error.line = CURTOK.line;
+    error.column = CURTOK.column;
+    error.specifics.odd_token.hint = NULL; // no hint here; the field must not reach the logger uninitialized
+    error.specifics.odd_token.num_valid_options = 1;
+    error.specifics.odd_token.valid_options[0] = "Any Rule Item";
+    pac_log_syntax_error(&grower->logger, error);
+    while(!END_REACHED && !pac_ast_builder_is_at_item_start(grower)) // stop at the end of the token list
+        SKIP_TOKEN;
+    return TRUE;
+}