Started proper error recovery

This commit is contained in:
Eric-Paul Ickhorn 2023-11-28 22:21:23 +01:00
parent 9fa1d70eb2
commit 3c59e4fb93
4 changed files with 189 additions and 48 deletions

View File

@ -3,6 +3,7 @@
#define PARCEL_AST_H #define PARCEL_AST_H
#include <utility.h> #include <utility.h>
#include <logger.h>
#include <tokenizer.h> #include <tokenizer.h>
typedef struct pac_ast pac_ast_s; typedef struct pac_ast pac_ast_s;
@ -79,6 +80,18 @@ struct pac_ast_item
} data; } data;
}; };
typedef struct pac_ast_grower
{
pac_tlist_s *token_list;
pac_logger_s logger;
bool_t failed;
} pac_ast_grower_s;
pac_ast_s pac_grow_ast (pac_tlist_s tokens); pac_ast_s pac_grow_ast (pac_tlist_s tokens);
bool_t pac_ast_handle_invalid_reference_name_token (pac_ast_grower_s *grower);
bool_t pac_ast_handle_missing_reference_close_tag (pac_ast_grower_s *grower);
bool_t pac_ast_handle_reference_with_equals_sign (pac_ast_grower_s *grower);
#endif // PARCEL_AST_H #endif // PARCEL_AST_H

View File

@ -23,7 +23,7 @@ typedef enum
typedef enum typedef enum
{ {
PAC_RESTRICTED_ERROR_INVALID_RULE_NAME PAC_NAMING_ERROR_INVALID_RULE_NAME
} pac_naming_error_e; } pac_naming_error_e;

View File

@ -1,40 +1,48 @@
#include <parcel.h>
#include <ast.h> #include <ast.h>
#include <logger.h>
#include <tokenizer.h> #include <tokenizer.h>
#include <string.h> #include <string.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#define CURRENT_STRING &tlist->source[tlist->tokens[tlist->cursor].offset] #define CURSTR &grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset]
#define CURRENT (tlist->tokens[tlist->cursor]) #define CURTOK (grower->token_list->tokens[grower->token_list->cursor])
#define SKIP_TOKEN ++tlist->cursor #define CURSOR (grower->token_list->cursor)
#define TOKEN_AT(index) (tlist->tokens[index]) #define SKIP_TOKEN ++grower->token_list->cursor
#define END_REACHED (tlist->cursor >= tlist->num_tokens) #define REWIND_TOKEN --grower->token_list->cursor
#define TOKEN_AT(index) (grower->token_list->tokens[index])
#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens)
i32_t pac_grow_reference(pac_tlist_s *tlist, pac_ast_reference_s *reference)
i32_t pac_grow_reference(pac_ast_grower_s *grower, pac_ast_reference_s *reference)
{ {
if(CURRENT.type != PAC_TOKEN_SIGN_OPEN_TAG) if(CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG)
{ {
// This is only possible with malformed state.
return -1; return -1;
} }
SKIP_TOKEN; SKIP_TOKEN;
if(CURTOK.type != PAC_TOKEN_WORD)
if(CURRENT.type != PAC_TOKEN_WORD)
{ {
return -1; return pac_ast_handle_invalid_reference_name_token(grower);
} }
usz_t len_name = CURRENT.length; usz_t len_name = CURTOK.length;
char *name = CURRENT_STRING; char *name = CURSTR;
SKIP_TOKEN; SKIP_TOKEN;
if(CURRENT.type != PAC_TOKEN_SIGN_CLOSE_TAG) if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG)
{ {
return -1; return pac_ast_handle_missing_reference_close_tag(grower);
} }
SKIP_TOKEN; SKIP_TOKEN;
if(CURTOK.type == PAC_TOKEN_SIGN_EQUALS)
{
return pac_ast_handle_reference_with_equals_sign(grower);
}
reference->len_name = len_name; reference->len_name = len_name;
reference->name = malloc(len_name+1); reference->name = malloc(len_name+1);
pac_memory_copy(reference->name, name, len_name); pac_memory_copy(reference->name, name, len_name);
@ -43,28 +51,28 @@ i32_t pac_grow_reference(pac_tlist_s *tlist, pac_ast_reference_s *reference)
return 3; return 3;
} }
i32_t pac_grow_item(pac_tlist_s *tlist, pac_ast_item_s *item) i32_t pac_grow_item(pac_ast_grower_s *grower, pac_ast_item_s *item)
{ {
item->type = PAC_AST_ITEM_INVALID; item->type = PAC_AST_ITEM_INVALID;
pac_memory_zero(item, sizeof(pac_ast_item_s)); pac_memory_zero(item, sizeof(pac_ast_item_s));
if(CURRENT.type == PAC_TOKEN_LIT_STRING) if(CURTOK.type == PAC_TOKEN_LIT_STRING)
{ {
item->type = PAC_AST_ITEM_LITERAL; item->type = PAC_AST_ITEM_LITERAL;
item->data.literal.length = CURRENT.length; item->data.literal.length = CURTOK.length;
item->data.literal.string = malloc(item->data.literal.length + 1); item->data.literal.string = malloc(item->data.literal.length + 1);
pac_memory_copy(item->data.literal.string, CURRENT_STRING, CURRENT.length); pac_memory_copy(item->data.literal.string, CURSTR, CURTOK.length);
item->data.literal.string[item->data.literal.length] = 0x00; item->data.literal.string[item->data.literal.length] = 0x00;
SKIP_TOKEN; SKIP_TOKEN;
return 1; return 1;
} }
if(CURRENT.type == PAC_TOKEN_SIGN_OPEN_TAG) if(CURTOK.type == PAC_TOKEN_SIGN_OPEN_TAG)
{ {
item->type = PAC_AST_ITEM_REFERENCE; item->type = PAC_AST_ITEM_REFERENCE;
return pac_grow_reference(tlist, &item->data.reference); return pac_grow_reference(grower, &item->data.reference);
} }
if(CURRENT.type == PAC_TOKEN_KEYWORD_WORD) if(CURTOK.type == PAC_TOKEN_KEYWORD_WORD)
{ {
item->type = PAC_AST_ITEM_SET; item->type = PAC_AST_ITEM_SET;
item->data.set = PAC_AST_SET_WORD; item->data.set = PAC_AST_SET_WORD;
@ -72,7 +80,7 @@ i32_t pac_grow_item(pac_tlist_s *tlist, pac_ast_item_s *item)
return 1; return 1;
} }
if(CURRENT.type == PAC_TOKEN_KEYWORD_INTEGER) if(CURTOK.type == PAC_TOKEN_KEYWORD_INTEGER)
{ {
item->type = PAC_AST_ITEM_SET; item->type = PAC_AST_ITEM_SET;
item->data.set = PAC_AST_SET_INTEGER; item->data.set = PAC_AST_SET_INTEGER;
@ -82,9 +90,9 @@ i32_t pac_grow_item(pac_tlist_s *tlist, pac_ast_item_s *item)
return -1; return -1;
} }
i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant) i32_t pac_grow_variant(pac_ast_grower_s *grower, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index)
{ {
usz_t start_index = tlist->cursor; usz_t start_index = CURSOR;
pac_memory_zero(variant, sizeof(pac_ast_variant_s)); pac_memory_zero(variant, sizeof(pac_ast_variant_s));
@ -97,7 +105,7 @@ i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant)
items_capacity *= 2; items_capacity *= 2;
variant->items = calloc(sizeof(pac_ast_item_s), items_capacity); variant->items = calloc(sizeof(pac_ast_item_s), items_capacity);
} }
i32_t success = pac_grow_item(tlist, &variant->items[variant->num_items]); i32_t success = pac_grow_item(grower, &variant->items[variant->num_items]);
++variant->num_items; ++variant->num_items;
if(success < 0) if(success < 0)
@ -105,13 +113,13 @@ i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant)
return success - 1; return success - 1;
} }
if( if(
(CURRENT.type == PAC_TOKEN_SIGN_VERTICAL_BAR) (CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
|| (CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON) || (CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
) { ) {
return tlist->cursor - start_index; return CURSOR - start_index;
} }
if(CURRENT.type != PAC_TOKEN_SIGN_COMMA) if(CURTOK.type != PAC_TOKEN_SIGN_COMMA)
{ {
return -1; return -1;
} }
@ -120,37 +128,37 @@ i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant)
return -1; return -1;
} }
i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule) i32_t pac_grow_rule(pac_ast_grower_s *grower, pac_ast_rule_s *rule)
{ {
pac_memory_zero(rule, sizeof(pac_ast_rule_s)); pac_memory_zero(rule, sizeof(pac_ast_rule_s));
// Parse the header // Parse the header
usz_t start_index = tlist->cursor; usz_t start_index = CURSOR;
if(CURRENT.type != PAC_TOKEN_SIGN_OPEN_TAG) if(CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG)
{ {
return -1; return -1;
} }
SKIP_TOKEN; SKIP_TOKEN;
if(CURRENT.type != PAC_TOKEN_WORD) if(CURTOK.type != PAC_TOKEN_WORD)
{ {
puts("A rule name must be a single word!"); puts("A rule name must be a single word!");
return -1; return -1;
} }
usz_t len_name = CURRENT.length; usz_t len_name = CURTOK.length;
usz_t name_start = CURRENT.offset; usz_t name_in_source = CURSTR;
SKIP_TOKEN; SKIP_TOKEN;
if(CURRENT.type != PAC_TOKEN_SIGN_CLOSE_TAG) if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG)
{ {
puts("Missing Tag closing sign!"); puts("Missing Tag closing sign!");
return -1; return -1;
} }
SKIP_TOKEN; SKIP_TOKEN;
if(CURRENT.type != PAC_TOKEN_SIGN_EQUALS) if(CURTOK.type != PAC_TOKEN_SIGN_EQUALS)
return -1; return -1;
SKIP_TOKEN; SKIP_TOKEN;
@ -158,7 +166,7 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
// Parse all variants // Parse all variants
rule->name = malloc(len_name + 1); rule->name = malloc(len_name + 1);
pac_memory_copy(rule->name, &tlist->source[name_start], len_name); pac_memory_copy(rule->name, name_in_source, len_name);
rule->name[len_name] = 0; rule->name[len_name] = 0;
usz_t variants_capacity = 4; usz_t variants_capacity = 4;
@ -170,7 +178,7 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
variants_capacity *= 2; variants_capacity *= 2;
rule->variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * variants_capacity); rule->variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * variants_capacity);
} }
i32_t success = pac_grow_variant(tlist, &rule->variants[rule->num_variants]); i32_t success = pac_grow_variant(grower, &rule->variants[rule->num_variants], rule->name, rule->num_variants);
++rule->num_variants; ++rule->num_variants;
if(success < 0) if(success < 0)
@ -178,12 +186,12 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
printf("Failed parsing a rule's variant!"); printf("Failed parsing a rule's variant!");
while(!END_REACHED) while(!END_REACHED)
{ {
if(CURRENT.type == PAC_TOKEN_SIGN_VERTICAL_BAR) if(CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
{ {
printf("Continuing with next variant.\n"); printf("Continuing with next variant.\n");
break; break;
} }
if(CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON) if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
{ {
printf("Continuing with next rule.\n"); printf("Continuing with next rule.\n");
SKIP_TOKEN; SKIP_TOKEN;
@ -194,13 +202,13 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
continue; continue;
} }
if(CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON) if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
{ {
SKIP_TOKEN; SKIP_TOKEN;
return tlist->cursor - start_index; return CURSOR - start_index;
} }
if(CURRENT.type != PAC_TOKEN_SIGN_VERTICAL_BAR) if(CURTOK.type != PAC_TOKEN_SIGN_VERTICAL_BAR)
{ {
return -1; return -1;
} }
@ -216,6 +224,11 @@ pac_ast_s pac_grow_ast(pac_tlist_s tokens)
ast.num_rules = 0; ast.num_rules = 0;
ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity); ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity);
pac_ast_grower_s grower;
grower.logger = pac_create_logger();
grower.token_list = &tokens;
grower.failed = FALSE;
while(tokens.cursor < tokens.num_tokens) while(tokens.cursor < tokens.num_tokens)
{ {
if(ast.num_rules >= rules_capacity) if(ast.num_rules >= rules_capacity)
@ -223,7 +236,7 @@ pac_ast_s pac_grow_ast(pac_tlist_s tokens)
rules_capacity *= 2; rules_capacity *= 2;
ast.rules = realloc(ast.rules, sizeof(pac_ast_rule_s) * rules_capacity); ast.rules = realloc(ast.rules, sizeof(pac_ast_rule_s) * rules_capacity);
} }
int success = pac_grow_rule(&tokens, &ast.rules[ast.num_rules]); int success = pac_grow_rule(&grower, &ast.rules[ast.num_rules]);
if(success < 0) if(success < 0)
{ {
printf("Failed parsing a rule at index: %lu!\n", tokens.cursor); printf("Failed parsing a rule at index: %lu!\n", tokens.cursor);

115
code/src/ast_errors.c Normal file
View File

@ -0,0 +1,115 @@
#include <ast.h>
#include <logger.h>
#include <tokenizer.h>
// Token-cursor helper macros. Each one expands to an expression on a variable
// named `grower` (a pac_ast_grower_s pointer) that must be in scope at the use
// site. None of them performs any bounds checking.

// Address of the source character at the offset of the token under the cursor.
#define CURSTR &grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset]
// The token under the cursor.
#define CURTOK (grower->token_list->tokens[grower->token_list->cursor])
// The cursor itself: an index into the token array.
#define CURSOR (grower->token_list->cursor)
// Moves the cursor one token forward.
#define SKIP_TOKEN ++grower->token_list->cursor
// Moves the cursor one token backward.
#define REWIND_TOKEN --grower->token_list->cursor
// The token stored at an arbitrary index of the token array.
#define TOKEN_AT(index) (grower->token_list->tokens[(index)])
// True once the cursor is at or beyond the number of tokens in the list.
#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens)
// pac_ast_builder_is_at_item_start: A utility function which returns whether
// the token under the cursor has one of the types an item can start with:
// a string literal, an opening tag, or the 'word' / 'integer' keywords.
// The cursor is only read, never moved.
bool_t pac_ast_builder_is_at_item_start(pac_ast_grower_s *grower)
{
    if(
        (CURTOK.type == PAC_TOKEN_LIT_STRING)
        || (CURTOK.type == PAC_TOKEN_SIGN_OPEN_TAG)
        || (CURTOK.type == PAC_TOKEN_KEYWORD_WORD)
        || (CURTOK.type == PAC_TOKEN_KEYWORD_INTEGER)
    ) {
        return TRUE;
    }

    // Every other token type cannot begin an item.
    return FALSE;
}
// pac_ast_handle_invalid_reference_name_token: Logs a naming error for a
// reference whose opening tag is not followed by a word token.
//
// Expects the cursor to sit on the token right after the opening tag, so the
// cursor must be at least 1. The cursor is not moved.
//
// The text stored in the error starts at the opening tag. It ends with a
// closing tag if one is found within the next three tokens; otherwise it ends
// with the token after the opening tag.
//
// Marks the grower as failed.
// Returns whether the parser can continue to catch errors (always TRUE here).
bool_t pac_ast_handle_invalid_reference_name_token(pac_ast_grower_s *grower)
{
    usz_t num_tokens = grower->token_list->num_tokens;
    usz_t open_tag_src_offset = TOKEN_AT(CURSOR-1).offset;
    usz_t len_reference_name = 0;

    // Find closing tag for getting the name of the word for the error message.
    // The lookahead stops at the end of the token list so that it never reads
    // past the token array.
    usz_t tried_tokens = 0;
    while((tried_tokens < 3) && ((CURSOR + tried_tokens) < num_tokens))
    {
        if(TOKEN_AT(CURSOR + tried_tokens).type == PAC_TOKEN_SIGN_CLOSE_TAG)
        {
            len_reference_name = TOKEN_AT(CURSOR + tried_tokens).offset - open_tag_src_offset;
            ++len_reference_name; // Take the closing tag into the name
            break;
        }
        ++tried_tokens;
    }

    // Token used for the fallback name and for the reported position. This is
    // the token under the cursor, or the opening tag itself when the opening
    // tag was the very last token of the input.
    usz_t anchor_index = CURSOR;
    if(anchor_index >= num_tokens)
        anchor_index = CURSOR - 1;

    // If no closing tag could be found, use the token after the opening tag.
    if(len_reference_name == 0)
        len_reference_name = ((TOKEN_AT(anchor_index).offset + TOKEN_AT(anchor_index).length) - open_tag_src_offset);

    pac_naming_error_s error;
    error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME;
    error.line = TOKEN_AT(anchor_index).line;
    error.column = TOKEN_AT(anchor_index).column;

    // One extra byte is requested for the terminator, which is written
    // explicitly below instead of relying on the allocator to zero the memory.
    // NOTE(review): the result of pac_log_alloc is used unchecked, as before;
    // confirm that it cannot return NULL.
    error.specifics.invalid_rule_name.given_rule_name = pac_log_alloc(&grower->logger, len_reference_name + 1);
    pac_memory_copy(error.specifics.invalid_rule_name.given_rule_name, &grower->token_list->source[open_tag_src_offset], len_reference_name);
    error.specifics.invalid_rule_name.given_rule_name[len_reference_name] = 0x00;

    pac_log_naming_error(&grower->logger, error);
    grower->failed = TRUE;
    return TRUE;
}
// pac_ast_handle_missing_reference_close_tag: Logs a "missing token" syntax
// error at the line and column of the token under the cursor, stating that a
// tag closer was wanted there. The cursor is not moved.
//
// Marks the grower as failed, like
// pac_ast_handle_invalid_reference_name_token does when it logs its error.
// Returns whether the parser can continue to catch errors (always TRUE here).
bool_t pac_ast_handle_missing_reference_close_tag(pac_ast_grower_s *grower)
{
    pac_syntax_error_s error;
    error.type = PAC_SYNTAX_ERROR_MISSING_TOKEN;
    error.line = CURTOK.line;
    error.column = CURTOK.column;
    error.specifics.missing_token.hint = NULL; // No hint for this error.
    error.specifics.missing_token.wanted_token = "Tag Closer (>)";

    pac_log_syntax_error(&grower->logger, error);
    grower->failed = TRUE;
    return TRUE;
}
// pac_ast_handle_reference_with_equals_sign: Logs an "odd token" syntax error
// at the line and column of the token under the cursor. It is called when an
// equals sign directly follows a complete reference. The attached hint
// suggests that a semicolon is missing before the reference. The cursor is
// not moved.
//
// Marks the grower as failed, like
// pac_ast_handle_invalid_reference_name_token does when it logs its error.
// Returns whether the parser can continue to catch errors (always FALSE here).
bool_t pac_ast_handle_reference_with_equals_sign(pac_ast_grower_s *grower)
{
    pac_syntax_error_s error;
    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    error.line = CURTOK.line;
    error.column = CURTOK.column;
    error.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago.";
    error.specifics.odd_token.num_valid_options = 1;
    error.specifics.odd_token.valid_options[0] = "Any Rule Item";

    pac_log_syntax_error(&grower->logger, error);
    grower->failed = TRUE;

    // TODO: Handle the tokens following this as a new rule.
    return FALSE;
}
// pac_ast_handle_unknown_item_type: Logs an "odd token" syntax error at the
// line and column of the token under the cursor. It then moves the cursor
// forward until it sits on a token that can start an item, or until the end
// of the token list is reached.
//
// Marks the grower as failed, like
// pac_ast_handle_invalid_reference_name_token does when it logs its error.
// Returns whether the parser can continue to catch errors (always TRUE here).
bool_t pac_ast_handle_unknown_item_type(pac_ast_grower_s *grower)
{
    pac_syntax_error_s error;
    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    error.line = CURTOK.line;
    error.column = CURTOK.column;
    // The hint must not stay uninitialized, because the struct is handed to
    // the logger by value.
    // NOTE(review): assumes that the logger treats a NULL odd_token hint as
    // "no hint", the way missing_token.hint is set to NULL elsewhere; confirm.
    error.specifics.odd_token.hint = NULL;
    error.specifics.odd_token.num_valid_options = 1;
    error.specifics.odd_token.valid_options[0] = "Any Rule Item";

    pac_log_syntax_error(&grower->logger, error);
    grower->failed = TRUE;

    // Skip ahead to the next possible item start. The end-of-list check comes
    // first so that the token array is never read beyond its last element.
    while(
        (grower->token_list->cursor < grower->token_list->num_tokens)
        && !pac_ast_builder_is_at_item_start(grower)
    ) {
        SKIP_TOKEN;
    }

    return TRUE;
}