From b8f4fd9feb70ab4798084b2f13e55bf73522d3dc Mon Sep 17 00:00:00 2001 From: Eric-Paul Ickhorn Date: Wed, 29 Nov 2023 00:26:56 +0100 Subject: [PATCH] Added half-way decent error handling --- core-parser/inc/ast.h | 77 ++++++---- core-parser/inc/tokenizer.h | 3 - core-parser/src/ast.c | 285 ++++++++++++++++++++++------------- core-parser/src/ast_errors.c | 171 +++++++++++++++------ core-parser/src/grammar.c | 2 +- core-parser/src/tokenizer.c | 1 - 6 files changed, 350 insertions(+), 189 deletions(-) diff --git a/core-parser/inc/ast.h b/core-parser/inc/ast.h index 1a75d8d..1fd286c 100644 --- a/core-parser/inc/ast.h +++ b/core-parser/inc/ast.h @@ -6,6 +6,43 @@ #include #include +// pac_ast_set_e: An enumeration of all sets known in Parcel's AST. +// +// Sets are descriptions which describe a rough format of token, like +// with variable names; the format is known, but the actual name isn't. +typedef enum +{ + PAC_AST_SET_RUNE, + PAC_AST_SET_WORD, + PAC_AST_SET_INTEGER, + PAC_AST_SET_FLOAT + +} pac_ast_set_e; + +typedef enum +{ + PAC_AST_ITEM_INVALID = 0x00, + PAC_AST_ITEM_REFERENCE, + PAC_AST_ITEM_LITERAL, + PAC_AST_ITEM_SET + +} pac_ast_item_e; + +// pac_ast_recovery_level_e: How much higher in the call stack the program +// flow has to go to be able to recover. +typedef enum +{ + PAC_AST_STATUS_SUCCESS, + PAC_AST_STATUS_ERROR_HANDLED, + PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER, + PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER, + PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER, + PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE, + PAC_AST_STATUS_NOT_RECOVERABLE, + PAC_AST_STATUS_UNEXPECTED_FILE_END + +} pac_ast_status_e; + typedef struct pac_ast pac_ast_s; typedef struct pac_ast_rule pac_ast_rule_s; typedef struct pac_ast_variant pac_ast_variant_s; @@ -47,28 +84,6 @@ struct pac_ast_literal char *string; }; -// pac_ast_set_e: An enumeration of all sets known in Parcel's AST. -// -// Sets are descriptions which describe a rough format of token, like -// with variable names; the format is known, but the actual name isn't. -typedef enum -{ - PAC_AST_SET_RUNE, - PAC_AST_SET_WORD, - PAC_AST_SET_INTEGER, - PAC_AST_SET_FLOAT - -} pac_ast_set_e; - -typedef enum -{ - PAC_AST_ITEM_INVALID = 0x00, - PAC_AST_ITEM_REFERENCE, - PAC_AST_ITEM_LITERAL, - PAC_AST_ITEM_SET - -} pac_ast_item_e; - struct pac_ast_item { pac_ast_item_e type; @@ -80,18 +95,22 @@ struct pac_ast_item } data; }; -typedef struct pac_ast_grower +typedef struct pac_ast_builder { + usz_t cursor; pac_tlist_s *token_list; - pac_logger_s logger; bool_t failed; -} pac_ast_grower_s; + pac_logger_s logger; + +} pac_ast_builder_s; -pac_ast_s pac_grow_ast (pac_tlist_s tokens); +pac_ast_s pac_build_ast (pac_tlist_s tokens); -bool_t pac_ast_handle_invalid_reference_name_token (pac_ast_grower_s *grower); -bool_t pac_ast_handle_missing_reference_close_tag (pac_ast_grower_s *grower); -bool_t pac_ast_handle_reference_with_equals_sign (pac_ast_grower_s *grower); +pac_ast_status_e pac_ast_handle_invalid_reference_name_token (pac_ast_builder_s *builder); +pac_ast_status_e pac_ast_handle_missing_reference_close_tag (pac_ast_builder_s *builder); +pac_ast_status_e pac_ast_handle_reference_with_equals_sign (pac_ast_builder_s *builder); +pac_ast_status_e pac_ast_handle_missing_item_delimiter (pac_ast_builder_s *builder); +pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header (pac_ast_builder_s *builder); #endif // PARCEL_AST_H diff --git a/core-parser/inc/tokenizer.h b/core-parser/inc/tokenizer.h index 08d8ee2..6e76437 100644 --- a/core-parser/inc/tokenizer.h +++ b/core-parser/inc/tokenizer.h @@ -48,9 +48,6 @@ struct pac_tlist char *source; usz_t num_tokens; pac_token_s *tokens; - - // cursor: An index into the 'tokens'-array; used in later stages. - usz_t cursor; }; pac_token_e pac_word_to_token_type (char *word, usz_t length); diff --git a/core-parser/src/ast.c b/core-parser/src/ast.c index 45755ad..e8f8573 100644 --- a/core-parser/src/ast.c +++ b/core-parser/src/ast.c @@ -6,41 +6,41 @@ #include #include -#define CURSTR &grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset] -#define CURTOK (grower->token_list->tokens[grower->token_list->cursor]) -#define CURSOR (grower->token_list->cursor) -#define SKIP_TOKEN ++grower->token_list->cursor -#define REWIND_TOKEN --grower->token_list->cursor -#define TOKEN_AT(index) (grower->token_list->tokens[index]) -#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens) +#define CURRENT_STRING &builder->token_list->source[builder->token_list->tokens[builder->cursor].offset] +#define CURRENT_TOKEN (builder->token_list->tokens[builder->cursor]) +#define TOKEN_CURSOR (builder->cursor) +#define SKIP_TOKEN ++builder->cursor +#define REWIND_TOKEN --builder->cursor +#define TOKEN_AT(index) (builder->token_list->tokens[(index)]) +#define END_REACHED (builder->cursor >= builder->token_list->num_tokens) -i32_t pac_grow_reference(pac_ast_grower_s *grower, pac_ast_reference_s *reference) +pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_reference_s *reference) { - if(CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG) + if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG) { // This is only possible with malformed state. - return -1; + return PAC_AST_STATUS_NOT_RECOVERABLE; } SKIP_TOKEN; - if(CURTOK.type != PAC_TOKEN_WORD) + if(CURRENT_TOKEN.type != PAC_TOKEN_WORD) { - return pac_ast_handle_invalid_reference_name_token(grower); + return pac_ast_handle_invalid_reference_name_token(builder); } - usz_t len_name = CURTOK.length; - char *name = CURSTR; + usz_t len_name = CURRENT_TOKEN.length; + char *name = CURRENT_STRING; SKIP_TOKEN; - if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG) + if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG) { - return pac_ast_handle_missing_reference_close_tag(grower); + return pac_ast_handle_missing_reference_close_tag(builder); } SKIP_TOKEN; - if(CURTOK.type == PAC_TOKEN_SIGN_EQUALS) + if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS) { - return pac_ast_handle_reference_with_equals_sign(grower); + return pac_ast_handle_reference_with_equals_sign(builder); } reference->len_name = len_name; @@ -48,52 +48,52 @@ i32_t pac_grow_reference(pac_ast_grower_s *grower, pac_ast_reference_s *referenc pac_memory_copy(reference->name, name, len_name); reference->name[len_name] = 0x00; - return 3; + return PAC_AST_STATUS_SUCCESS; } -i32_t pac_grow_item(pac_ast_grower_s *grower, pac_ast_item_s *item) +pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item) { item->type = PAC_AST_ITEM_INVALID; pac_memory_zero(item, sizeof(pac_ast_item_s)); - if(CURTOK.type == PAC_TOKEN_LIT_STRING) + if(CURRENT_TOKEN.type == PAC_TOKEN_LIT_STRING) { item->type = PAC_AST_ITEM_LITERAL; - item->data.literal.length = CURTOK.length; + item->data.literal.length = CURRENT_TOKEN.length; item->data.literal.string = malloc(item->data.literal.length + 1); - pac_memory_copy(item->data.literal.string, CURSTR, CURTOK.length); + pac_memory_copy(item->data.literal.string, CURRENT_STRING, CURRENT_TOKEN.length); item->data.literal.string[item->data.literal.length] = 0x00; SKIP_TOKEN; - return 1; + return PAC_AST_STATUS_SUCCESS; } - if(CURTOK.type == PAC_TOKEN_SIGN_OPEN_TAG) + if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_OPEN_TAG) { item->type = PAC_AST_ITEM_REFERENCE; - return pac_grow_reference(grower, &item->data.reference); + return pac_build_ast_reference(builder, &item->data.reference); } - if(CURTOK.type == PAC_TOKEN_KEYWORD_WORD) + if(CURRENT_TOKEN.type == PAC_TOKEN_KEYWORD_WORD) { item->type = PAC_AST_ITEM_SET; item->data.set = PAC_AST_SET_WORD; SKIP_TOKEN; - return 1; + return PAC_AST_STATUS_SUCCESS; } - if(CURTOK.type == PAC_TOKEN_KEYWORD_INTEGER) + if(CURRENT_TOKEN.type == PAC_TOKEN_KEYWORD_INTEGER) { item->type = PAC_AST_ITEM_SET; item->data.set = PAC_AST_SET_INTEGER; SKIP_TOKEN; - return 1; + return PAC_AST_STATUS_SUCCESS; } - return -1; + builder->failed = TRUE; + SKIP_TOKEN; // Skip to (probably) the next item. + return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; } -i32_t pac_grow_variant(pac_ast_grower_s *grower, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index) +pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index) { - usz_t start_index = CURSOR; - pac_memory_zero(variant, sizeof(pac_ast_variant_s)); usz_t items_capacity = 8; @@ -105,68 +105,93 @@ i32_t pac_grow_variant(pac_ast_grower_s *grower, pac_ast_variant_s *variant, cha items_capacity *= 2; variant->items = calloc(sizeof(pac_ast_item_s), items_capacity); } - i32_t success = pac_grow_item(grower, &variant->items[variant->num_items]); + pac_ast_status_e status = pac_build_ast_item(builder, &variant->items[variant->num_items]); ++variant->num_items; - if(success < 0) + if(status == PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER) { - return success - 1; - } - if( - (CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR) - || (CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON) - ) { - return CURSOR - start_index; + continue; } - if(CURTOK.type != PAC_TOKEN_SIGN_COMMA) + if(status != PAC_AST_STATUS_SUCCESS) { - return -1; + return status; + } + if( + (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR) + || (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON) + ) { + return PAC_AST_STATUS_SUCCESS; + } + + if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA) + { + pac_ast_status_e comma_missing_status = pac_ast_handle_missing_item_delimiter(builder); + if(comma_missing_status != PAC_AST_STATUS_ERROR_HANDLED) return comma_missing_status; + } SKIP_TOKEN; } - return -1; + return PAC_AST_STATUS_UNEXPECTED_FILE_END; } -i32_t pac_grow_rule(pac_ast_grower_s *grower, pac_ast_rule_s *rule) + + +pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder) +{ + if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG) + { + // Invalid state + return PAC_AST_STATUS_NOT_RECOVERABLE; + } + SKIP_TOKEN; + + pac_ast_status_e status; + + + if(CURRENT_TOKEN.type != PAC_TOKEN_WORD) + { + if((status = pac_ast_handle_invalid_rule_name(builder)) != PAC_AST_STATUS_ERROR_HANDLED) + return status; + } + SKIP_TOKEN; + + if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG) + { + if((status = pac_ast_handle_missing_rule_closing_sign(builder) != PAC_AST_STATUS_ERROR_HANDLED)) + return status; + } + SKIP_TOKEN; + + if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_EQUALS) + { + pac_ast_status_e status = pac_ast_handle_missing_equals_sign_after_rule_header(builder); + if(status == PAC_AST_STATUS_ERROR_HANDLED) + { + REWIND_TOKEN; + } + else return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + } + SKIP_TOKEN; + + return PAC_AST_STATUS_SUCCESS; +} + +pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s *rule) { pac_memory_zero(rule, sizeof(pac_ast_rule_s)); - // Parse the header - - usz_t start_index = CURSOR; - - if(CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG) - { - return -1; - } - SKIP_TOKEN; - - if(CURTOK.type != PAC_TOKEN_WORD) - { - puts("A rule name must be a single word!"); - return -1; - } - usz_t len_name = CURTOK.length; - usz_t name_in_source = CURSTR; - SKIP_TOKEN; - - if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG) - { - puts("Missing Tag closing sign!"); - return -1; - } - SKIP_TOKEN; - - if(CURTOK.type != PAC_TOKEN_SIGN_EQUALS) - return -1; - SKIP_TOKEN; - + usz_t start_index = TOKEN_CURSOR; + pac_skip_ast_rule_header(builder); // Parse all variants + usz_t len_name = TOKEN_AT(start_index+1).length; + usz_t name_offset = TOKEN_AT(start_index+1).offset; + char *name_pointer = &builder->token_list->source[name_offset]; + rule->name = malloc(len_name + 1); - pac_memory_copy(rule->name, name_in_source, len_name); + pac_memory_copy(rule->name, name_pointer, len_name); rule->name[len_name] = 0; usz_t variants_capacity = 4; @@ -178,68 +203,114 @@ i32_t pac_grow_rule(pac_ast_grower_s *grower, pac_ast_rule_s *rule) variants_capacity *= 2; rule->variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * variants_capacity); } - i32_t success = pac_grow_variant(grower, &rule->variants[rule->num_variants], rule->name, rule->num_variants); + pac_ast_status_e status = pac_build_ast_variant(builder, &rule->variants[rule->num_variants], rule->name, rule->num_variants); ++rule->num_variants; - if(success < 0) + switch(status) { - printf("Failed parsing a rule's variant!"); - while(!END_REACHED) + case PAC_AST_STATUS_SUCCESS: + break; + case PAC_AST_STATUS_NOT_RECOVERABLE: + return PAC_AST_STATUS_NOT_RECOVERABLE; + + case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE: + puts("Failed parsing a rule's variant, recovering at next rule!"); + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + + case PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER: { - if(CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR) + builder->failed = TRUE; + printf("Failed parsing a rule's variant, "); + while(!END_REACHED) { - printf("Continuing with next variant.\n"); - break; - } - if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON) - { - printf("Continuing with next rule.\n"); + if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR) + { + puts("continuing with the next variant."); + break; + } + if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON) + { + puts("continuing with next rule."); + SKIP_TOKEN; + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + } SKIP_TOKEN; - return 2; } - SKIP_TOKEN; - } - continue; + } break; + + case PAC_AST_STATUS_UNEXPECTED_FILE_END: + return PAC_AST_STATUS_UNEXPECTED_FILE_END; + + default: + printf("Internal Error: An invalid status code was produced: %d\n", status); + return PAC_AST_STATUS_NOT_RECOVERABLE; } - if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON) + if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON) { SKIP_TOKEN; - return CURSOR - start_index; + return PAC_AST_STATUS_SUCCESS; } - if(CURTOK.type != PAC_TOKEN_SIGN_VERTICAL_BAR) + if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_VERTICAL_BAR) { - return -1; + return PAC_AST_STATUS_NOT_RECOVERABLE; } SKIP_TOKEN; } - return -1; + return PAC_AST_STATUS_UNEXPECTED_FILE_END; } -pac_ast_s pac_grow_ast(pac_tlist_s tokens) +void pac_ast_find_next_rule(pac_ast_builder_s *builder) +{ + while(!END_REACHED) + { + if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON) + { + SKIP_TOKEN; + break; + } + SKIP_TOKEN; + } + return; +} + +pac_ast_s pac_build_ast(pac_tlist_s tokens) { usz_t rules_capacity = 32; pac_ast_s ast; ast.num_rules = 0; ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity); - pac_ast_grower_s grower; - grower.logger = pac_create_logger(); - grower.token_list = &tokens; - grower.failed = FALSE; + pac_ast_builder_s builder; + builder.cursor = 0; + builder.logger = pac_create_logger(); + builder.token_list = &tokens; + builder.failed = FALSE; - while(tokens.cursor < tokens.num_tokens) + while(builder.cursor < tokens.num_tokens) { if(ast.num_rules >= rules_capacity) { rules_capacity *= 2; ast.rules = realloc(ast.rules, sizeof(pac_ast_rule_s) * rules_capacity); } - int success = pac_grow_rule(&grower, &ast.rules[ast.num_rules]); - if(success < 0) + + pac_ast_status_e status = pac_build_ast_rule(&builder, &ast.rules[ast.num_rules]); + + if(status == PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE) + pac_ast_find_next_rule(&builder); + + if(status == PAC_AST_STATUS_UNEXPECTED_FILE_END) { - printf("Failed parsing a rule at index: %lu!\n", tokens.cursor); + puts("File ended prematurely!"); + break; + } + + if(status != PAC_AST_STATUS_SUCCESS) + { + printf("Failed parsing a rule at index: %lu with status code: %d!\n", builder.cursor, status); + continue; } ++ast.num_rules; } diff --git a/core-parser/src/ast_errors.c b/core-parser/src/ast_errors.c index a1b34ea..41ae30a 100644 --- a/core-parser/src/ast_errors.c +++ b/core-parser/src/ast_errors.c @@ -2,21 +2,35 @@ #include #include -#define CURSTR &grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset] -#define CURTOK (grower->token_list->tokens[grower->token_list->cursor]) -#define CURSOR (grower->token_list->cursor) -#define SKIP_TOKEN ++grower->token_list->cursor -#define REWIND_TOKEN --grower->token_list->cursor -#define TOKEN_AT(index) (grower->token_list->tokens[(index)]) -#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens) +#define CURRENT_STRING &builder->token_list->source[builder->token_list->tokens[builder->cursor].offset] +#define CURRENT_TOKEN (builder->token_list->tokens[builder->cursor]) +#define TOKEN_CURSOR builder->cursor +#define SKIP_TOKEN ++builder->cursor +#define REWIND_TOKEN --builder->cursor +#define TOKEN_AT(index) (builder->token_list->tokens[(index)]) +#define END_REACHED (builder->cursor >= builder->token_list->num_tokens) +bool_t pac_ast_builder_forward_seek_token_type(pac_ast_builder_s *builder, pac_token_e type, usz_t max_steps) +{ + usz_t cursor_backup = TOKEN_CURSOR; + usz_t step_counter = 0; + while(!END_REACHED) + { + if(step_counter >= max_steps) break; + if(CURRENT_TOKEN.type == type) return TRUE; + ++step_counter; + } + TOKEN_CURSOR = cursor_backup; + return FALSE; +} + // pac_ast_builder_is_at_item_start: An utility function which returns wheter // the parser is at the start of an item at in the current state. -bool_t pac_ast_builder_is_at_item_start(pac_ast_grower_s *grower) +bool_t pac_ast_builder_is_at_item_start(pac_ast_builder_s *builder) { - switch(CURTOK.type) + switch(CURRENT_TOKEN.type) { case PAC_TOKEN_LIT_STRING: case PAC_TOKEN_SIGN_OPEN_TAG: @@ -28,88 +42,149 @@ bool_t pac_ast_builder_is_at_item_start(pac_ast_grower_s *grower) return FALSE; } - - -// Returns whether the parser can continue to catch errors -bool_t pac_ast_handle_invalid_reference_name_token(pac_ast_grower_s *grower) +bool_t pac_ast_builder_forward_seek_item_start(pac_ast_builder_s *builder, usz_t max_steps) { - usz_t open_tag_src_offset = TOKEN_AT(CURSOR-1).offset; + usz_t cursor_backup = TOKEN_CURSOR; + usz_t step_counter = 0; + while(!END_REACHED) + { + if(step_counter >= max_steps) break; + if(pac_ast_builder_is_at_item_start(builder)) return TRUE; + ++step_counter; + } + TOKEN_CURSOR = cursor_backup; + return FALSE; +} + + + +pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s *builder) +{ + usz_t open_tag_src_offset = TOKEN_AT(TOKEN_CURSOR-1).offset; usz_t len_reference_name = 0; // Find closing tag for getting the name of the word for the error message. usz_t tried_tokens = 0; while(tried_tokens < 3) { - if(TOKEN_AT(CURSOR + tried_tokens).type == PAC_TOKEN_SIGN_CLOSE_TAG) + if(TOKEN_AT(TOKEN_CURSOR + tried_tokens).type == PAC_TOKEN_SIGN_CLOSE_TAG) { - len_reference_name = TOKEN_AT(CURSOR + tried_tokens).offset - open_tag_src_offset; + len_reference_name = TOKEN_AT(TOKEN_CURSOR + tried_tokens).offset - open_tag_src_offset; ++len_reference_name; // Take the closing tag into the name break; } ++tried_tokens; } + // If no closing tag could be found, use the token after the opening tag. if(len_reference_name == 0) - len_reference_name = ((CURTOK.offset + CURTOK.length) - open_tag_src_offset); - + { + len_reference_name = ((CURRENT_TOKEN.offset + CURRENT_TOKEN.length) - open_tag_src_offset); + } + char *given_rule_name = pac_log_alloc(&builder->logger, len_reference_name + 1); + pac_memory_copy(given_rule_name, &builder->token_list->source[open_tag_src_offset], len_reference_name); pac_naming_error_s error; - error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME; - error.line = CURTOK.line; - error.column = CURTOK.column; - error.specifics.invalid_rule_name.given_rule_name = pac_log_alloc(&grower->logger, len_reference_name + 1); - pac_memory_copy(error.specifics.invalid_rule_name.given_rule_name, &grower->token_list->source[open_tag_src_offset], len_reference_name); - pac_log_naming_error(&grower->logger, error); + error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME; + error.line = CURRENT_TOKEN.line; + error.column = CURRENT_TOKEN.column; + error.specifics.invalid_rule_name.given_rule_name = given_rule_name; + pac_log_naming_error(&builder->logger, error); - grower->failed = TRUE; - return TRUE; + builder->failed = TRUE; + + if(!pac_ast_builder_forward_seek_item_start(builder, 4)) + return PAC_AST_STATUS_NOT_RECOVERABLE; + return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; + } -// Returns whether the parser can continue to catch errors -bool_t pac_ast_handle_missing_reference_close_tag(pac_ast_grower_s *grower) +pac_ast_status_e pac_ast_handle_missing_reference_close_tag(pac_ast_builder_s *builder) { pac_syntax_error_s error; error.type = PAC_SYNTAX_ERROR_MISSING_TOKEN; - error.line = CURTOK.line; - error.column = CURTOK.column; + error.line = CURRENT_TOKEN.line; + error.column = CURRENT_TOKEN.column; error.specifics.missing_token.hint = NULL; error.specifics.missing_token.wanted_token = "Tag Closer (>)"; - pac_log_syntax_error(&grower->logger, error); + pac_log_syntax_error(&builder->logger, error); - return TRUE; + builder->failed = TRUE; + return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; } -// Returns whether the parser can continue to catch errors -bool_t pac_ast_handle_reference_with_equals_sign(pac_ast_grower_s *grower) +pac_ast_status_e pac_ast_handle_reference_with_equals_sign(pac_ast_builder_s *builder) { pac_syntax_error_s error; error.type = PAC_SYNTAX_ERROR_ODD_TOKEN; - error.line = CURTOK.line; - error.column = CURTOK.column; + error.line = CURRENT_TOKEN.line; + error.column = CURRENT_TOKEN.column; error.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago."; error.specifics.odd_token.num_valid_options = 1; error.specifics.odd_token.valid_options[0] = "Any Rule Item"; - pac_log_syntax_error(&grower->logger, error); + pac_log_syntax_error(&builder->logger, error); - // TODO: Handle the tokens following this as a new rule. - - return FALSE; + builder->failed = TRUE; + return PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER; } -// Returns whether the parser can continue to catch errors -bool_t pac_ast_handle_unknown_item_type(pac_ast_grower_s *grower) +pac_ast_status_e pac_ast_handle_unknown_item_type(pac_ast_builder_s *builder) { pac_syntax_error_s error; error.type = PAC_SYNTAX_ERROR_ODD_TOKEN; - error.line = CURTOK.line; - error.column = CURTOK.column; + error.line = CURRENT_TOKEN.line; + error.column = CURRENT_TOKEN.column; error.specifics.odd_token.num_valid_options = 1; error.specifics.odd_token.valid_options[0] = "Any Rule Item"; - pac_log_syntax_error(&grower->logger, error); - while(!pac_ast_builder_is_at_item_start(grower)) - SKIP_TOKEN; + pac_log_syntax_error(&builder->logger, error); + SKIP_TOKEN; + if(!pac_ast_builder_forward_seek_item_start(builder, 3)) + return PAC_AST_STATUS_NOT_RECOVERABLE; - return TRUE; + builder->failed = TRUE; + return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; +} + +pac_ast_status_e pac_ast_handle_missing_item_delimiter(pac_ast_builder_s *builder) +{ + SKIP_TOKEN; + puts("Missing the item delimiter!"); + builder->failed = TRUE; + return PAC_AST_STATUS_ERROR_HANDLED; +} + +pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header(pac_ast_builder_s *builder) +{ + puts("Missing Equals sign after rule header!"); + builder->failed = TRUE; + + // Continue at first item (if possible) + if(!pac_ast_builder_forward_seek_item_start(builder, 2)) + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + return PAC_AST_STATUS_ERROR_HANDLED; +} + +pac_ast_status_e pac_ast_handle_invalid_rule_name(pac_ast_builder_s *builder) +{ + builder->failed = TRUE; + if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3)) + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + return PAC_AST_STATUS_ERROR_HANDLED; +} + +pac_ast_status_e pac_ast_handle_missing_rule_closing_sign(pac_ast_builder_s *builder) +{ + puts("Missing Tag closing sign!"); + builder->failed = TRUE; + + // Continue at equals sign (if possible) + if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 2)) + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + + // The next step in the calling function is to advance and check for the equals sign, + // thus, it is necessary to go one back for it to find it again when advancing. + REWIND_TOKEN; + return PAC_AST_STATUS_ERROR_HANDLED; } diff --git a/core-parser/src/grammar.c b/core-parser/src/grammar.c index f86d288..12e187d 100644 --- a/core-parser/src/grammar.c +++ b/core-parser/src/grammar.c @@ -10,7 +10,7 @@ pac_grammar_s pac_convert_grammar(char *source) { usz_t len_source = strlen(source); pac_tlist_s tokens = pac_tokenize_grammar(source, len_source); - pac_ast_s ast = pac_grow_ast(tokens); + pac_ast_s ast = pac_build_ast(tokens); pac_grammar_s grammar = pac_link_grammar(ast); return grammar; diff --git a/core-parser/src/tokenizer.c b/core-parser/src/tokenizer.c index ca8ef3a..fd218b3 100644 --- a/core-parser/src/tokenizer.c +++ b/core-parser/src/tokenizer.c @@ -48,7 +48,6 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source) usz_t tokens_capacity = 1024; pac_tlist_s list; - list.cursor = 0; list.source = source; list.num_tokens = 0; list.tokens = calloc(sizeof(pac_token_s), tokens_capacity);