diff --git a/checks/print-grammar/src/main.c b/checks/print-grammar/src/main.c index 6f906d9..617c397 100644 --- a/checks/print-grammar/src/main.c +++ b/checks/print-grammar/src/main.c @@ -4,7 +4,7 @@ #include #include -char * load_file(char *path) +char * load_file(char *path, long *len) { FILE *file = fopen(path, "r"); if(file == NULL) @@ -20,6 +20,9 @@ char * load_file(char *path) char *content = malloc(length+1); content[length] = 0x00; fread(content, 1, length, file); + + if(len != NULL) + (*len) = length; fclose(file); return content; @@ -32,12 +35,18 @@ int main(int argc, char **argv) printf("Usage: %s \n", argv[0]); return -1; } - char *source = load_file(argv[1]); + usz_t len_source; + char *source = load_file(argv[1], &len_source); if(source == NULL) { puts("Stopping due to previous error!"); return -2; } - pac_convert_grammar(source); + pac_grammar_s grammar = pac_convert_grammar(source, len_source); + free(source); + + pac_display_grammar(grammar); + pac_delete_grammar(grammar); + return 0; } diff --git a/checks/print-tokens/src/main.c b/checks/print-tokens/src/main.c index 1748fe0..67db6ab 100644 --- a/checks/print-tokens/src/main.c +++ b/checks/print-tokens/src/main.c @@ -23,9 +23,7 @@ char * load_file(char *path, long *len) fread(content, 1, length, file); if(len != NULL) - { (*len) = length; - } fclose(file); return content; @@ -46,6 +44,8 @@ int main(int argc, char **argv) return -2; } pac_tlist_s tokens = pac_tokenize_grammar(source, len_source); + free(source); + pac_display_tlist(tokens); return 0; diff --git a/core-parser/inc/ast.h b/core-parser/inc/ast.h index 1fd286c..2041290 100644 --- a/core-parser/inc/ast.h +++ b/core-parser/inc/ast.h @@ -37,6 +37,7 @@ typedef enum PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER, PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER, PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER, + PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_VARIANT, PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE, PAC_AST_STATUS_NOT_RECOVERABLE, 
PAC_AST_STATUS_UNEXPECTED_FILE_END @@ -46,7 +47,7 @@ typedef enum typedef struct pac_ast pac_ast_s; typedef struct pac_ast_rule pac_ast_rule_s; typedef struct pac_ast_variant pac_ast_variant_s; -typedef struct pac_ast_literal pac_ast_literal_s; +typedef struct pac_ast_string_literal pac_ast_string_literal_s; typedef struct pac_ast_reference pac_ast_reference_s; typedef struct pac_ast_item pac_ast_item_s; @@ -54,6 +55,7 @@ struct pac_ast { usz_t num_rules; pac_ast_rule_s *rules; + pac_arena_s string_arena; }; struct pac_ast_rule @@ -78,7 +80,7 @@ struct pac_ast_reference char *name; }; -struct pac_ast_literal +struct pac_ast_string_literal { usz_t length; char *string; @@ -89,7 +91,7 @@ struct pac_ast_item pac_ast_item_e type; union pac_ast_item_data { - pac_ast_literal_s literal; + pac_ast_string_literal_s string_literal; pac_ast_set_e set; pac_ast_reference_s reference; } data; @@ -101,16 +103,22 @@ typedef struct pac_ast_builder pac_tlist_s *token_list; bool_t failed; - pac_logger_s logger; + pac_logger_s *logger; + pac_arena_s string_arena; } pac_ast_builder_s; -pac_ast_s pac_build_ast (pac_tlist_s tokens); +pac_ast_s pac_build_ast (pac_tlist_s tokens, pac_logger_s *logger); +void pac_delete_ast (pac_ast_s ast); pac_ast_status_e pac_ast_handle_invalid_reference_name_token (pac_ast_builder_s *builder); pac_ast_status_e pac_ast_handle_missing_reference_close_tag (pac_ast_builder_s *builder); +pac_ast_status_e pac_ast_handle_missing_rule_header_closing_sign (pac_ast_builder_s *builder); pac_ast_status_e pac_ast_handle_reference_with_equals_sign (pac_ast_builder_s *builder); -pac_ast_status_e pac_ast_handle_missing_item_delimiter (pac_ast_builder_s *builder); +pac_ast_status_e pac_ast_handle_missing_item_delimiter (pac_ast_builder_s *builder, char *rule_name, usz_t variant_index, usz_t item_index); pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header (pac_ast_builder_s *builder); +pac_ast_status_e pac_ast_handle_unknown_item_type (pac_ast_builder_s 
*builder, char *rule_name, usz_t variant_index, usz_t item_index); + +char * pac_ast_stringify_status (pac_ast_status_e status); #endif // PARCEL_AST_H diff --git a/core-parser/inc/logger.h b/core-parser/inc/logger.h index ea59cfe..a04f395 100644 --- a/core-parser/inc/logger.h +++ b/core-parser/inc/logger.h @@ -8,6 +8,7 @@ typedef enum { PAC_SYNTAX_ERROR, PAC_NAMING_ERROR, + PAC_VALIDATION_ERROR, PAC_INTERNAL_ERROR } pac_error_e; @@ -17,20 +18,52 @@ typedef enum PAC_SYNTAX_ERROR_UNSPECIFIED, PAC_SYNTAX_ERROR_STRAY, PAC_SYNTAX_ERROR_MISSING_TOKEN, - PAC_SYNTAX_ERROR_ODD_TOKEN + PAC_SYNTAX_ERROR_ODD_TOKEN, + + PAC_SYNTAX_ERROR_INVALID_ITEM } pac_syntax_error_e; typedef enum { + PAC_NAMING_ERROR_INVALID_REFERENCE_NAME, PAC_NAMING_ERROR_INVALID_RULE_NAME } pac_naming_error_e; -typedef struct pac_syntax_error +typedef enum { + PAC_VALIDATION_ERROR_UNKNOWN_RULE, + PAC_VALIDATION_ERROR_RULE_NAMING_CONFLICT, + PAC_VALIDATION_ERROR_VARIANT_DUPLICATION, + PAC_VALIDATION_ERROR_CIRCULAR_REFERENCE + +} pac_validation_error_e; + +typedef enum +{ + PAC_INTERNAL_ERROR_INVALID_ARGUMENTS, + PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS, + PAC_INTERNAL_ERROR_INVALID_FUNCTION_ENTRY, + PAC_INTERNAL_ERROR_NOT_IMPLEMENTED + +} pac_internal_error_e; + +typedef struct pac_source_location +{ + char *file; usz_t line; usz_t column; + + char *rule; + usz_t variant_index; + usz_t item_index; + +} pac_source_location_s; + +typedef struct pac_syntax_error +{ + pac_source_location_s location; pac_syntax_error_e type; union pac_syntax_error_specifics @@ -50,10 +83,17 @@ typedef struct pac_syntax_error { usz_t num_valid_options; char *valid_options[16]; - char *found_token; + char *present_token; char *hint; } odd_token; + struct pac_syntax_error_invalid_item + { + char *present_construct; + char *hint; + + } invalid_item; + } specifics; } pac_syntax_error_s; @@ -74,6 +114,80 @@ typedef struct pac_naming_error } pac_naming_error_s; +typedef struct pac_validation_error +{ + char *rule; + usz_t variant; 
+ + pac_validation_error_e type; + union pac_validation_error_specifics + { + struct pac_naming_error_unknown_rule + { + char *searched_name; + + } unknown_rule; + + struct pac_naming_error_rule_naming_conflict + { + char *rule_name; + usz_t original_line; + usz_t duplicate_line; + + } rule_naming_conflict; + + struct pac_naming_error_variant_duplication + { + char *rule_name; + usz_t original; + usz_t duplicate; + + } variant_doubling; + + } specifics; + +} pac_validation_error_s; + +typedef struct pac_internal_error +{ + pac_internal_error_e type; + union pac_internal_error_specifics + { + struct pac_internal_error_invalid_arguments + { + char *argument; + char *value; + char *error_description; + + } invalid_arguments; + + struct pac_internal_error_invalid_returned_status + { + char *function; + char *immediate_returner; + char *value; + + } invalid_returned_status; + + struct pac_internal_error_invalid_function_entry + { + char *function; + char *error_description; + + } invalid_function_entry; + + struct pac_internal_error_not_implemented + { + char *feature_name; + char *feature_description; + + } not_implemented; + + } specifics; + +} pac_internal_error_s; + + typedef struct pac_error { @@ -82,6 +196,9 @@ typedef struct pac_error { pac_syntax_error_s syntax_error; pac_naming_error_s naming_error; + pac_validation_error_s validation_error; + pac_internal_error_s internal_error; + } specifics; } pac_error_s; @@ -97,8 +214,12 @@ typedef struct pac_logger } pac_logger_s; pac_logger_s pac_create_logger (); +void pac_delete_logger (pac_logger_s logger); + void pac_log_syntax_error (pac_logger_s *logger, pac_syntax_error_s error); void pac_log_naming_error (pac_logger_s *logger, pac_naming_error_s error); +void pac_log_validation_error (pac_logger_s *logger, pac_validation_error_s error); +void pac_log_internal_error (pac_logger_s *logger, pac_internal_error_s error); void * pac_log_alloc (pac_logger_s *logger, usz_t num_bytes); diff --git 
a/core-parser/inc/parcel.h b/core-parser/inc/parcel.h index 70ba5bf..6d289ed 100644 --- a/core-parser/inc/parcel.h +++ b/core-parser/inc/parcel.h @@ -3,6 +3,7 @@ #define PARCEL_H #include +#include typedef struct pac_grammar pac_grammar_s; typedef struct pac_rule pac_rule_s; @@ -71,11 +72,14 @@ struct pac_grammar usz_t num_rules; pac_rule_s *rules; + + pac_logger_s log; + pac_arena_s result_arena; }; -pac_grammar_s pac_convert_grammar (char *source); +pac_grammar_s pac_convert_grammar (char *source, usz_t len_source); void pac_delete_grammar (pac_grammar_s grammar); void pac_display_log (pac_grammar_s grammar); diff --git a/core-parser/inc/tokenizer.h b/core-parser/inc/tokenizer.h index 6e76437..defe28c 100644 --- a/core-parser/inc/tokenizer.h +++ b/core-parser/inc/tokenizer.h @@ -50,9 +50,11 @@ struct pac_tlist pac_token_s *tokens; }; -pac_token_e pac_word_to_token_type (char *word, usz_t length); pac_tlist_s pac_tokenize_grammar (char *source, usz_t len_source); -char * pac_stringify_token_type (pac_token_e type); +void pac_delete_token_list (pac_tlist_s list); void pac_display_tlist (pac_tlist_s list); +pac_token_e pac_word_to_token_type (char *word, usz_t length); +char * pac_stringify_token_type (pac_token_e type); + #endif diff --git a/core-parser/src/ast.c b/core-parser/src/ast.c index e8f8573..6f345a5 100644 --- a/core-parser/src/ast.c +++ b/core-parser/src/ast.c @@ -1,6 +1,4 @@ #include -#include -#include #include #include @@ -11,15 +9,22 @@ #define TOKEN_CURSOR (builder->cursor) #define SKIP_TOKEN ++builder->cursor #define REWIND_TOKEN --builder->cursor -#define TOKEN_AT(index) (builder->token_list->tokens[(index)]) +#define TOKEN_AT(index) (builder->token_list->tokens[index]) +#define STRING_AT(index) &builder->token_list->source[builder->token_list->tokens[index].offset] #define END_REACHED (builder->cursor >= builder->token_list->num_tokens) - pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_reference_s *reference) { + 
usz_t start_cursor = TOKEN_CURSOR; + if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG) { - // This is only possible with malformed state. + pac_internal_error_s error; + error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS; + error.specifics.invalid_function_entry.function = "__FILE__:pac_build_ast_reference"; + error.specifics.invalid_function_entry.error_description = "The function must be entered while at an opening tag token."; + pac_log_internal_error(builder->logger, error); + return PAC_AST_STATUS_NOT_RECOVERABLE; } SKIP_TOKEN; @@ -28,8 +33,6 @@ pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_ref { return pac_ast_handle_invalid_reference_name_token(builder); } - usz_t len_name = CURRENT_TOKEN.length; - char *name = CURRENT_STRING; SKIP_TOKEN; if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG) @@ -38,32 +41,41 @@ pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_ref } SKIP_TOKEN; - if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS) - { - return pac_ast_handle_reference_with_equals_sign(builder); - } + // Test if this reference is followed by an equals sign. That equals sign shouldn't + // be there anyways and it being there can be a hint for a missing semicolon making + // this reference actually be a rule header. 
+ if(!END_REACHED) + if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS) + return pac_ast_handle_reference_with_equals_sign(builder); - reference->len_name = len_name; - reference->name = malloc(len_name+1); - pac_memory_copy(reference->name, name, len_name); - reference->name[len_name] = 0x00; + reference->len_name = TOKEN_AT(TOKEN_CURSOR-2).length; + reference->name = pac_arena_alloc(&builder->string_arena, reference->len_name + 1); + reference->name[reference->len_name] = 0x00; + pac_memory_copy(reference->name, STRING_AT(start_cursor+1), reference->len_name); return PAC_AST_STATUS_SUCCESS; } -pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item) +pac_ast_status_e pac_build_ast_string_literal(pac_ast_builder_s *builder, pac_ast_string_literal_s *literal) +{ + usz_t len_literal = CURRENT_TOKEN.length; + char *text = CURRENT_STRING; + literal->length = len_literal; + literal->string = pac_arena_alloc(&builder->string_arena, len_literal + 1); + literal->string[len_literal] = 0x00; + pac_memory_copy(literal->string, text, len_literal); + SKIP_TOKEN; + return PAC_AST_STATUS_SUCCESS; +} + +pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item, char *rule_name, usz_t variant_index, usz_t item_index) { item->type = PAC_AST_ITEM_INVALID; pac_memory_zero(item, sizeof(pac_ast_item_s)); if(CURRENT_TOKEN.type == PAC_TOKEN_LIT_STRING) { item->type = PAC_AST_ITEM_LITERAL; - item->data.literal.length = CURRENT_TOKEN.length; - item->data.literal.string = malloc(item->data.literal.length + 1); - pac_memory_copy(item->data.literal.string, CURRENT_STRING, CURRENT_TOKEN.length); - item->data.literal.string[item->data.literal.length] = 0x00; - SKIP_TOKEN; - return PAC_AST_STATUS_SUCCESS; + return pac_build_ast_string_literal(builder, &item->data.string_literal); } if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_OPEN_TAG) @@ -87,9 +99,7 @@ pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s * SKIP_TOKEN; 
return PAC_AST_STATUS_SUCCESS; } - builder->failed = TRUE; - SKIP_TOKEN; // Skip to (probably) the next item. - return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; + return pac_ast_handle_unknown_item_type(builder, rule_name, variant_index, item_index); } pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index) @@ -105,18 +115,18 @@ pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_varia items_capacity *= 2; variant->items = calloc(sizeof(pac_ast_item_s), items_capacity); } - pac_ast_status_e status = pac_build_ast_item(builder, &variant->items[variant->num_items]); - ++variant->num_items; + pac_ast_status_e status = + pac_build_ast_item(builder, &variant->items[variant->num_items], rule_name, variant->num_items, variant->num_items); if(status == PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER) - { continue; - } - + if(status != PAC_AST_STATUS_SUCCESS) - { return status; - } + + ++variant->num_items; + + // Test for the end of this variant (and possibly the rule it belongs to). if( (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR) || (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON) @@ -124,11 +134,14 @@ pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_varia return PAC_AST_STATUS_SUCCESS; } - if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA) - { - pac_ast_status_e comma_missing_status = pac_ast_handle_missing_item_delimiter(builder); - if(comma_missing_status != PAC_AST_STATUS_ERROR_HANDLED) return comma_missing_status; - + // Test if there is no delimiter and, if three is none, handle the error. 
+ if( + (CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA) + && (CURRENT_TOKEN.type != PAC_TOKEN_SIGN_HYPHEN) + ) { + status = pac_ast_handle_missing_item_delimiter(builder, rule_name, variant_index, variant->num_items); + if(status != PAC_AST_STATUS_ERROR_HANDLED) return status; + continue; } SKIP_TOKEN; } @@ -141,7 +154,12 @@ pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder) { if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG) { - // Invalid state + pac_internal_error_s error; + error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS; + error.specifics.invalid_function_entry.function = "__FILE__:pac_skip_ast_rule_header"; + error.specifics.invalid_function_entry.error_description = "The function must be entered while at an opening tag token."; + pac_log_internal_error(builder->logger, error); + return PAC_AST_STATUS_NOT_RECOVERABLE; } SKIP_TOKEN; @@ -158,7 +176,7 @@ pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder) if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG) { - if((status = pac_ast_handle_missing_rule_closing_sign(builder) != PAC_AST_STATUS_ERROR_HANDLED)) + if((status = pac_ast_handle_missing_rule_header_closing_sign(builder) != PAC_AST_STATUS_ERROR_HANDLED)) return status; } SKIP_TOKEN; @@ -184,13 +202,11 @@ pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s * usz_t start_index = TOKEN_CURSOR; pac_skip_ast_rule_header(builder); - // Parse all variants - usz_t len_name = TOKEN_AT(start_index+1).length; usz_t name_offset = TOKEN_AT(start_index+1).offset; char *name_pointer = &builder->token_list->source[name_offset]; - rule->name = malloc(len_name + 1); + rule->name = pac_arena_alloc(&builder->string_arena, len_name + 1); pac_memory_copy(rule->name, name_pointer, len_name); rule->name[len_name] = 0; @@ -210,41 +226,39 @@ pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s * { case PAC_AST_STATUS_SUCCESS: break; + case PAC_AST_STATUS_NOT_RECOVERABLE: - return 
PAC_AST_STATUS_NOT_RECOVERABLE; - case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE: - puts("Failed parsing a rule's variant, recovering at next rule!"); - return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + case PAC_AST_STATUS_UNEXPECTED_FILE_END: + return status; case PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER: { - builder->failed = TRUE; - printf("Failed parsing a rule's variant, "); while(!END_REACHED) { + // For continuing with the next variant of this rule. if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR) - { - puts("continuing with the next variant."); break; - } + + // For exiting this rule and continuing with a possible next one. if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON) - { - puts("continuing with next rule."); - SKIP_TOKEN; return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; - } + SKIP_TOKEN; } - } break; + } return PAC_AST_STATUS_UNEXPECTED_FILE_END; - case PAC_AST_STATUS_UNEXPECTED_FILE_END: - return PAC_AST_STATUS_UNEXPECTED_FILE_END; - default: - printf("Internal Error: An invalid status code was produced: %d\n", status); + pac_internal_error_s error; + error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS; + error.specifics.invalid_returned_status.function = "__FILE__:pac_build_ast_rule"; + error.specifics.invalid_returned_status.immediate_returner = "__FILE__:pac_build_ast_variant"; + error.specifics.invalid_returned_status.value = pac_ast_stringify_status(status); + pac_log_internal_error(builder->logger, error); + return PAC_AST_STATUS_NOT_RECOVERABLE; } + // It only goes here in the case of PAC_AST_STATUS_SUCCESS if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON) { @@ -261,7 +275,7 @@ pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s * return PAC_AST_STATUS_UNEXPECTED_FILE_END; } -void pac_ast_find_next_rule(pac_ast_builder_s *builder) +void pac_ast_skip_to_next_rule(pac_ast_builder_s *builder) { while(!END_REACHED) { @@ -275,7 +289,7 @@ void pac_ast_find_next_rule(pac_ast_builder_s 
*builder) return; } -pac_ast_s pac_build_ast(pac_tlist_s tokens) +pac_ast_s pac_build_ast(pac_tlist_s tokens, pac_logger_s *logger) { usz_t rules_capacity = 32; pac_ast_s ast; @@ -284,9 +298,10 @@ pac_ast_s pac_build_ast(pac_tlist_s tokens) pac_ast_builder_s builder; builder.cursor = 0; - builder.logger = pac_create_logger(); builder.token_list = &tokens; builder.failed = FALSE; + builder.string_arena = pac_create_arena(1024 * 256); // 262144 (2^18) + builder.logger = logger; while(builder.cursor < tokens.num_tokens) { @@ -299,20 +314,65 @@ pac_ast_s pac_build_ast(pac_tlist_s tokens) pac_ast_status_e status = pac_build_ast_rule(&builder, &ast.rules[ast.num_rules]); if(status == PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE) - pac_ast_find_next_rule(&builder); + pac_ast_skip_to_next_rule(&builder); if(status == PAC_AST_STATUS_UNEXPECTED_FILE_END) - { - puts("File ended prematurely!"); break; - } if(status != PAC_AST_STATUS_SUCCESS) - { - printf("Failed parsing a rule at index: %lu with status code: %d!\n", builder.cursor, status); - continue; - } + break; + ++ast.num_rules; } + ast.string_arena = builder.string_arena; return ast; } + + + +void pac_delete_ast_variant(pac_ast_variant_s *variant) +{ + free(variant->items); +} + +void pac_delete_ast_rule(pac_ast_rule_s *rule) +{ + usz_t variant_index = 0; + while(variant_index < rule->num_variants) + { + pac_delete_ast_variant(&rule->variants[variant_index]); + ++variant_index; + } + free(rule->variants); +} + +void pac_delete_ast(pac_ast_s ast) +{ + usz_t rule_index = 0; + while(rule_index < ast.num_rules) + { + pac_delete_ast_rule(&ast.rules[rule_index]); + ++rule_index; + } + free(ast.rules); + pac_delete_arena(ast.string_arena); +} + + + +char * pac_ast_stringify_status(pac_ast_status_e status) +{ + switch(status) + { + case PAC_AST_STATUS_SUCCESS: return "SUCCESS"; + case PAC_AST_STATUS_ERROR_HANDLED: return "ERROR_HANDLED"; + case PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER: return "ESCALATE_TO_ITEM_PARSER"; + 
case PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER: return "ESCALATE_TO_VARIANT_PARSER"; + case PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER: return "ESCALATE_TO_RULE_PARSER"; + case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_VARIANT: return "FIND_NEXT_VARIANT"; + case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE: return "FIND_NEXT_RULE"; + case PAC_AST_STATUS_NOT_RECOVERABLE: return "NOT_RECOVERABLE"; + case PAC_AST_STATUS_UNEXPECTED_FILE_END: return "UNEXPECTED_END"; + } + return " - "; +} diff --git a/core-parser/src/ast_errors.c b/core-parser/src/ast_errors.c index 41ae30a..ede87e8 100644 --- a/core-parser/src/ast_errors.c +++ b/core-parser/src/ast_errors.c @@ -12,6 +12,15 @@ +char * pac_identify_present_construct(pac_ast_builder_s *builder) +{ + switch(CURRENT_TOKEN.type) + { + case PAC_TOKEN_SIGN_OPEN_TAG: return "RULE_IDENTIFIER"; + default: return "UNKNOWN"; + } +} + bool_t pac_ast_builder_forward_seek_token_type(pac_ast_builder_s *builder, pac_token_e type, usz_t max_steps) { usz_t cursor_backup = TOKEN_CURSOR; @@ -58,6 +67,8 @@ bool_t pac_ast_builder_forward_seek_item_start(pac_ast_builder_s *builder, usz_t + + pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s *builder) { usz_t open_tag_src_offset = TOKEN_AT(TOKEN_CURSOR-1).offset; @@ -84,7 +95,7 @@ pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s * char *given_rule_name = pac_log_alloc(&builder->logger, len_reference_name + 1); pac_memory_copy(given_rule_name, &builder->token_list->source[open_tag_src_offset], len_reference_name); pac_naming_error_s error; - error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME; + error.type = PAC_NAMING_ERROR_INVALID_REFERENCE_NAME; error.line = CURRENT_TOKEN.line; error.column = CURRENT_TOKEN.column; error.specifics.invalid_rule_name.given_rule_name = given_rule_name; @@ -94,22 +105,23 @@ pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s * builder->failed = TRUE; 
if(!pac_ast_builder_forward_seek_item_start(builder, 4)) - return PAC_AST_STATUS_NOT_RECOVERABLE; + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; } pac_ast_status_e pac_ast_handle_missing_reference_close_tag(pac_ast_builder_s *builder) { + builder->failed = TRUE; + pac_syntax_error_s error; error.type = PAC_SYNTAX_ERROR_MISSING_TOKEN; - error.line = CURRENT_TOKEN.line; - error.column = CURRENT_TOKEN.column; + error.location.line = CURRENT_TOKEN.line; + error.location.column = CURRENT_TOKEN.column; error.specifics.missing_token.hint = NULL; error.specifics.missing_token.wanted_token = "Tag Closer (>)"; pac_log_syntax_error(&builder->logger, error); - builder->failed = TRUE; return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; } @@ -117,8 +129,8 @@ pac_ast_status_e pac_ast_handle_reference_with_equals_sign(pac_ast_builder_s *bu { pac_syntax_error_s error; error.type = PAC_SYNTAX_ERROR_ODD_TOKEN; - error.line = CURRENT_TOKEN.line; - error.column = CURRENT_TOKEN.column; + error.location.line = CURRENT_TOKEN.line; + error.location.column = CURRENT_TOKEN.column; error.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago."; error.specifics.odd_token.num_valid_options = 1; error.specifics.odd_token.valid_options[0] = "Any Rule Item"; @@ -129,62 +141,117 @@ pac_ast_status_e pac_ast_handle_reference_with_equals_sign(pac_ast_builder_s *bu } - -pac_ast_status_e pac_ast_handle_unknown_item_type(pac_ast_builder_s *builder) +pac_ast_status_e pac_ast_handle_missing_item_delimiter(pac_ast_builder_s *builder, char *rule_name, usz_t variant_index, usz_t item_index) { + builder->failed = TRUE; + pac_syntax_error_s error; error.type = PAC_SYNTAX_ERROR_ODD_TOKEN; - error.line = CURRENT_TOKEN.line; - error.column = CURRENT_TOKEN.column; - error.specifics.odd_token.num_valid_options = 1; - error.specifics.odd_token.valid_options[0] = "Any Rule Item"; + error.location.line = CURRENT_TOKEN.line; + 
error.location.column = CURRENT_TOKEN.column; + error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type); + error.specifics.odd_token.num_valid_options = 2; + error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_COMMA); + error.specifics.odd_token.valid_options[1] = pac_stringify_token_type(PAC_TOKEN_SIGN_HYPHEN); pac_log_syntax_error(&builder->logger, error); - SKIP_TOKEN; - if(!pac_ast_builder_forward_seek_item_start(builder, 3)) - return PAC_AST_STATUS_NOT_RECOVERABLE; - builder->failed = TRUE; - return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; -} - -pac_ast_status_e pac_ast_handle_missing_item_delimiter(pac_ast_builder_s *builder) -{ - SKIP_TOKEN; - puts("Missing the item delimiter!"); - builder->failed = TRUE; return PAC_AST_STATUS_ERROR_HANDLED; } pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header(pac_ast_builder_s *builder) { - puts("Missing Equals sign after rule header!"); builder->failed = TRUE; + pac_syntax_error_s error; + error.type = PAC_SYNTAX_ERROR_ODD_TOKEN; + error.location.line = CURRENT_TOKEN.line; + error.location.column = CURRENT_TOKEN.column; + error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type); + error.specifics.odd_token.num_valid_options = 1; + error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_EQUALS); + // Continue at first item (if possible) if(!pac_ast_builder_forward_seek_item_start(builder, 2)) + { + error.specifics.odd_token.hint = "Missing equals sign after rule header. Continuing at next rule."; + pac_log_syntax_error(&builder->logger, error); + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + } + error.specifics.odd_token.hint = "Missing equals sign after rule header. 
Continuing at next item."; + pac_log_syntax_error(&builder->logger, error); + return PAC_AST_STATUS_ERROR_HANDLED; } pac_ast_status_e pac_ast_handle_invalid_rule_name(pac_ast_builder_s *builder) { builder->failed = TRUE; + + pac_syntax_error_s error; + error.type = PAC_SYNTAX_ERROR_ODD_TOKEN; + error.location.line = CURRENT_TOKEN.line; + error.location.column = CURRENT_TOKEN.column; + error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type); + error.specifics.odd_token.num_valid_options = 1; + error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_WORD); + if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3)) + { + error.specifics.odd_token.hint = "Invalid rule name. Continuing at next rule due to missing equals sign."; + pac_log_syntax_error(&builder->logger, error); + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + } + error.specifics.odd_token.hint = "Invalid rule name. Continuing after the equals sign."; + pac_log_syntax_error(&builder->logger, error); + return PAC_AST_STATUS_ERROR_HANDLED; } -pac_ast_status_e pac_ast_handle_missing_rule_closing_sign(pac_ast_builder_s *builder) +pac_ast_status_e pac_ast_handle_missing_rule_header_closing_sign(pac_ast_builder_s *builder) { - puts("Missing Tag closing sign!"); - builder->failed = TRUE; + builder->failed = TRUE; - // Continue at equals sign (if possible) - if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 2)) - return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; - - // The next step in the calling function is to advance and check for the equals sign, - // thus, it is necessary to go one back for it to find it again when advancing. 
- REWIND_TOKEN; - return PAC_AST_STATUS_ERROR_HANDLED; + pac_syntax_error_s error; + error.type = PAC_SYNTAX_ERROR_ODD_TOKEN; + error.location.line = CURRENT_TOKEN.line; + error.location.column = CURRENT_TOKEN.column; + error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type); + error.specifics.odd_token.num_valid_options = 1; + error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_CLOSE_TAG); + + // Continue at equals sign (if possible) + if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3)) + { + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + } + error.specifics.odd_token.hint = "Continuing parser process after the next equals sign."; + + // The next step in the calling function is to advance and check for the equals sign, + // thus, it is necessary to go one back for it to find it again when advancing. + REWIND_TOKEN; + return PAC_AST_STATUS_ERROR_HANDLED; +} + +pac_ast_status_e pac_ast_handle_unknown_item_type(pac_ast_builder_s *builder, char *rule_name, usz_t variant_index, usz_t item_index) +{ + builder->failed = TRUE; + + pac_syntax_error_s error; + error.type = PAC_SYNTAX_ERROR_ODD_TOKEN; + error.location.line = CURRENT_TOKEN.line; + error.location.column = CURRENT_TOKEN.column; + error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type); + error.specifics.odd_token.num_valid_options = 1; + error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_CLOSE_TAG); + + if(!pac_ast_builder_forward_seek_item_start(builder, 5)) + { + if(pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_VERTICAL_BAR, 5)) + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_VARIANT; + + return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE; + } + return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER; } diff --git a/core-parser/src/grammar.c b/core-parser/src/grammar.c index 12e187d..b95412b 100644 --- 
a/core-parser/src/grammar.c +++ b/core-parser/src/grammar.c @@ -4,14 +4,21 @@ #include -extern pac_grammar_s pac_link_grammar(pac_ast_s ast); +extern void pac_link_grammar(pac_grammar_s *grammar, pac_ast_s ast); -pac_grammar_s pac_convert_grammar(char *source) +pac_grammar_s pac_convert_grammar(char *source, usz_t len_source) { - usz_t len_source = strlen(source); + pac_logger_s logger = pac_create_logger(); + pac_grammar_s grammar; + grammar.result_arena = pac_create_arena(1024 * 1024); + pac_tlist_s tokens = pac_tokenize_grammar(source, len_source); - pac_ast_s ast = pac_build_ast(tokens); - pac_grammar_s grammar = pac_link_grammar(ast); + pac_ast_s ast = pac_build_ast(tokens, &logger); + grammar.log = logger; + pac_link_grammar(&grammar, ast); + + pac_delete_ast(ast); + pac_delete_token_list(tokens); return grammar; } @@ -23,5 +30,6 @@ void pac_display_grammar(pac_grammar_s grammar) void pac_delete_grammar(pac_grammar_s grammar) { - + pac_delete_arena(grammar.result_arena); + pac_delete_logger(grammar.log); } diff --git a/core-parser/src/linker.c b/core-parser/src/linker.c index f4c4459..5f47c92 100644 --- a/core-parser/src/linker.c +++ b/core-parser/src/linker.c @@ -14,7 +14,7 @@ void pac_copy_empty_rules_from_ast_to_grammar(pac_grammar_s *grammar, pac_ast_s pac_ast_rule_s ast_rule = ast->rules[rule_index]; pac_rule_s *rule = &grammar->rules[rule_index]; usz_t len_rule_name = strlen(ast_rule.name); - rule->name = malloc(len_rule_name + 1); + rule->name = pac_arena_alloc(&grammar->result_arena, len_rule_name + 1); pac_memory_copy(rule->name, ast_rule.name, len_rule_name); rule->name[len_rule_name] = 0x00; } @@ -46,7 +46,7 @@ pac_rule_s * pac_find_rule(pac_grammar_s *grammar, char *name) void pac_copy_single_variant(pac_grammar_s *grammar, pac_variant_s *variant, pac_ast_variant_s *ast_variant) { variant->num_items = ast_variant->num_items; - variant->items = malloc(sizeof(pac_item_s) * variant->num_items); + variant->items = 
pac_arena_alloc(&grammar->result_arena, sizeof(pac_item_s) * variant->num_items); for(usz_t item_index = 0; item_index < variant->num_items; ++item_index) { pac_ast_item_s ast_item = ast_variant->items[item_index]; @@ -61,8 +61,8 @@ void pac_copy_single_variant(pac_grammar_s *grammar, pac_variant_s *variant, pac case PAC_AST_ITEM_LITERAL: { item->type = PAC_ITEM_LITERAL; - item->data.literal.length = ast_item.data.literal.length; - item->data.literal.string = ast_item.data.literal.string; // TODO: Copy this into a grammar-owned arena! + item->data.literal.length = ast_item.data.string_literal.length; + item->data.literal.string = ast_item.data.string_literal.string; // TODO: Copy this into a grammar-owned arena! } break; case PAC_AST_ITEM_REFERENCE: @@ -88,23 +88,21 @@ void pac_copy_variants_and_link_references(pac_grammar_s *grammar, pac_ast_s *as pac_ast_rule_s ast_rule = ast->rules[rule_index]; pac_rule_s *rule = &grammar->rules[rule_index]; rule->num_variants = ast_rule.num_variants; - rule->variants = malloc(sizeof(pac_variant_s) * rule->num_variants); + rule->variants = pac_arena_alloc(&grammar->result_arena, sizeof(pac_variant_s) * rule->num_variants); for(usz_t variant_index = 0; variant_index < rule->num_variants; ++variant_index) { - pac_copy_single_variant(grammar, &rule->variants[variant_index], &ast_rule.variants[variant_index]); } } } -pac_grammar_s pac_link_grammar(pac_ast_s ast) +/* Fills *grammar from ast. The rule array is allocated from grammar->result_arena, which the caller must have created beforehand. */ +void pac_link_grammar(pac_grammar_s *grammar, pac_ast_s ast) { - pac_grammar_s grammar; - grammar.num_rules = ast.num_rules; - grammar.rules = malloc(sizeof(pac_rule_s) * ast.num_rules); + grammar->num_rules = ast.num_rules; + grammar->rules = pac_arena_alloc(&grammar->result_arena, sizeof(pac_rule_s) * ast.num_rules); - pac_copy_empty_rules_from_ast_to_grammar(&grammar, &ast); - pac_copy_variants_and_link_references(&grammar, &ast); + pac_copy_empty_rules_from_ast_to_grammar(grammar, &ast); + pac_copy_variants_and_link_references(grammar, &ast); - return grammar; } diff 
--git a/core-parser/src/logger.c b/core-parser/src/logger.c index a7fabd0..bb4c609 100644 --- a/core-parser/src/logger.c +++ b/core-parser/src/logger.c @@ -13,6 +13,12 @@ pac_logger_s pac_create_logger() return logger; } +void pac_delete_logger(pac_logger_s logger) +{ + free(logger.errors); + pac_delete_arena(logger.string_arena); +} + void pac_resize_log_if_needed(pac_logger_s *logger) { if(logger->num_errors >= logger->allocated_errors) @@ -26,16 +32,32 @@ void pac_resize_log_if_needed(pac_logger_s *logger) void pac_log_syntax_error(pac_logger_s *logger, pac_syntax_error_s error) { pac_resize_log_if_needed(logger); - logger->errors[logger->num_errors].type = PAC_SYNTAX_ERROR; - logger->errors[logger->num_errors].specifics.syntax_error = error; + logger->errors[logger->num_errors].type = PAC_SYNTAX_ERROR; + logger->errors[logger->num_errors].specifics.syntax_error = error; ++logger->num_errors; } void pac_log_naming_error(pac_logger_s *logger, pac_naming_error_s error) { pac_resize_log_if_needed(logger); - logger->errors[logger->num_errors].type = PAC_NAMING_ERROR; - logger->errors[logger->num_errors].specifics.naming_error = error; + logger->errors[logger->num_errors].type = PAC_NAMING_ERROR; + logger->errors[logger->num_errors].specifics.naming_error = error; + ++logger->num_errors; +} + +void pac_log_validation_error(pac_logger_s *logger, pac_validation_error_s error) +{ + pac_resize_log_if_needed(logger); + logger->errors[logger->num_errors].type = PAC_VALIDATION_ERROR; + logger->errors[logger->num_errors].specifics.validation_error = error; + ++logger->num_errors; +} + +void pac_log_internal_error(pac_logger_s *logger, pac_internal_error_s error) +{ + pac_resize_log_if_needed(logger); + logger->errors[logger->num_errors].type = PAC_INTERNAL_ERROR; + logger->errors[logger->num_errors].specifics.internal_error = error; ++logger->num_errors; } diff --git a/core-parser/src/tokenizer.c b/core-parser/src/tokenizer.c index fd218b3..53bbb45 100644 --- 
a/core-parser/src/tokenizer.c +++ b/core-parser/src/tokenizer.c @@ -48,9 +48,11 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source) usz_t tokens_capacity = 1024; pac_tlist_s list; - list.source = source; list.num_tokens = 0; list.tokens = calloc(sizeof(pac_token_s), tokens_capacity); + list.source = malloc(len_source + 1); + pac_memory_copy(list.source, source, len_source); + list.source[len_source] = 0; usz_t line = 1; usz_t column = 1; @@ -257,4 +259,11 @@ void pac_display_tlist(pac_tlist_s list) content ); } -} \ No newline at end of file +} + +void pac_delete_token_list(pac_tlist_s list) +{ + free(list.tokens); + free(list.source); + pac_memory_fill(&list, sizeof(pac_tlist_s), 0); +}