From 61ff4967a60363d88937091c90bd533d8c930d47 Mon Sep 17 00:00:00 2001 From: Eric-Paul Ickhorn Date: Tue, 28 Nov 2023 18:57:36 +0100 Subject: [PATCH] Implemented grammar reference linking and removed most warnings --- code/inc/ast.h | 2 +- code/inc/parcel.h | 72 ++++++++++++++++++++++++----- code/src/ast.c | 64 +++++++++++++------------- code/src/grammar.c | 27 +++++++++++ code/src/linker.c | 110 +++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 233 insertions(+), 42 deletions(-) create mode 100644 code/src/grammar.c create mode 100644 code/src/linker.c diff --git a/code/inc/ast.h b/code/inc/ast.h index 1e78c44..813a27c 100644 --- a/code/inc/ast.h +++ b/code/inc/ast.h @@ -71,7 +71,7 @@ typedef enum struct pac_ast_item { pac_ast_item_e type; - union pac_item_data + union pac_ast_item_data { pac_ast_literal_s literal; pac_ast_set_e set; diff --git a/code/inc/parcel.h b/code/inc/parcel.h index d5fdb2e..70ba5bf 100644 --- a/code/inc/parcel.h +++ b/code/inc/parcel.h @@ -5,28 +5,80 @@ #include typedef struct pac_grammar pac_grammar_s; -typedef struct pac_log_entry pac_log_entry_s; -typedef struct pac_log pac_log_s; +typedef struct pac_rule pac_rule_s; +typedef struct pac_variant pac_variant_s; +typedef struct pac_literal pac_literal_s; +typedef struct pac_item pac_item_s; -struct pac_log_entry +// pac_item_e: The type of an item of a rule variant. +// +// A rule (-variant) is made up of multiple items which +// can have one of the types that are listed in this enum. +typedef enum +{ + PAC_ITEM_INVALID = 0x00, + PAC_ITEM_REFERENCE, + PAC_ITEM_LITERAL, + PAC_ITEM_SET, + +} pac_item_e; + +typedef enum +{ + PAC_SET_INVALID = 0x00, + PAC_SET_RUNE, + PAC_SET_WORD, + PAC_SET_INTEGER, + PAC_SET_FLOAT +} pac_set_e; + +struct pac_literal { usz_t length; - char *text; - + char *string; }; -struct pac_log +struct pac_item { - usz_t length; - pac_log_entry_s *entries; + pac_item_e type; + union pac_item_data + { + pac_literal_s literal; + pac_set_e set; + pac_rule_s *reference; + + } data; +}; + +struct pac_variant +{ + usz_t num_items; + pac_item_s *items; +}; + +struct pac_rule +{ + char *name; + + usz_t num_variants; + pac_variant_s *variants; }; struct pac_grammar { - + usz_t len_source; + char *source; + + usz_t num_rules; + pac_rule_s *rules; }; + + pac_grammar_s pac_convert_grammar (char *source); -// void pac_delete_grammar (pac_grammar_s grammar); +void pac_delete_grammar (pac_grammar_s grammar); + +void pac_display_log (pac_grammar_s grammar); +void pac_display_grammar (pac_grammar_s grammar); #endif // PARCEL_H diff --git a/code/src/ast.c b/code/src/ast.c index f5ac29c..b542c28 100644 --- a/code/src/ast.c +++ b/code/src/ast.c @@ -47,37 +47,37 @@ i32_t pac_grow_item(pac_tlist_s *tlist, pac_ast_item_s *item) { item->type = PAC_AST_ITEM_INVALID; memset(item, 0x00, sizeof(pac_ast_item_s)); - switch(CURRENT.type) + if(CURRENT.type == PAC_TOKEN_LIT_STRING) { - case PAC_TOKEN_LIT_STRING: - { - item->type = PAC_AST_ITEM_LITERAL; - item->data.literal.length = CURRENT.length; - item->data.literal.string = malloc(item->data.literal.length + 1); - memcpy(item->data.literal.string, CURRENT_STRING, CURRENT.length); - item->data.literal.string[item->data.literal.length] = 0x00; - SKIP_TOKEN; - } return 1; - - case PAC_TOKEN_SIGN_OPEN_TAG: - { - item->type = PAC_AST_ITEM_REFERENCE; - return pac_grow_reference(tlist, &item->data.reference); - } + item->type = PAC_AST_ITEM_LITERAL; + item->data.literal.length = CURRENT.length; + item->data.literal.string = malloc(item->data.literal.length + 1); + memcpy(item->data.literal.string, CURRENT_STRING, CURRENT.length); + item->data.literal.string[item->data.literal.length] = 0x00; + SKIP_TOKEN; + return 1; + } - case PAC_TOKEN_KEYWORD_WORD: - { - item->type = PAC_AST_ITEM_SET; - item->data.set = PAC_AST_SET_WORD; - SKIP_TOKEN; - } return 1; - - case PAC_TOKEN_KEYWORD_INTEGER: - { - item->type = PAC_AST_ITEM_SET; - item->data.set = PAC_AST_SET_INTEGER; - SKIP_TOKEN; - } return 1; + if(CURRENT.type == PAC_TOKEN_SIGN_OPEN_TAG) + { + item->type = PAC_AST_ITEM_REFERENCE; + return pac_grow_reference(tlist, &item->data.reference); + } + + if(CURRENT.type == PAC_TOKEN_KEYWORD_WORD) + { + item->type = PAC_AST_ITEM_SET; + item->data.set = PAC_AST_SET_WORD; + return 1; + SKIP_TOKEN; + } + + if(CURRENT.type == PAC_TOKEN_KEYWORD_INTEGER) + { + item->type = PAC_AST_ITEM_SET; + item->data.set = PAC_AST_SET_INTEGER; + SKIP_TOKEN; + return 1; } return -1; } @@ -126,7 +126,9 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule) usz_t start_index = tlist->cursor; if(CURRENT.type != PAC_TOKEN_SIGN_OPEN_TAG) + { return -1; + } SKIP_TOKEN; if(CURRENT.type != PAC_TOKEN_WORD) @@ -170,7 +172,7 @@ i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule) if(success < 0) { - printf("Failed parsing variant %u of rule '%d'. ", rule->num_variants); + printf("Failed parsing a rule's variant!"); while(!END_REACHED) { if(CURRENT.type == PAC_TOKEN_SIGN_VERTICAL_BAR) @@ -220,7 +222,7 @@ pac_ast_s pac_grow_ast(pac_tlist_s tokens) int success = pac_grow_rule(&tokens, &ast.rules[ast.num_rules]); if(success < 0) { - printf("Failed parsing a rule at index: %u!\n", tokens.cursor); + printf("Failed parsing a rule at index: %lu!\n", tokens.cursor); } ++ast.num_rules; } diff --git a/code/src/grammar.c b/code/src/grammar.c new file mode 100644 index 0000000..f86d288 --- /dev/null +++ b/code/src/grammar.c @@ -0,0 +1,27 @@ +#include +#include +#include + +#include + +extern pac_grammar_s pac_link_grammar(pac_ast_s ast); + +pac_grammar_s pac_convert_grammar(char *source) +{ + usz_t len_source = strlen(source); + pac_tlist_s tokens = pac_tokenize_grammar(source, len_source); + pac_ast_s ast = pac_grow_ast(tokens); + pac_grammar_s grammar = pac_link_grammar(ast); + + return grammar; +} + +void pac_display_grammar(pac_grammar_s grammar) +{ + +} + +void pac_delete_grammar(pac_grammar_s grammar) +{ + +} diff --git a/code/src/linker.c b/code/src/linker.c new file mode 100644 index 0000000..7f888a4 --- /dev/null +++ b/code/src/linker.c @@ -0,0 +1,110 @@ +#include +#include + +#include +#include +#include + +// pac_copy_empty_rules_from_ast_to_grammar: Creates a grammar corresponding to the AST, +// not copying the variants and their items, but just the rules and their names. +void pac_copy_empty_rules_from_ast_to_grammar(pac_grammar_s *grammar, pac_ast_s *ast) +{ + for(usz_t rule_index = 0; rule_index < grammar->num_rules; ++rule_index) + { + pac_ast_rule_s ast_rule = ast->rules[rule_index]; + pac_rule_s *rule = &grammar->rules[rule_index]; + usz_t len_rule_name = strlen(ast_rule.name); + rule->name = malloc(len_rule_name + 1); + memcpy(rule->name, ast_rule.name, len_rule_name); + rule->name[len_rule_name] = 0x00; + } +} + +pac_set_e pac_convert_ast_set_to_grammar_set(pac_ast_set_e set) +{ + switch(set) + { + case PAC_AST_SET_RUNE: return PAC_SET_RUNE; + case PAC_AST_SET_WORD: return PAC_SET_WORD; + case PAC_AST_SET_INTEGER: return PAC_SET_INTEGER; + case PAC_AST_SET_FLOAT: return PAC_SET_FLOAT; + } + return PAC_SET_INVALID; +} + +pac_rule_s * pac_find_rule(pac_grammar_s *grammar, char *name) +{ + for(usz_t index = 0; index < grammar->num_rules; ++index) + { + if(!strcmp(grammar->rules[index].name, name)) + return &grammar->rules[index]; + } + printf("Couldn't find refernced rule: %s\n", name); + return NULL; +} + +void pac_copy_single_variant(pac_grammar_s *grammar, pac_variant_s *variant, pac_ast_variant_s *ast_variant) +{ + variant->num_items = ast_variant->num_items; + variant->items = malloc(sizeof(pac_item_s) * variant->num_items); + for(usz_t item_index = 0; item_index < variant->num_items; ++item_index) + { + pac_ast_item_s ast_item = ast_variant->items[item_index]; + pac_item_s *item = &variant->items[item_index]; + switch(ast_item.type) + { + case PAC_AST_ITEM_INVALID: + { + item->type = PAC_ITEM_INVALID; + } break; + + case PAC_AST_ITEM_LITERAL: + { + item->type = PAC_ITEM_LITERAL; + item->data.literal.length = ast_item.data.literal.length; + item->data.literal.string = ast_item.data.literal.string; // TODO: Copy this into a grammar-owned arena! + } break; + + case PAC_AST_ITEM_REFERENCE: + { + item->type = PAC_ITEM_REFERENCE; + item->data.reference = pac_find_rule(grammar, ast_item.data.reference.name); + } break; + + case PAC_AST_ITEM_SET: + { + item->type = PAC_ITEM_SET; + item->data.set = pac_convert_ast_set_to_grammar_set(ast_item.data.set); + } break; + } + } +} + +void pac_copy_variants_and_link_references(pac_grammar_s *grammar, pac_ast_s *ast) +{ + grammar->num_rules = ast->num_rules; + for(usz_t rule_index = 0; rule_index < ast->num_rules; ++rule_index) + { + pac_ast_rule_s ast_rule = ast->rules[rule_index]; + pac_rule_s *rule = &grammar->rules[rule_index]; + rule->num_variants = ast_rule.num_variants; + rule->variants = malloc(sizeof(pac_variant_s) * rule->num_variants); + for(usz_t variant_index = 0; variant_index < rule->num_variants; ++variant_index) + { + + pac_copy_single_variant(grammar, &rule->variants[variant_index], &ast_rule.variants[variant_index]); + } + } +} + +pac_grammar_s pac_link_grammar(pac_ast_s ast) +{ + pac_grammar_s grammar; + grammar.num_rules = ast.num_rules; + grammar.rules = malloc(sizeof(pac_rule_s) * ast.num_rules); + + pac_copy_empty_rules_from_ast_to_grammar(&grammar, &ast); + pac_copy_variants_and_link_references(&grammar, &ast); + + return grammar; +}