Moved the item parser into its own file and replaced the requirement for explicit connectors between items with collections of items

This commit is contained in:
Eric-Paul Ickhorn 2023-11-29 22:13:15 +01:00
parent 1d8df2c22a
commit 2cbcfdd336
3 changed files with 195 additions and 101 deletions

View File

@ -24,6 +24,8 @@ typedef enum
PAC_AST_ITEM_INVALID = 0x00,
PAC_AST_ITEM_REFERENCE,
PAC_AST_ITEM_LITERAL,
PAC_AST_ITEM_RANGE,
PAC_AST_ITEM_COLLECTION, // A collection of any of the other items which may not be interrupted by spaces.
PAC_AST_ITEM_SET
} pac_ast_item_e;
@ -49,6 +51,7 @@ typedef struct pac_ast_rule pac_ast_rule_s;
typedef struct pac_ast_variant pac_ast_variant_s;
typedef struct pac_ast_string_literal pac_ast_string_literal_s;
typedef struct pac_ast_reference pac_ast_reference_s;
typedef struct pac_ast_collection pac_ast_collection_s;
typedef struct pac_ast_item pac_ast_item_s;
struct pac_ast
@ -80,6 +83,12 @@ struct pac_ast_reference
char *name;
};
// A group of items stored as the payload of a PAC_AST_ITEM_COLLECTION item.
struct pac_ast_collection
{
// Number of entries of `items` that have been filled in.
usz_t num_items;
// Array holding the items of the collection.
pac_ast_item_s *items;
};
struct pac_ast_string_literal
{
usz_t length;
@ -93,6 +102,7 @@ struct pac_ast_item
{
pac_ast_string_literal_s string_literal;
pac_ast_set_e set;
pac_ast_collection_s collection;
pac_ast_reference_s reference;
} data;
};

View File

@ -13,95 +13,6 @@
#define STRING_AT(index) &builder->token_list->source[builder->token_list->tokens[index].offset]
#define END_REACHED (builder->cursor >= builder->token_list->num_tokens)
/**
 * Parses a reference (opening tag, word, closing tag) into `reference`.
 *
 * Must be entered with the cursor on the opening tag token. On success the
 * cursor is left on the token after the closing tag and `reference->name`
 * holds a NUL-terminated copy of the word, allocated from the builder's
 * string arena.
 *
 * @param builder    Builder whose token cursor is advanced.
 * @param reference  Output; receives the name and its length.
 * @return PAC_AST_STATUS_SUCCESS on success,
 *         PAC_AST_STATUS_NOT_RECOVERABLE if not entered on an opening tag,
 *         PAC_AST_STATUS_UNEXPECTED_FILE_END if the tokens end inside the
 *         reference, otherwise the status of the error handler that was run.
 */
pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_reference_s *reference)
{
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPENING_TAG)
    {
        pac_internal_error_s error;
        // NOTE(review): the type says "invalid returned status" while the
        // "invalid function entry" specifics are filled in - confirm which
        // error type is intended.
        error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
        // __FILE__ has to stay outside the quotes, otherwise it is not
        // expanded and the literal text "__FILE__" ends up in the log.
        error.specifics.invalid_function_entry.function = __FILE__ ":pac_build_ast_reference";
        error.specifics.invalid_function_entry.error_description = "The function must be entered while at an opening tag token.";
        pac_log_internal_error(builder->logger, error);
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }
    SKIP_TOKEN;

    // The token array must not be read once the cursor has run past its end.
    if(END_REACHED)
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    if(CURRENT_TOKEN.type != PAC_TOKEN_WORD)
        return pac_ast_handle_invalid_reference_name_token(builder);

    // Remember where the name is so length and text come from the same token.
    usz_t name_cursor = TOKEN_CURSOR;
    SKIP_TOKEN;

    if(END_REACHED)
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSING_TAG)
        return pac_ast_handle_missing_reference_close_tag(builder);
    SKIP_TOKEN;

    // Test if this reference is followed by an equals sign. That equals sign shouldn't
    // be there anyways and it being there can be a hint for a missing semicolon making
    // this reference actually be a rule header.
    if(!END_REACHED && CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS)
        return pac_ast_handle_reference_with_equals_sign(builder);

    reference->len_name = TOKEN_AT(name_cursor).length;
    reference->name = pac_arena_alloc(&builder->string_arena, reference->len_name + 1);
    pac_memory_copy(reference->name, STRING_AT(name_cursor), reference->len_name);
    reference->name[reference->len_name] = 0x00;

    return PAC_AST_STATUS_SUCCESS;
}
/**
 * Stores a NUL-terminated copy of the current token's text in `literal`.
 *
 * The copy is allocated from the builder's string arena. The token is
 * consumed afterwards.
 *
 * @param builder  Builder whose current token is copied and then skipped.
 * @param literal  Output; receives the copied text and its length.
 * @return Always PAC_AST_STATUS_SUCCESS.
 */
pac_ast_status_e pac_build_ast_string_literal(pac_ast_builder_s *builder, pac_ast_string_literal_s *literal)
{
    // Both values are read before anything is allocated.
    char *token_text = CURRENT_STRING;
    usz_t token_length = CURRENT_TOKEN.length;

    literal->length = token_length;
    literal->string = pac_arena_alloc(&builder->string_arena, token_length + 1);

    pac_memory_copy(literal->string, token_text, token_length);
    literal->string[token_length] = 0x00;

    SKIP_TOKEN;
    return PAC_AST_STATUS_SUCCESS;
}
/**
 * Parses a single item of a variant, dispatching on the current token's type.
 *
 * String literals and references are handed to their own builders; the word
 * and integer keywords become set items and are consumed here. Any other
 * token is passed to the unknown-item error handler.
 *
 * @param builder        Builder whose token cursor is advanced.
 * @param item           Output; cleared before parsing.
 * @param rule_name      Name of the enclosing rule, forwarded to the error handler.
 * @param variant_index  Index of the enclosing variant, forwarded to the error handler.
 * @param item_index     Index of this item, forwarded to the error handler.
 * @return The status of the builder or handler that dealt with the token,
 *         or PAC_AST_STATUS_SUCCESS for the keyword sets.
 */
pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item, char *rule_name, usz_t variant_index, usz_t item_index)
{
    pac_memory_zero(item, sizeof(pac_ast_item_s));
    item->type = PAC_AST_ITEM_INVALID;

    switch(CURRENT_TOKEN.type)
    {
    case PAC_TOKEN_LIT_STRING:
        item->type = PAC_AST_ITEM_LITERAL;
        return pac_build_ast_string_literal(builder, &item->data.string_literal);

    case PAC_TOKEN_SIGN_OPENING_TAG:
        item->type = PAC_AST_ITEM_REFERENCE;
        return pac_build_ast_reference(builder, &item->data.reference);

    case PAC_TOKEN_KEYWORD_WORD:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_WORD;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    case PAC_TOKEN_KEYWORD_INTEGER:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_INTEGER;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    default:
        break;
    }

    return pac_ast_handle_unknown_item_type(builder, rule_name, variant_index, item_index);
}
pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index)
{
pac_memory_zero(variant, sizeof(pac_ast_variant_s));
@ -116,7 +27,7 @@ pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_varia
variant->items = calloc(sizeof(pac_ast_item_s), items_capacity);
}
pac_ast_status_e status =
pac_build_ast_item(builder, &variant->items[variant->num_items], rule_name, variant->num_items, variant->num_items);
pac_build_ast_item(builder, &variant->items[variant->num_items], rule_name, variant_index, variant->num_items);
if(status == PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER)
continue;
@ -133,17 +44,6 @@ pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_varia
) {
return PAC_AST_STATUS_SUCCESS;
}
// Test if there is no separator and, if there is none, handle the error.
if(
(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA)
&& (CURRENT_TOKEN.type != PAC_TOKEN_SIGN_MINUS)
) {
status = pac_ast_handle_missing_item_separator(builder, rule_name, variant_index, variant->num_items);
if(status != PAC_AST_STATUS_ERROR_HANDLED) return status;
continue;
}
SKIP_TOKEN;
}
return PAC_AST_STATUS_UNEXPECTED_FILE_END;
}

View File

@ -0,0 +1,184 @@
#include <ast.h>
#include <stdlib.h>
// Token cursor helpers. Every macro expects a variable named `builder` that
// points to the AST builder to be in scope at the point of use.
// All expansions are fully parenthesized so they stay one operand when they
// are combined with other operators (for example `STRING_AT(i)[0]`).

// Pointer into the source text at the start of the current token.
#define CURRENT_STRING (&builder->token_list->source[builder->token_list->tokens[builder->cursor].offset])
// The token the cursor is on.
#define CURRENT_TOKEN (builder->token_list->tokens[builder->cursor])
// Index of the current token.
#define TOKEN_CURSOR (builder->cursor)
// Moves the cursor one token forward.
#define SKIP_TOKEN (++builder->cursor)
// Moves the cursor one token back.
#define REWIND_TOKEN (--builder->cursor)
// The token at an arbitrary index.
#define TOKEN_AT(index) (builder->token_list->tokens[(index)])
// Pointer into the source text at the start of the token at `index`.
#define STRING_AT(index) (&builder->token_list->source[builder->token_list->tokens[(index)].offset])
// True once the cursor has moved past the last token.
#define END_REACHED (builder->cursor >= builder->token_list->num_tokens)
// Forward declarations of builders that are defined further down in this
// file, so that the functions above their definitions can already call them.
pac_ast_status_e pac_build_ast_string_literal (pac_ast_builder_s *builder, pac_ast_string_literal_s *literal);
pac_ast_status_e pac_build_ast_reference (pac_ast_builder_s *builder, pac_ast_reference_s *reference);
pac_ast_status_e pac_build_ast_item (pac_ast_builder_s *builder, pac_ast_item_s *item, char *rule_name, usz_t variant_index, usz_t item_index);
/**
 * Parses one item inside a collection, dispatching on the current token's type.
 *
 * Accepts the same item kinds as a variant item except for a nested
 * collection: an opening bracket only prints a message. Tokens that are not
 * accepted are left unconsumed.
 *
 * @param builder    Builder whose token cursor is advanced.
 * @param item       Output; cleared before parsing.
 * @param rule_name  Name of the enclosing rule (currently not used here).
 * @return The status of the literal or reference builder,
 *         PAC_AST_STATUS_SUCCESS for the keyword sets, or
 *         PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER for any other token.
 */
pac_ast_status_e pac_build_ast_collection_item(pac_ast_builder_s *builder, pac_ast_item_s *item, char *rule_name)
{
    pac_memory_zero(item, sizeof(pac_ast_item_s));
    item->type = PAC_AST_ITEM_INVALID;

    switch(CURRENT_TOKEN.type)
    {
    case PAC_TOKEN_LIT_STRING:
        item->type = PAC_AST_ITEM_LITERAL;
        return pac_build_ast_string_literal(builder, &item->data.string_literal);

    case PAC_TOKEN_SIGN_OPENING_TAG:
        item->type = PAC_AST_ITEM_REFERENCE;
        return pac_build_ast_reference(builder, &item->data.reference);

    case PAC_TOKEN_KEYWORD_WORD:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_WORD;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    case PAC_TOKEN_KEYWORD_INTEGER:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_INTEGER;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    case PAC_TOKEN_SIGN_OPENING_BRACKET:
        // A collection inside a collection is rejected with a message.
        puts("Collections can't be nested!");
        break;

    default:
        break;
    }

    return PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER;
}
/**
 * Parses the items of a collection into `collection`.
 *
 * Must be entered with the cursor on the first token after the opening
 * bracket. Items are read until a closing bracket is found; that bracket is
 * consumed, so the cursor is left on the token following the collection.
 *
 * @param builder     Builder whose token cursor is advanced.
 * @param collection  Output; cleared first, then filled with the parsed
 *                    items. `items` is allocated with calloc/realloc.
 * @param rule_name   Name of the enclosing rule, forwarded to the item parser.
 * @return PAC_AST_STATUS_SUCCESS once the closing bracket has been consumed,
 *         PAC_AST_STATUS_UNEXPECTED_FILE_END if the tokens run out first,
 *         PAC_AST_STATUS_NOT_RECOVERABLE if the item array cannot be
 *         allocated, or any other failure status of the item parser.
 */
pac_ast_status_e pac_build_ast_collection(pac_ast_builder_s *builder, pac_ast_collection_s *collection, char *rule_name)
{
    pac_memory_zero(collection, sizeof(pac_ast_collection_s));

    usz_t items_capacity = 8;
    collection->items = calloc(items_capacity, sizeof(pac_ast_item_s));
    if(collection->items == NULL)
        return PAC_AST_STATUS_NOT_RECOVERABLE;

    while(!END_REACHED)
    {
        // Checked before an item is parsed so that an empty collection ends
        // here, and only while the cursor is known to be inside the tokens.
        if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_CLOSING_BRACKET)
        {
            // Consume the bracket; the variant item parser has no case for it.
            SKIP_TOKEN;
            return PAC_AST_STATUS_SUCCESS;
        }

        if(collection->num_items >= items_capacity)
        {
            // Grow the existing array; allocating a fresh one would drop
            // every item parsed so far and leak the old array.
            usz_t grown_capacity = items_capacity * 2;
            pac_ast_item_s *grown_items = realloc(collection->items, grown_capacity * sizeof(pac_ast_item_s));
            if(grown_items == NULL)
                return PAC_AST_STATUS_NOT_RECOVERABLE;

            collection->items = grown_items;
            items_capacity = grown_capacity;
        }

        usz_t cursor_before_item = TOKEN_CURSOR;
        pac_ast_status_e status =
            pac_build_ast_collection_item(builder, &collection->items[collection->num_items], rule_name);

        if(status == PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER)
        {
            // The item parser leaves a rejected token unconsumed; skip it
            // here so that the loop is guaranteed to make progress.
            if(TOKEN_CURSOR == cursor_before_item)
                SKIP_TOKEN;
            continue;
        }

        if(status != PAC_AST_STATUS_SUCCESS)
            return status;

        ++collection->num_items;
    }

    return PAC_AST_STATUS_UNEXPECTED_FILE_END;
}
/**
 * Parses a reference (opening tag, word, closing tag) into `reference`.
 *
 * Must be entered with the cursor on the opening tag token. On success the
 * cursor is left on the token after the closing tag and `reference->name`
 * holds a NUL-terminated copy of the word, allocated from the builder's
 * string arena.
 *
 * @param builder    Builder whose token cursor is advanced.
 * @param reference  Output; receives the name and its length.
 * @return PAC_AST_STATUS_SUCCESS on success,
 *         PAC_AST_STATUS_NOT_RECOVERABLE if not entered on an opening tag,
 *         PAC_AST_STATUS_UNEXPECTED_FILE_END if the tokens end inside the
 *         reference, otherwise the status of the error handler that was run.
 */
pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_reference_s *reference)
{
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPENING_TAG)
    {
        pac_internal_error_s error;
        // NOTE(review): the type says "invalid returned status" while the
        // "invalid function entry" specifics are filled in - confirm which
        // error type is intended.
        error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
        // __FILE__ has to stay outside the quotes, otherwise it is not
        // expanded and the literal text "__FILE__" ends up in the log.
        error.specifics.invalid_function_entry.function = __FILE__ ":pac_build_ast_reference";
        error.specifics.invalid_function_entry.error_description = "The function must be entered while at an opening tag token.";
        pac_log_internal_error(builder->logger, error);
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }
    SKIP_TOKEN;

    // The token array must not be read once the cursor has run past its end.
    if(END_REACHED)
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    if(CURRENT_TOKEN.type != PAC_TOKEN_WORD)
        return pac_ast_handle_invalid_reference_name_token(builder);

    // Remember where the name is so length and text come from the same token.
    usz_t name_cursor = TOKEN_CURSOR;
    SKIP_TOKEN;

    if(END_REACHED)
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSING_TAG)
        return pac_ast_handle_missing_reference_close_tag(builder);
    SKIP_TOKEN;

    // Test if this reference is followed by an equals sign. That equals sign shouldn't
    // be there anyways and it being there can be a hint for a missing semicolon making
    // this reference actually be a rule header.
    if(!END_REACHED && CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS)
        return pac_ast_handle_reference_with_equals_sign(builder);

    reference->len_name = TOKEN_AT(name_cursor).length;
    reference->name = pac_arena_alloc(&builder->string_arena, reference->len_name + 1);
    pac_memory_copy(reference->name, STRING_AT(name_cursor), reference->len_name);
    reference->name[reference->len_name] = 0x00;

    return PAC_AST_STATUS_SUCCESS;
}
/**
 * Stores a NUL-terminated copy of the current token's text in `literal`.
 *
 * The copy is allocated from the builder's string arena. The token is
 * consumed afterwards.
 *
 * @param builder  Builder whose current token is copied and then skipped.
 * @param literal  Output; receives the copied text and its length.
 * @return Always PAC_AST_STATUS_SUCCESS.
 */
pac_ast_status_e pac_build_ast_string_literal(pac_ast_builder_s *builder, pac_ast_string_literal_s *literal)
{
    // Both values are read before anything is allocated.
    char *token_text = CURRENT_STRING;
    usz_t token_length = CURRENT_TOKEN.length;

    literal->length = token_length;
    literal->string = pac_arena_alloc(&builder->string_arena, token_length + 1);

    pac_memory_copy(literal->string, token_text, token_length);
    literal->string[token_length] = 0x00;

    SKIP_TOKEN;
    return PAC_AST_STATUS_SUCCESS;
}
/**
 * Parses a single item of a variant, dispatching on the current token's type.
 *
 * String literals, references and collections are handed to their own
 * builders (the opening bracket of a collection is skipped first); the word
 * and integer keywords become set items and are consumed here. Any other
 * token is passed to the unknown-item error handler.
 *
 * @param builder        Builder whose token cursor is advanced.
 * @param item           Output; cleared before parsing.
 * @param rule_name      Name of the enclosing rule, forwarded to the collection
 *                       builder and the error handler.
 * @param variant_index  Index of the enclosing variant, forwarded to the error handler.
 * @param item_index     Index of this item, forwarded to the error handler.
 * @return The status of the builder or handler that dealt with the token,
 *         or PAC_AST_STATUS_SUCCESS for the keyword sets.
 */
pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item, char *rule_name, usz_t variant_index, usz_t item_index)
{
    pac_memory_zero(item, sizeof(pac_ast_item_s));
    item->type = PAC_AST_ITEM_INVALID;

    switch(CURRENT_TOKEN.type)
    {
    case PAC_TOKEN_LIT_STRING:
        item->type = PAC_AST_ITEM_LITERAL;
        return pac_build_ast_string_literal(builder, &item->data.string_literal);

    case PAC_TOKEN_SIGN_OPENING_TAG:
        item->type = PAC_AST_ITEM_REFERENCE;
        return pac_build_ast_reference(builder, &item->data.reference);

    case PAC_TOKEN_KEYWORD_WORD:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_WORD;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    case PAC_TOKEN_KEYWORD_INTEGER:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_INTEGER;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    case PAC_TOKEN_SIGN_OPENING_BRACKET:
        item->type = PAC_AST_ITEM_COLLECTION;
        // The collection builder starts on the first token after the bracket.
        SKIP_TOKEN;
        return pac_build_ast_collection(builder, &item->data.collection, rule_name);

    default:
        break;
    }

    return pac_ast_handle_unknown_item_type(builder, rule_name, variant_index, item_index);
}