Compare commits

..

3 Commits

17 changed files with 562 additions and 401 deletions

View File

@ -1,6 +1,6 @@
#!/usr/bin/env bash #!/usr/bin/env bash
INCLUDE_STATEMENTS="-I code/inc/ -I code/exports/" INCLUDE_STATEMENTS="-I core-parser/inc/ -I core-parser/exports/"
INVOCATION_PATH=`pwd` INVOCATION_PATH=`pwd`
function build_checks() { function build_checks() {
@ -15,7 +15,7 @@ function build_checks() {
fi fi
cd $check_folder cd $check_folder
gcc src/*.c $INVOCATION_PATH/.build/libparcel.a -o check.elf -I $INVOCATION_PATH/code/inc/ gcc src/*.c $INVOCATION_PATH/.build/libparcel.a -o check.elf -I $INVOCATION_PATH/core-parser/inc/
cd .. cd ..
done done
} }
@ -29,14 +29,14 @@ function compile_sources() {
rm -r .build/objects/ rm -r .build/objects/
rm .build/libparcel.a rm .build/libparcel.a
mkdir -p .build/objects mkdir -p .build/objects
for source_file in $(find "code/src/") for source_file in $(find "core-parser/src/")
do do
if [[ ! -f $source_file ]] then if [[ ! -f $source_file ]] then
continue continue
fi fi
# Cut out the prefix (code/src/) # Cut out the prefix (core-parser/src/)
BASENAME=`echo $source_file | cut -c '10-'` BASENAME=`echo $source_file | cut -c '17-'`
echo "==== COMPILING $source_file ====" echo "==== COMPILING $source_file ===="
gcc -c $COMPILATION_FLAGS -o .build/objects/$BASENAME.o $source_file $INCLUDE_STATEMENTS gcc -c $COMPILATION_FLAGS -o .build/objects/$BASENAME.o $source_file $INCLUDE_STATEMENTS
done done

View File

@ -1,247 +0,0 @@
#include <ast.h>
#include <logger.h>
#include <tokenizer.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
// Token-stream accessors. Every macro expects a local variable named
// `grower` of type `pac_ast_grower_s *` to be in scope.
// All expansions (and the TOKEN_AT argument) are parenthesized so that they
// keep their meaning when used inside larger expressions.
#define CURSTR (&grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset])
#define CURTOK (grower->token_list->tokens[grower->token_list->cursor])
#define CURSOR (grower->token_list->cursor)
#define SKIP_TOKEN (++grower->token_list->cursor)
#define REWIND_TOKEN (--grower->token_list->cursor)
#define TOKEN_AT(index) (grower->token_list->tokens[(index)])
#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens)
// pac_grow_reference: Parses a reference of the form `<name>` at the cursor
// and stores a NUL-terminated heap copy of the name in `reference`.
//
// Returns 3 (the number of tokens consumed) on success and a negative value
// on failure. The error handlers are still invoked for their logging, but
// their boolean result is no longer returned as if it were a success count.
i32_t pac_grow_reference(pac_ast_grower_s *grower, pac_ast_reference_s *reference)
{
    if(END_REACHED || (CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG))
    {
        // This is only possible with malformed state.
        return -1;
    }
    SKIP_TOKEN;

    // Never inspect a token beyond the end of the token list.
    if(END_REACHED)
    {
        return -1;
    }
    if(CURTOK.type != PAC_TOKEN_WORD)
    {
        pac_ast_handle_invalid_reference_name_token(grower);
        return -1;
    }
    usz_t len_name = CURTOK.length;
    char *name = CURSTR;
    SKIP_TOKEN;

    if(END_REACHED)
    {
        return -1;
    }
    if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG)
    {
        pac_ast_handle_missing_reference_close_tag(grower);
        return -1;
    }
    SKIP_TOKEN;

    // `<name> =` starts a rule header, it isn't a reference.
    if(!END_REACHED && (CURTOK.type == PAC_TOKEN_SIGN_EQUALS))
    {
        pac_ast_handle_reference_with_equals_sign(grower);
        return -1;
    }

    reference->name = malloc(len_name + 1);
    if(reference->name == NULL)
    {
        return -1;
    }
    reference->len_name = len_name;
    pac_memory_copy(reference->name, name, len_name);
    reference->name[len_name] = 0x00;
    return 3;
}
// pac_grow_item: Parses a single item (string literal, reference or set
// keyword) at the cursor into `item`.
//
// Returns a positive value on success and a negative value if the current
// token can't start an item; the result of the reference parser is passed
// through unchanged.
i32_t pac_grow_item(pac_ast_grower_s *grower, pac_ast_item_s *item)
{
    // Begin with a blank, invalid item.
    pac_memory_zero(item, sizeof(pac_ast_item_s));
    item->type = PAC_AST_ITEM_INVALID;

    switch(CURTOK.type)
    {
    case PAC_TOKEN_LIT_STRING:
        // Copy the literal's text out of the source into an own string.
        item->type = PAC_AST_ITEM_LITERAL;
        item->data.literal.length = CURTOK.length;
        item->data.literal.string = malloc(item->data.literal.length + 1);
        pac_memory_copy(item->data.literal.string, CURSTR, CURTOK.length);
        item->data.literal.string[item->data.literal.length] = 0x00;
        SKIP_TOKEN;
        return 1;

    case PAC_TOKEN_SIGN_OPEN_TAG:
        item->type = PAC_AST_ITEM_REFERENCE;
        return pac_grow_reference(grower, &item->data.reference);

    case PAC_TOKEN_KEYWORD_WORD:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_WORD;
        SKIP_TOKEN;
        return 1;

    case PAC_TOKEN_KEYWORD_INTEGER:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_INTEGER;
        SKIP_TOKEN;
        return 1;

    default:
        break;
    }
    return -1;
}
// pac_grow_variant: Parses one variant of a rule: a comma-separated list of
// items which ends at a vertical bar or a semicolon (neither is consumed).
//
// Returns the number of consumed tokens on success and a negative value on
// failure. `rule_name` and `variant_index` are currently unused.
i32_t pac_grow_variant(pac_ast_grower_s *grower, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index)
{
    usz_t start_index = CURSOR;
    pac_memory_zero(variant, sizeof(pac_ast_variant_s));

    usz_t items_capacity = 8;
    variant->items = calloc(items_capacity, sizeof(pac_ast_item_s));
    if(variant->items == NULL)
    {
        return -1;
    }

    while(!END_REACHED)
    {
        if(variant->num_items >= items_capacity)
        {
            // Grow in place. Allocating a fresh array here (as was done
            // before) would drop every item parsed so far and leak the
            // old array.
            usz_t grown_capacity = items_capacity * 2;
            pac_ast_item_s *grown_items = realloc(variant->items, sizeof(pac_ast_item_s) * grown_capacity);
            if(grown_items == NULL)
            {
                return -1;
            }
            pac_memory_zero(&grown_items[items_capacity], sizeof(pac_ast_item_s) * (grown_capacity - items_capacity));
            variant->items = grown_items;
            items_capacity = grown_capacity;
        }

        i32_t success = pac_grow_item(grower, &variant->items[variant->num_items]);
        ++variant->num_items;
        if(success < 0)
        {
            return success - 1;
        }

        // The item may have been the last token; don't read past the end.
        if(END_REACHED)
        {
            break;
        }
        if(
            (CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
            || (CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
        ) {
            return CURSOR - start_index;
        }
        if(CURTOK.type != PAC_TOKEN_SIGN_COMMA)
        {
            return -1;
        }
        SKIP_TOKEN;
    }
    return -1;
}
// pac_grow_rule: Parses a whole rule: the header `<name> =` followed by
// variants which are separated by vertical bars and closed by a semicolon.
//
// Returns the number of consumed tokens on success, 2 if a broken variant
// was skipped up to the rule's semicolon, and a negative value on failure.
i32_t pac_grow_rule(pac_ast_grower_s *grower, pac_ast_rule_s *rule)
{
    pac_memory_zero(rule, sizeof(pac_ast_rule_s));

    // Parse the header
    usz_t start_index = CURSOR;
    if(END_REACHED || (CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG))
    {
        return -1;
    }
    SKIP_TOKEN;
    if(END_REACHED || (CURTOK.type != PAC_TOKEN_WORD))
    {
        puts("A rule name must be a single word!");
        return -1;
    }
    usz_t len_name = CURTOK.length;
    // This is a pointer into the source text, so it must not be stored in
    // an integer type.
    char *name_in_source = CURSTR;
    SKIP_TOKEN;
    if(END_REACHED || (CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG))
    {
        puts("Missing Tag closing sign!");
        return -1;
    }
    SKIP_TOKEN;
    if(END_REACHED || (CURTOK.type != PAC_TOKEN_SIGN_EQUALS))
    {
        return -1;
    }
    SKIP_TOKEN;

    // Parse all variants
    rule->name = malloc(len_name + 1);
    if(rule->name == NULL)
    {
        return -1;
    }
    pac_memory_copy(rule->name, name_in_source, len_name);
    rule->name[len_name] = 0;

    usz_t variants_capacity = 4;
    rule->variants = malloc(sizeof(pac_ast_variant_s) * variants_capacity);
    if(rule->variants == NULL)
    {
        return -1;
    }

    while(!END_REACHED)
    {
        if(rule->num_variants >= variants_capacity)
        {
            // Keep the old array reachable if the reallocation fails.
            usz_t grown_capacity = variants_capacity * 2;
            pac_ast_variant_s *grown_variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * grown_capacity);
            if(grown_variants == NULL)
            {
                return -1;
            }
            rule->variants = grown_variants;
            variants_capacity = grown_capacity;
        }

        i32_t success = pac_grow_variant(grower, &rule->variants[rule->num_variants], rule->name, rule->num_variants);
        ++rule->num_variants;
        if(success < 0)
        {
            printf("Failed parsing a rule's variant!");
            while(!END_REACHED)
            {
                if(CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
                {
                    printf("Continuing with next variant.\n");
                    // Consume the bar. If it is left in place, the next
                    // variant fails on it right away and this recovery
                    // repeats forever.
                    SKIP_TOKEN;
                    break;
                }
                if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
                {
                    printf("Continuing with next rule.\n");
                    SKIP_TOKEN;
                    return 2;
                }
                SKIP_TOKEN;
            }
            continue;
        }

        // The variant may have consumed the final token.
        if(END_REACHED)
        {
            break;
        }
        if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
        {
            SKIP_TOKEN;
            return CURSOR - start_index;
        }
        if(CURTOK.type != PAC_TOKEN_SIGN_VERTICAL_BAR)
        {
            return -1;
        }
        SKIP_TOKEN;
    }
    return -1;
}
// pac_grow_ast: Parses the given token list rule by rule into an AST.
//
// Every attempted rule, including a failed one, occupies a slot in the
// returned `rules` array. Parsing stops early if the rule array can't be
// (re)allocated.
pac_ast_s pac_grow_ast(pac_tlist_s tokens)
{
    usz_t rules_capacity = 32;
    pac_ast_s ast;
    ast.num_rules = 0;
    ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity);

    pac_ast_grower_s grower;
    grower.logger = pac_create_logger();
    grower.token_list = &tokens;
    grower.failed = FALSE;

    if(ast.rules == NULL)
    {
        return ast;
    }

    while(tokens.cursor < tokens.num_tokens)
    {
        if(ast.num_rules >= rules_capacity)
        {
            // Keep the parsed rules reachable if the reallocation fails.
            usz_t grown_capacity = rules_capacity * 2;
            pac_ast_rule_s *grown_rules = realloc(ast.rules, sizeof(pac_ast_rule_s) * grown_capacity);
            if(grown_rules == NULL)
            {
                break;
            }
            ast.rules = grown_rules;
            rules_capacity = grown_capacity;
        }

        usz_t rule_start = tokens.cursor;
        int success = pac_grow_rule(&grower, &ast.rules[ast.num_rules]);
        if(success < 0)
        {
            printf("Failed parsing a rule at index: %lu!\n", (unsigned long) tokens.cursor);
            // A rule can fail without consuming a token. Step over the
            // offending token, or this loop would never terminate.
            if(tokens.cursor == rule_start)
            {
                ++tokens.cursor;
            }
        }
        ++ast.num_rules;
    }
    return ast;
}

View File

@ -1,115 +0,0 @@
#include <ast.h>
#include <logger.h>
#include <tokenizer.h>
// Token-stream accessors. Every macro expects a local variable named
// `grower` of type `pac_ast_grower_s *` to be in scope.
// All expansions are parenthesized so that they keep their meaning when
// used inside larger expressions.
#define CURSTR (&grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset])
#define CURTOK (grower->token_list->tokens[grower->token_list->cursor])
#define CURSOR (grower->token_list->cursor)
#define SKIP_TOKEN (++grower->token_list->cursor)
#define REWIND_TOKEN (--grower->token_list->cursor)
#define TOKEN_AT(index) (grower->token_list->tokens[(index)])
#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens)
// pac_ast_builder_is_at_item_start: A utility function which returns whether
// the current token is one that can begin an item: a string literal, an
// opening tag, or the word/integer keyword token.
bool_t pac_ast_builder_is_at_item_start(pac_ast_grower_s *grower)
{
    if(CURTOK.type == PAC_TOKEN_LIT_STRING)
        return TRUE;
    if(CURTOK.type == PAC_TOKEN_SIGN_OPEN_TAG)
        return TRUE;
    if(CURTOK.type == PAC_TOKEN_KEYWORD_WORD)
        return TRUE;
    if(CURTOK.type == PAC_TOKEN_KEYWORD_INTEGER)
        return TRUE;
    return FALSE;
}
// pac_ast_handle_invalid_reference_name_token: Logs a naming error for a
// reference whose name isn't a single word. It must be called with the
// cursor directly behind the opening tag.
//
// The reported name is the source text from the opening tag up to (and
// including) a closing tag found within the next three tokens, or up to the
// end of the current token if there is none.
//
// Returns whether the parser can continue to catch errors (always TRUE).
bool_t pac_ast_handle_invalid_reference_name_token(pac_ast_grower_s *grower)
{
    usz_t num_tokens = grower->token_list->num_tokens;
    usz_t open_tag_index = CURSOR - 1;
    usz_t open_tag_src_offset = TOKEN_AT(open_tag_index).offset;

    // If the opening tag was the last token, report on it rather than
    // reading a token beyond the end of the list.
    usz_t reported_index = END_REACHED ? open_tag_index : CURSOR;

    // Find closing tag for getting the name of the word for the error message.
    usz_t len_reference_name = 0;
    for(usz_t tried_tokens = 0; tried_tokens < 3; ++tried_tokens)
    {
        usz_t probe_index = CURSOR + tried_tokens;
        if(probe_index >= num_tokens)
            break;
        if(TOKEN_AT(probe_index).type == PAC_TOKEN_SIGN_CLOSE_TAG)
        {
            // The +1 takes the closing tag into the name
            len_reference_name = (TOKEN_AT(probe_index).offset - open_tag_src_offset) + 1;
            break;
        }
    }

    // If no closing tag could be found, use the token after the opening tag.
    if(len_reference_name == 0)
        len_reference_name = ((TOKEN_AT(reported_index).offset + TOKEN_AT(reported_index).length) - open_tag_src_offset);

    pac_naming_error_s error;
    error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME;
    error.line = TOKEN_AT(reported_index).line;
    error.column = TOKEN_AT(reported_index).column;
    error.specifics.invalid_rule_name.given_rule_name = pac_log_alloc(&grower->logger, len_reference_name + 1);
    pac_memory_copy(error.specifics.invalid_rule_name.given_rule_name, &grower->token_list->source[open_tag_src_offset], len_reference_name);
    // The buffer has one spare byte; terminate the copied name explicitly.
    error.specifics.invalid_rule_name.given_rule_name[len_reference_name] = 0x00;
    pac_log_naming_error(&grower->logger, error);

    grower->failed = TRUE;
    return TRUE;
}
// pac_ast_handle_missing_reference_close_tag: Logs a "missing token" syntax
// error located at the current token, asking for the tag closer.
//
// Returns whether the parser can continue to catch errors (always TRUE).
bool_t pac_ast_handle_missing_reference_close_tag(pac_ast_grower_s *grower)
{
    pac_syntax_error_s missing_closer;

    missing_closer.type = PAC_SYNTAX_ERROR_MISSING_TOKEN;
    missing_closer.specifics.missing_token.wanted_token = "Tag Closer (>)";
    missing_closer.specifics.missing_token.hint = NULL;

    // Point the error at the token found in place of the closer.
    missing_closer.column = CURTOK.column;
    missing_closer.line = CURTOK.line;

    pac_log_syntax_error(&grower->logger, missing_closer);
    return TRUE;
}
// pac_ast_handle_reference_with_equals_sign: Logs an "odd token" syntax
// error for an equals sign which directly follows a reference.
//
// Returns whether the parser can continue to catch errors (always FALSE).
bool_t pac_ast_handle_reference_with_equals_sign(pac_ast_grower_s *grower)
{
    pac_syntax_error_s odd_equals;

    odd_equals.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    odd_equals.specifics.odd_token.valid_options[0] = "Any Rule Item";
    odd_equals.specifics.odd_token.num_valid_options = 1;
    odd_equals.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago.";

    // Point the error at the equals sign itself.
    odd_equals.column = CURTOK.column;
    odd_equals.line = CURTOK.line;

    pac_log_syntax_error(&grower->logger, odd_equals);

    // TODO: Handle the tokens following this as a new rule.
    return FALSE;
}
// pac_ast_handle_unknown_item_type: Logs an "odd token" syntax error for a
// token which can't start an item, then skips forward to the next token
// which can (or to the end of the token list).
//
// Returns whether the parser can continue to catch errors (always TRUE).
bool_t pac_ast_handle_unknown_item_type(pac_ast_grower_s *grower)
{
    pac_syntax_error_s error;
    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    error.line = CURTOK.line;
    error.column = CURTOK.column;
    // There is no hint for this error; don't hand an indeterminate pointer
    // to the logger.
    // NOTE(review): assumes the logger accepts a NULL hint for odd-token
    // errors, as the missing-token error does - confirm.
    error.specifics.odd_token.hint = NULL;
    error.specifics.odd_token.num_valid_options = 1;
    error.specifics.odd_token.valid_options[0] = "Any Rule Item";
    pac_log_syntax_error(&grower->logger, error);

    // Stop at the end of the token list instead of scanning beyond it.
    while(!END_REACHED && !pac_ast_builder_is_at_item_start(grower))
        SKIP_TOKEN;
    return TRUE;
}

View File

@ -6,6 +6,43 @@
#include <logger.h> #include <logger.h>
#include <tokenizer.h> #include <tokenizer.h>
// pac_ast_set_e: An enumeration of all sets known in Parcel's AST.
//
// A set describes the rough format of a token rather than its exact text,
// as with variable names: the format is known, but the actual name isn't.
typedef enum
{
PAC_AST_SET_RUNE,
PAC_AST_SET_WORD,
PAC_AST_SET_INTEGER,
PAC_AST_SET_FLOAT
} pac_ast_set_e;
// pac_ast_item_e: The kind of an AST item, stored in the item's `type` field.
//
// PAC_AST_ITEM_INVALID is explicitly zero, so an item whose memory has been
// zeroed is of the invalid kind.
typedef enum
{
PAC_AST_ITEM_INVALID = 0x00,
PAC_AST_ITEM_REFERENCE,
PAC_AST_ITEM_LITERAL,
PAC_AST_ITEM_SET
} pac_ast_item_e;
// pac_ast_status_e: The result of a parsing step or of an error handler.
// For failures, it tells how much higher in the call stack the program
// flow has to go to be able to recover.
//
// SUCCESS:                              the step finished without an error.
// ERROR_HANDLED:                        a handler recovered; the caller goes on.
// STOP_ESCALATING_IN_ITEM_PARSER:       not produced by the AST builder yet.
// STOP_ESCALATING_IN_VARIANT_PARSER:    the variant parser goes on with the next item.
// STOP_ESCALATING_IN_RULE_PARSER:       the rule parser skips to the next '|' or ';'.
// CONTINUE_AFTER_FINDING_NEXT_RULE:     pac_build_ast skips past the next ';'.
// NOT_RECOVERABLE:                      no recovery was possible.
// UNEXPECTED_FILE_END:                  the tokens ran out; pac_build_ast stops.
typedef enum
{
PAC_AST_STATUS_SUCCESS,
PAC_AST_STATUS_ERROR_HANDLED,
PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER,
PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER,
PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER,
PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE,
PAC_AST_STATUS_NOT_RECOVERABLE,
PAC_AST_STATUS_UNEXPECTED_FILE_END
} pac_ast_status_e;
typedef struct pac_ast pac_ast_s; typedef struct pac_ast pac_ast_s;
typedef struct pac_ast_rule pac_ast_rule_s; typedef struct pac_ast_rule pac_ast_rule_s;
typedef struct pac_ast_variant pac_ast_variant_s; typedef struct pac_ast_variant pac_ast_variant_s;
@ -47,28 +84,6 @@ struct pac_ast_literal
char *string; char *string;
}; };
// pac_ast_set_e: An enumeration of all sets known in Parcel's AST.
//
// A set describes the rough format of a token rather than its exact text,
// as with variable names: the format is known, but the actual name isn't.
typedef enum
{
PAC_AST_SET_RUNE,
PAC_AST_SET_WORD,
PAC_AST_SET_INTEGER,
PAC_AST_SET_FLOAT
} pac_ast_set_e;
// pac_ast_item_e: The kind of an AST item, stored in the item's `type` field.
//
// PAC_AST_ITEM_INVALID is explicitly zero, so an item whose memory has been
// zeroed is of the invalid kind.
typedef enum
{
PAC_AST_ITEM_INVALID = 0x00,
PAC_AST_ITEM_REFERENCE,
PAC_AST_ITEM_LITERAL,
PAC_AST_ITEM_SET
} pac_ast_item_e;
struct pac_ast_item struct pac_ast_item
{ {
pac_ast_item_e type; pac_ast_item_e type;
@ -80,18 +95,22 @@ struct pac_ast_item
} data; } data;
}; };
typedef struct pac_ast_grower typedef struct pac_ast_builder
{ {
usz_t cursor;
pac_tlist_s *token_list; pac_tlist_s *token_list;
pac_logger_s logger;
bool_t failed; bool_t failed;
} pac_ast_grower_s; pac_logger_s logger;
} pac_ast_builder_s;
pac_ast_s pac_grow_ast (pac_tlist_s tokens); pac_ast_s pac_build_ast (pac_tlist_s tokens);
bool_t pac_ast_handle_invalid_reference_name_token (pac_ast_grower_s *grower); pac_ast_status_e pac_ast_handle_invalid_reference_name_token (pac_ast_builder_s *builder);
bool_t pac_ast_handle_missing_reference_close_tag (pac_ast_grower_s *grower); pac_ast_status_e pac_ast_handle_missing_reference_close_tag (pac_ast_builder_s *builder);
bool_t pac_ast_handle_reference_with_equals_sign (pac_ast_grower_s *grower); pac_ast_status_e pac_ast_handle_reference_with_equals_sign (pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_missing_item_delimiter (pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header (pac_ast_builder_s *builder);
#endif // PARCEL_AST_H #endif // PARCEL_AST_H

View File

@ -48,9 +48,6 @@ struct pac_tlist
char *source; char *source;
usz_t num_tokens; usz_t num_tokens;
pac_token_s *tokens; pac_token_s *tokens;
// cursor: An index into the 'tokens'-array; used in later stages.
usz_t cursor;
}; };
pac_token_e pac_word_to_token_type (char *word, usz_t length); pac_token_e pac_word_to_token_type (char *word, usz_t length);

318
core-parser/src/ast.c Normal file
View File

@ -0,0 +1,318 @@
#include <ast.h>
#include <logger.h>
#include <tokenizer.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
// Token-stream accessors. Every macro expects a local variable named
// `builder` of type `pac_ast_builder_s *` to be in scope.
// All expansions are parenthesized so that they keep their meaning when
// used inside larger expressions.
#define CURRENT_STRING (&builder->token_list->source[builder->token_list->tokens[builder->cursor].offset])
#define CURRENT_TOKEN (builder->token_list->tokens[builder->cursor])
#define TOKEN_CURSOR (builder->cursor)
#define SKIP_TOKEN (++builder->cursor)
#define REWIND_TOKEN (--builder->cursor)
#define TOKEN_AT(index) (builder->token_list->tokens[(index)])
#define END_REACHED (builder->cursor >= builder->token_list->num_tokens)
// pac_build_ast_reference: Parses a reference of the form `<name>` at the
// cursor and stores a NUL-terminated heap copy of the name in `reference`.
//
// Returns PAC_AST_STATUS_SUCCESS once the three tokens are consumed,
// PAC_AST_STATUS_UNEXPECTED_FILE_END if the tokens run out inside of the
// reference, PAC_AST_STATUS_NOT_RECOVERABLE on malformed state or a failed
// allocation, and otherwise the status of the error handler which took over.
pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_reference_s *reference)
{
    // Every token access below is guarded, so no token beyond the end of
    // the list is ever inspected.
    if(END_REACHED)
    {
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    }
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG)
    {
        // This is only possible with malformed state.
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }
    SKIP_TOKEN;

    if(END_REACHED)
    {
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    }
    if(CURRENT_TOKEN.type != PAC_TOKEN_WORD)
    {
        return pac_ast_handle_invalid_reference_name_token(builder);
    }
    usz_t len_name = CURRENT_TOKEN.length;
    char *name = CURRENT_STRING;
    SKIP_TOKEN;

    if(END_REACHED)
    {
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    }
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG)
    {
        return pac_ast_handle_missing_reference_close_tag(builder);
    }
    SKIP_TOKEN;

    // `<name> =` starts a rule header, it isn't a reference. A reference at
    // the very end of the tokens is still complete; the caller reports the
    // missing terminator.
    if(!END_REACHED && (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS))
    {
        return pac_ast_handle_reference_with_equals_sign(builder);
    }

    reference->name = malloc(len_name + 1);
    if(reference->name == NULL)
    {
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }
    reference->len_name = len_name;
    pac_memory_copy(reference->name, name, len_name);
    reference->name[len_name] = 0x00;
    return PAC_AST_STATUS_SUCCESS;
}
// pac_build_ast_item: Parses a single item (string literal, reference or
// set keyword) at the cursor into `item`.
//
// Literals and set keywords consume one token and yield
// PAC_AST_STATUS_SUCCESS; references pass on the status of the reference
// parser. Any other token marks the build as failed, is skipped, and yields
// PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER.
pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item)
{
    // Begin with a blank, invalid item.
    pac_memory_zero(item, sizeof(pac_ast_item_s));
    item->type = PAC_AST_ITEM_INVALID;

    switch(CURRENT_TOKEN.type)
    {
    case PAC_TOKEN_LIT_STRING:
        // Copy the literal's text out of the source into an own string.
        item->type = PAC_AST_ITEM_LITERAL;
        item->data.literal.length = CURRENT_TOKEN.length;
        item->data.literal.string = malloc(item->data.literal.length + 1);
        pac_memory_copy(item->data.literal.string, CURRENT_STRING, CURRENT_TOKEN.length);
        item->data.literal.string[item->data.literal.length] = 0x00;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    case PAC_TOKEN_SIGN_OPEN_TAG:
        item->type = PAC_AST_ITEM_REFERENCE;
        return pac_build_ast_reference(builder, &item->data.reference);

    case PAC_TOKEN_KEYWORD_WORD:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_WORD;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    case PAC_TOKEN_KEYWORD_INTEGER:
        item->type = PAC_AST_ITEM_SET;
        item->data.set = PAC_AST_SET_INTEGER;
        SKIP_TOKEN;
        return PAC_AST_STATUS_SUCCESS;

    default:
        break;
    }

    // Not the start of any known item.
    builder->failed = TRUE;
    SKIP_TOKEN; // Skip to (probably) the next item.
    return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
}
// pac_build_ast_variant: Parses one variant of a rule: a comma-separated
// list of items which ends at a vertical bar or a semicolon (neither is
// consumed).
//
// Returns PAC_AST_STATUS_SUCCESS at a terminator,
// PAC_AST_STATUS_UNEXPECTED_FILE_END if the tokens run out first,
// PAC_AST_STATUS_NOT_RECOVERABLE if the item array can't be allocated, and
// otherwise the failure status of an item or of the delimiter handler.
// `rule_name` and `variant_index` are currently unused.
pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index)
{
    pac_memory_zero(variant, sizeof(pac_ast_variant_s));

    usz_t items_capacity = 8;
    variant->items = calloc(items_capacity, sizeof(pac_ast_item_s));
    if(variant->items == NULL)
    {
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }

    while(!END_REACHED)
    {
        if(variant->num_items >= items_capacity)
        {
            // Grow in place. Allocating a fresh array here (as was done
            // before) would drop every item parsed so far and leak the
            // old array.
            usz_t grown_capacity = items_capacity * 2;
            pac_ast_item_s *grown_items = realloc(variant->items, sizeof(pac_ast_item_s) * grown_capacity);
            if(grown_items == NULL)
            {
                return PAC_AST_STATUS_NOT_RECOVERABLE;
            }
            pac_memory_zero(&grown_items[items_capacity], sizeof(pac_ast_item_s) * (grown_capacity - items_capacity));
            variant->items = grown_items;
            items_capacity = grown_capacity;
        }

        pac_ast_status_e status = pac_build_ast_item(builder, &variant->items[variant->num_items]);
        ++variant->num_items;
        if(status == PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER)
        {
            // The error was dealt with; go on with the next item.
            continue;
        }
        if(status != PAC_AST_STATUS_SUCCESS)
        {
            return status;
        }

        // The item may have been the last token; don't read past the end.
        if(END_REACHED)
        {
            break;
        }
        if(
            (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
            || (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
        ) {
            return PAC_AST_STATUS_SUCCESS;
        }
        if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA)
        {
            // NOTE(review): the handler advances the cursor itself and the
            // skip below advances it once more - confirm that dropping two
            // tokens is the intended recovery.
            pac_ast_status_e comma_missing_status = pac_ast_handle_missing_item_delimiter(builder);
            if(comma_missing_status != PAC_AST_STATUS_ERROR_HANDLED) return comma_missing_status;
        }
        SKIP_TOKEN;
    }
    return PAC_AST_STATUS_UNEXPECTED_FILE_END;
}
// These two handlers are used below, but the AST header shown alongside
// this file doesn't declare them.
pac_ast_status_e pac_ast_handle_invalid_rule_name(pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_missing_rule_closing_sign(pac_ast_builder_s *builder);

// pac_skip_ast_rule_header: Moves the cursor over a rule header of the form
// `<name> =`, calling the matching error handler for every token which
// doesn't fit.
//
// Returns PAC_AST_STATUS_SUCCESS with the cursor behind the header,
// PAC_AST_STATUS_NOT_RECOVERABLE if the cursor isn't at an opening tag,
// PAC_AST_STATUS_UNEXPECTED_FILE_END if the tokens run out, and otherwise
// the status with which a handler gave up.
pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder)
{
    if(END_REACHED)
    {
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    }
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG)
    {
        // Invalid state
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }
    SKIP_TOKEN;

    pac_ast_status_e status;
    if(END_REACHED)
    {
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    }
    if(CURRENT_TOKEN.type != PAC_TOKEN_WORD)
    {
        status = pac_ast_handle_invalid_rule_name(builder);
        if(status != PAC_AST_STATUS_ERROR_HANDLED)
            return status;
    }
    SKIP_TOKEN;

    if(END_REACHED)
    {
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    }
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG)
    {
        // Assign first, compare afterwards. The former single expression
        // had its closing parenthesis misplaced and stored the result of
        // the comparison in `status` instead of the handler's status.
        status = pac_ast_handle_missing_rule_closing_sign(builder);
        if(status != PAC_AST_STATUS_ERROR_HANDLED)
            return status;
    }
    SKIP_TOKEN;

    if(END_REACHED)
    {
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    }
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_EQUALS)
    {
        status = pac_ast_handle_missing_equals_sign_after_rule_header(builder);
        if(status != PAC_AST_STATUS_ERROR_HANDLED)
            return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
        // Cancels out the skip below, which is meant for the equals sign.
        REWIND_TOKEN;
    }
    SKIP_TOKEN;
    return PAC_AST_STATUS_SUCCESS;
}
// pac_build_ast_rule: Parses a whole rule: the header `<name> =` followed
// by variants which are separated by vertical bars and closed by a
// semicolon.
//
// Returns PAC_AST_STATUS_SUCCESS with the cursor behind the semicolon. If
// the header can't be parsed, its status is returned and no variants are
// read. PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE asks the caller to
// skip past the next semicolon; this function leaves that semicolon
// unconsumed.
pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s *rule)
{
    pac_memory_zero(rule, sizeof(pac_ast_rule_s));
    usz_t start_index = TOKEN_CURSOR;

    // The header's status used to be ignored, so a rule was built even from
    // tokens which aren't a rule header at all.
    pac_ast_status_e header_status = pac_skip_ast_rule_header(builder);
    if(header_status != PAC_AST_STATUS_SUCCESS)
    {
        // Always consume something, so that a caller which simply retries
        // doesn't get stuck on the same token.
        if(!END_REACHED && (TOKEN_CURSOR == start_index))
        {
            SKIP_TOKEN;
        }
        return header_status;
    }
    if((start_index + 1) >= builder->token_list->num_tokens)
    {
        return PAC_AST_STATUS_UNEXPECTED_FILE_END;
    }

    // The name is the token directly behind the opening tag.
    usz_t len_name = TOKEN_AT(start_index+1).length;
    usz_t name_offset = TOKEN_AT(start_index+1).offset;
    char *name_pointer = &builder->token_list->source[name_offset];
    rule->name = malloc(len_name + 1);
    if(rule->name == NULL)
    {
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }
    pac_memory_copy(rule->name, name_pointer, len_name);
    rule->name[len_name] = 0;

    // Parse all variants
    usz_t variants_capacity = 4;
    rule->variants = malloc(sizeof(pac_ast_variant_s) * variants_capacity);
    if(rule->variants == NULL)
    {
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }

    while(!END_REACHED)
    {
        if(rule->num_variants >= variants_capacity)
        {
            // Keep the old array reachable if the reallocation fails.
            usz_t grown_capacity = variants_capacity * 2;
            pac_ast_variant_s *grown_variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * grown_capacity);
            if(grown_variants == NULL)
            {
                return PAC_AST_STATUS_NOT_RECOVERABLE;
            }
            rule->variants = grown_variants;
            variants_capacity = grown_capacity;
        }

        pac_ast_status_e status = pac_build_ast_variant(builder, &rule->variants[rule->num_variants], rule->name, rule->num_variants);
        ++rule->num_variants;
        switch(status)
        {
        case PAC_AST_STATUS_SUCCESS:
            break;

        case PAC_AST_STATUS_NOT_RECOVERABLE:
            return PAC_AST_STATUS_NOT_RECOVERABLE;

        case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE:
            puts("Failed parsing a rule's variant, recovering at next rule!");
            return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;

        case PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER:
        {
            builder->failed = TRUE;
            printf("Failed parsing a rule's variant, ");
            while(!END_REACHED)
            {
                if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
                {
                    puts("continuing with the next variant.");
                    break;
                }
                if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
                {
                    puts("continuing with next rule.");
                    // Leave the semicolon for the caller: it skips past
                    // the next semicolon on this status, and would drop
                    // the whole following rule if this one were consumed
                    // here already.
                    return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
                }
                SKIP_TOKEN;
            }
        } break;

        case PAC_AST_STATUS_UNEXPECTED_FILE_END:
            return PAC_AST_STATUS_UNEXPECTED_FILE_END;

        default:
            printf("Internal Error: An invalid status code was produced: %d\n", (int) status);
            return PAC_AST_STATUS_NOT_RECOVERABLE;
        }

        // The variant or the recovery above may have reached the end.
        if(END_REACHED)
        {
            break;
        }
        if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
        {
            SKIP_TOKEN;
            return PAC_AST_STATUS_SUCCESS;
        }
        if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_VERTICAL_BAR)
        {
            return PAC_AST_STATUS_NOT_RECOVERABLE;
        }
        SKIP_TOKEN;
    }
    return PAC_AST_STATUS_UNEXPECTED_FILE_END;
}
// pac_ast_find_next_rule: Moves the cursor forward until it is directly
// behind the next semicolon, or at the end of the tokens if there is none.
void pac_ast_find_next_rule(pac_ast_builder_s *builder)
{
    bool_t semicolon_passed = FALSE;
    while(!semicolon_passed && !END_REACHED)
    {
        // Look at the token first, then step over it in either case.
        if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
        {
            semicolon_passed = TRUE;
        }
        SKIP_TOKEN;
    }
}
// pac_build_ast: Parses the given token list rule by rule into an AST.
//
// Only rules which were parsed successfully are counted in `num_rules`.
// Parsing stops at a premature end of the tokens, or if the rule array
// can't be (re)allocated.
pac_ast_s pac_build_ast(pac_tlist_s tokens)
{
    usz_t rules_capacity = 32;
    pac_ast_s ast;
    ast.num_rules = 0;
    ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity);

    pac_ast_builder_s builder;
    builder.cursor = 0;
    builder.logger = pac_create_logger();
    builder.token_list = &tokens;
    builder.failed = FALSE;

    if(ast.rules == NULL)
    {
        return ast;
    }

    while(builder.cursor < tokens.num_tokens)
    {
        if(ast.num_rules >= rules_capacity)
        {
            // Keep the parsed rules reachable if the reallocation fails.
            usz_t grown_capacity = rules_capacity * 2;
            pac_ast_rule_s *grown_rules = realloc(ast.rules, sizeof(pac_ast_rule_s) * grown_capacity);
            if(grown_rules == NULL)
            {
                break;
            }
            ast.rules = grown_rules;
            rules_capacity = grown_capacity;
        }

        usz_t rule_start = builder.cursor;
        pac_ast_status_e status = pac_build_ast_rule(&builder, &ast.rules[ast.num_rules]);
        if(status == PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE)
            pac_ast_find_next_rule(&builder);

        if(status == PAC_AST_STATUS_UNEXPECTED_FILE_END)
        {
            puts("File ended prematurely!");
            break;
        }

        if(status != PAC_AST_STATUS_SUCCESS)
        {
            printf("Failed parsing a rule at index: %lu with status code: %d!\n", (unsigned long) builder.cursor, (int) status);
            // A rule can fail without consuming a token. Resynchronize at
            // the next rule then, or this loop would never terminate.
            if(builder.cursor == rule_start)
                pac_ast_find_next_rule(&builder);
            continue;
        }
        ++ast.num_rules;
    }
    return ast;
}

View File

@ -0,0 +1,190 @@
#include <ast.h>
#include <logger.h>
#include <tokenizer.h>
// Token-stream accessors. Every macro expects a local variable named
// `builder` of type `pac_ast_builder_s *` to be in scope.
// All expansions are parenthesized so that they keep their meaning when
// used inside larger expressions; TOKEN_CURSOR remains assignable.
#define CURRENT_STRING (&builder->token_list->source[builder->token_list->tokens[builder->cursor].offset])
#define CURRENT_TOKEN (builder->token_list->tokens[builder->cursor])
#define TOKEN_CURSOR (builder->cursor)
#define SKIP_TOKEN (++builder->cursor)
#define REWIND_TOKEN (--builder->cursor)
#define TOKEN_AT(index) (builder->token_list->tokens[(index)])
#define END_REACHED (builder->cursor >= builder->token_list->num_tokens)
// pac_ast_builder_forward_seek_token_type: Searches for a token of the
// given type among the next `max_steps` tokens, starting with the current
// one.
//
// Returns TRUE with the cursor on the found token. If there is no such
// token in range, the cursor is put back to where it was and FALSE is
// returned.
bool_t pac_ast_builder_forward_seek_token_type(pac_ast_builder_s *builder, pac_token_e type, usz_t max_steps)
{
    usz_t cursor_backup = TOKEN_CURSOR;
    for(usz_t step_counter = 0; (step_counter < max_steps) && !END_REACHED; ++step_counter)
    {
        if(CURRENT_TOKEN.type == type) return TRUE;
        // Move on to the next candidate. Without this step, the loop only
        // ever looked at the token it started on.
        SKIP_TOKEN;
    }
    TOKEN_CURSOR = cursor_backup;
    return FALSE;
}
// pac_ast_builder_is_at_item_start: A utility function which returns whether
// the current token is one that can begin an item: a string literal, an
// opening tag, or the word/integer keyword token.
bool_t pac_ast_builder_is_at_item_start(pac_ast_builder_s *builder)
{
    if(CURRENT_TOKEN.type == PAC_TOKEN_LIT_STRING)
        return TRUE;
    if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_OPEN_TAG)
        return TRUE;
    if(CURRENT_TOKEN.type == PAC_TOKEN_KEYWORD_WORD)
        return TRUE;
    if(CURRENT_TOKEN.type == PAC_TOKEN_KEYWORD_INTEGER)
        return TRUE;
    return FALSE;
}
// pac_ast_builder_forward_seek_item_start: Searches for a token which can
// begin an item among the next `max_steps` tokens, starting with the
// current one.
//
// Returns TRUE with the cursor on the found token. If there is no such
// token in range, the cursor is put back to where it was and FALSE is
// returned.
bool_t pac_ast_builder_forward_seek_item_start(pac_ast_builder_s *builder, usz_t max_steps)
{
    usz_t cursor_backup = TOKEN_CURSOR;
    for(usz_t step_counter = 0; (step_counter < max_steps) && !END_REACHED; ++step_counter)
    {
        if(pac_ast_builder_is_at_item_start(builder)) return TRUE;
        // Move on to the next candidate. Without this step, the loop only
        // ever looked at the token it started on.
        SKIP_TOKEN;
    }
    TOKEN_CURSOR = cursor_backup;
    return FALSE;
}
// pac_ast_handle_invalid_reference_name_token: Logs a naming error for a
// reference whose name isn't a single word. It must be called with the
// cursor directly behind the opening tag.
//
// The reported name is the source text from the opening tag up to (and
// including) a closing tag found within the next three tokens, or up to the
// end of the current token if there is none.
//
// Returns PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER if the start of
// an item could be found within the next four tokens, and
// PAC_AST_STATUS_NOT_RECOVERABLE otherwise.
pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s *builder)
{
    usz_t num_tokens = builder->token_list->num_tokens;
    usz_t open_tag_index = TOKEN_CURSOR - 1;
    usz_t open_tag_src_offset = TOKEN_AT(open_tag_index).offset;

    // If the opening tag was the last token, report on it rather than
    // reading a token beyond the end of the list.
    usz_t reported_index = END_REACHED ? open_tag_index : TOKEN_CURSOR;

    // Find closing tag for getting the name of the word for the error message.
    usz_t len_reference_name = 0;
    for(usz_t tried_tokens = 0; tried_tokens < 3; ++tried_tokens)
    {
        usz_t probe_index = TOKEN_CURSOR + tried_tokens;
        if(probe_index >= num_tokens)
            break;
        if(TOKEN_AT(probe_index).type == PAC_TOKEN_SIGN_CLOSE_TAG)
        {
            // The +1 takes the closing tag into the name
            len_reference_name = (TOKEN_AT(probe_index).offset - open_tag_src_offset) + 1;
            break;
        }
    }

    // If no closing tag could be found, use the token after the opening tag.
    if(len_reference_name == 0)
    {
        len_reference_name = ((TOKEN_AT(reported_index).offset + TOKEN_AT(reported_index).length) - open_tag_src_offset);
    }

    char *given_rule_name = pac_log_alloc(&builder->logger, len_reference_name + 1);
    pac_memory_copy(given_rule_name, &builder->token_list->source[open_tag_src_offset], len_reference_name);
    // The buffer has one spare byte; terminate the copied name explicitly.
    given_rule_name[len_reference_name] = 0x00;

    pac_naming_error_s error;
    error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME;
    error.line = TOKEN_AT(reported_index).line;
    error.column = TOKEN_AT(reported_index).column;
    error.specifics.invalid_rule_name.given_rule_name = given_rule_name;
    pac_log_naming_error(&builder->logger, error);

    builder->failed = TRUE;
    if(!pac_ast_builder_forward_seek_item_start(builder, 4))
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
}
// pac_ast_handle_missing_reference_close_tag: Logs a "missing token" syntax
// error located at the current token, asking for the tag closer, and marks
// the build as failed.
//
// Always returns PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER.
pac_ast_status_e pac_ast_handle_missing_reference_close_tag(pac_ast_builder_s *builder)
{
    pac_syntax_error_s missing_closer;

    missing_closer.type = PAC_SYNTAX_ERROR_MISSING_TOKEN;
    missing_closer.specifics.missing_token.wanted_token = "Tag Closer (>)";
    missing_closer.specifics.missing_token.hint = NULL;

    // Point the error at the token found in place of the closer.
    missing_closer.column = CURRENT_TOKEN.column;
    missing_closer.line = CURRENT_TOKEN.line;

    pac_log_syntax_error(&builder->logger, missing_closer);

    builder->failed = TRUE;
    return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
}
// pac_ast_handle_reference_with_equals_sign: Logs an "odd token" syntax
// error for an equals sign which directly follows a reference, and marks
// the build as failed.
//
// Always returns PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER.
pac_ast_status_e pac_ast_handle_reference_with_equals_sign(pac_ast_builder_s *builder)
{
    pac_syntax_error_s odd_equals;

    odd_equals.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    odd_equals.specifics.odd_token.valid_options[0] = "Any Rule Item";
    odd_equals.specifics.odd_token.num_valid_options = 1;
    odd_equals.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago.";

    // Point the error at the equals sign itself.
    odd_equals.column = CURRENT_TOKEN.column;
    odd_equals.line = CURRENT_TOKEN.line;

    pac_log_syntax_error(&builder->logger, odd_equals);

    builder->failed = TRUE;
    return PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER;
}
// pac_ast_handle_unknown_item_type: Logs an "odd token" syntax error for a
// token which can't start an item, marks the build as failed and steps
// over the offending token.
//
// Returns PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER if the start of
// an item could be found within the following three tokens, and
// PAC_AST_STATUS_NOT_RECOVERABLE otherwise.
pac_ast_status_e pac_ast_handle_unknown_item_type(pac_ast_builder_s *builder)
{
    pac_syntax_error_s error;
    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    error.line = CURRENT_TOKEN.line;
    error.column = CURRENT_TOKEN.column;
    // There is no hint for this error; don't hand an indeterminate pointer
    // to the logger.
    // NOTE(review): assumes the logger accepts a NULL hint for odd-token
    // errors, as the missing-token error does - confirm.
    error.specifics.odd_token.hint = NULL;
    error.specifics.odd_token.num_valid_options = 1;
    error.specifics.odd_token.valid_options[0] = "Any Rule Item";
    pac_log_syntax_error(&builder->logger, error);

    // An error has been reported, so the build has failed no matter
    // whether the recovery below works out (as in the sibling handlers).
    builder->failed = TRUE;

    SKIP_TOKEN;
    if(!pac_ast_builder_forward_seek_item_start(builder, 3))
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
}
// pac_ast_handle_missing_item_delimiter: Reports that two items aren't
// separated by a delimiter, marks the build as failed and advances the
// cursor by one token.
//
// Always returns PAC_AST_STATUS_ERROR_HANDLED.
pac_ast_status_e pac_ast_handle_missing_item_delimiter(pac_ast_builder_s *builder)
{
    puts("Missing the item delimiter!");
    builder->failed = TRUE;

    // Step over the token found where the delimiter was expected.
    SKIP_TOKEN;

    return PAC_AST_STATUS_ERROR_HANDLED;
}
// pac_ast_handle_missing_equals_sign_after_rule_header: Reports a rule
// header which isn't followed by an equals sign and marks the build as
// failed.
//
// Returns PAC_AST_STATUS_ERROR_HANDLED if the item-start seek (two steps)
// succeeds, and PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE otherwise.
pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header(pac_ast_builder_s *builder)
{
    puts("Missing Equals sign after rule header!");
    builder->failed = TRUE;

    // Continue at first item (if possible)
    bool_t item_found = pac_ast_builder_forward_seek_item_start(builder, 2);
    if(item_found)
    {
        return PAC_AST_STATUS_ERROR_HANDLED;
    }
    return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
}
// pac_ast_handle_invalid_rule_name: Marks the build as failed because of a
// rule name which isn't a single word. Nothing is printed or logged here.
//
// Returns PAC_AST_STATUS_ERROR_HANDLED if the equals-sign seek (three
// steps) succeeds, and PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE
// otherwise.
pac_ast_status_e pac_ast_handle_invalid_rule_name(pac_ast_builder_s *builder)
{
    builder->failed = TRUE;

    bool_t equals_found = pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3);
    if(equals_found)
    {
        return PAC_AST_STATUS_ERROR_HANDLED;
    }
    return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
}
// pac_ast_handle_missing_rule_closing_sign: Reports a rule header whose
// closing tag is missing and marks the build as failed.
//
// Returns PAC_AST_STATUS_ERROR_HANDLED, after moving the cursor back by one
// token, if the equals-sign seek (two steps) succeeds, and
// PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE otherwise.
pac_ast_status_e pac_ast_handle_missing_rule_closing_sign(pac_ast_builder_s *builder)
{
    puts("Missing Tag closing sign!");
    builder->failed = TRUE;

    // Continue at equals sign (if possible)
    bool_t equals_found = pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 2);
    if(equals_found)
    {
        // The calling function advances by one token before it checks for
        // the equals sign, so step back by one to let it land on the sign.
        REWIND_TOKEN;
        return PAC_AST_STATUS_ERROR_HANDLED;
    }
    return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
}

View File

@ -10,7 +10,7 @@ pac_grammar_s pac_convert_grammar(char *source)
{ {
usz_t len_source = strlen(source); usz_t len_source = strlen(source);
pac_tlist_s tokens = pac_tokenize_grammar(source, len_source); pac_tlist_s tokens = pac_tokenize_grammar(source, len_source);
pac_ast_s ast = pac_grow_ast(tokens); pac_ast_s ast = pac_build_ast(tokens);
pac_grammar_s grammar = pac_link_grammar(ast); pac_grammar_s grammar = pac_link_grammar(ast);
return grammar; return grammar;

View File

@ -48,7 +48,6 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
usz_t tokens_capacity = 1024; usz_t tokens_capacity = 1024;
pac_tlist_s list; pac_tlist_s list;
list.cursor = 0;
list.source = source; list.source = source;
list.num_tokens = 0; list.num_tokens = 0;
list.tokens = calloc(sizeof(pac_token_s), tokens_capacity); list.tokens = calloc(sizeof(pac_token_s), tokens_capacity);