Added half-way decent error handling
This commit is contained in:
parent
0b0b1d227f
commit
b8f4fd9feb
|
@ -6,6 +6,43 @@
|
||||||
#include <logger.h>
|
#include <logger.h>
|
||||||
#include <tokenizer.h>
|
#include <tokenizer.h>
|
||||||
|
|
||||||
|
// pac_ast_set_e: Enumerates every set known to Parcel's AST.
//
// A set describes the rough format of a token rather than its exact text,
// as with variable names: the format is known, but the actual name isn't.
typedef enum
{
    PAC_AST_SET_RUNE    = 0,
    PAC_AST_SET_WORD    = 1,
    PAC_AST_SET_INTEGER = 2,
    PAC_AST_SET_FLOAT   = 3

} pac_ast_set_e;
|
||||||
|
|
||||||
|
// pac_ast_item_e: The kinds of item a variant can hold; the value stored in
// an item's 'type' field tells which member of its 'data' is filled in.
//
// PAC_AST_ITEM_INVALID is pinned to zero, so an item that has only been
// zeroed reads as invalid.
typedef enum
{
    PAC_AST_ITEM_INVALID   = 0,
    PAC_AST_ITEM_REFERENCE = 1,
    PAC_AST_ITEM_LITERAL   = 2,
    PAC_AST_ITEM_SET       = 3

} pac_ast_item_e;
|
||||||
|
|
||||||
|
// pac_ast_status_e: The result of an AST building step. Besides success, it
// tells how much higher in the call stack the program flow has to go to be
// able to recover from an error.
typedef enum
{
    // The step completed without an error.
    PAC_AST_STATUS_SUCCESS = 0,

    // An error handler dealt with the problem; the caller may carry on.
    PAC_AST_STATUS_ERROR_HANDLED = 1,

    // NOTE(review): by its name, recovery is meant to happen in the item
    // parser; no code shown alongside this enum produces or checks it.
    PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER = 2,

    // The variant parser stops the escalation and goes on with the next item.
    PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER = 3,

    // The rule parser stops the escalation by skipping ahead to the next
    // vertical bar or semicolon.
    PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER = 4,

    // The rest of the current rule is given up on; building resumes behind
    // the next semicolon.
    PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE = 5,

    // No recovery is possible at any level.
    PAC_AST_STATUS_NOT_RECOVERABLE = 6,

    // The token list ended in the middle of a construct.
    PAC_AST_STATUS_UNEXPECTED_FILE_END = 7

} pac_ast_status_e;
|
||||||
|
|
||||||
typedef struct pac_ast pac_ast_s;
|
typedef struct pac_ast pac_ast_s;
|
||||||
typedef struct pac_ast_rule pac_ast_rule_s;
|
typedef struct pac_ast_rule pac_ast_rule_s;
|
||||||
typedef struct pac_ast_variant pac_ast_variant_s;
|
typedef struct pac_ast_variant pac_ast_variant_s;
|
||||||
|
@ -47,28 +84,6 @@ struct pac_ast_literal
|
||||||
char *string;
|
char *string;
|
||||||
};
|
};
|
||||||
|
|
||||||
// pac_ast_set_e: An enumeration of all sets known in Parcel's AST.
|
|
||||||
//
|
|
||||||
// Sets are descriptions which describe a rough format of token, like
|
|
||||||
// with variable names; the format is known, but the actual name isn't.
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
PAC_AST_SET_RUNE,
|
|
||||||
PAC_AST_SET_WORD,
|
|
||||||
PAC_AST_SET_INTEGER,
|
|
||||||
PAC_AST_SET_FLOAT
|
|
||||||
|
|
||||||
} pac_ast_set_e;
|
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
PAC_AST_ITEM_INVALID = 0x00,
|
|
||||||
PAC_AST_ITEM_REFERENCE,
|
|
||||||
PAC_AST_ITEM_LITERAL,
|
|
||||||
PAC_AST_ITEM_SET
|
|
||||||
|
|
||||||
} pac_ast_item_e;
|
|
||||||
|
|
||||||
struct pac_ast_item
|
struct pac_ast_item
|
||||||
{
|
{
|
||||||
pac_ast_item_e type;
|
pac_ast_item_e type;
|
||||||
|
@ -80,18 +95,22 @@ struct pac_ast_item
|
||||||
} data;
|
} data;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct pac_ast_grower
|
typedef struct pac_ast_builder
|
||||||
{
|
{
|
||||||
|
usz_t cursor;
|
||||||
pac_tlist_s *token_list;
|
pac_tlist_s *token_list;
|
||||||
pac_logger_s logger;
|
|
||||||
bool_t failed;
|
bool_t failed;
|
||||||
|
|
||||||
} pac_ast_grower_s;
|
pac_logger_s logger;
|
||||||
|
|
||||||
pac_ast_s pac_grow_ast (pac_tlist_s tokens);
|
} pac_ast_builder_s;
|
||||||
|
|
||||||
bool_t pac_ast_handle_invalid_reference_name_token (pac_ast_grower_s *grower);
|
pac_ast_s pac_build_ast (pac_tlist_s tokens);
|
||||||
bool_t pac_ast_handle_missing_reference_close_tag (pac_ast_grower_s *grower);
|
|
||||||
bool_t pac_ast_handle_reference_with_equals_sign (pac_ast_grower_s *grower);
|
pac_ast_status_e pac_ast_handle_invalid_reference_name_token (pac_ast_builder_s *builder);
|
||||||
|
pac_ast_status_e pac_ast_handle_missing_reference_close_tag (pac_ast_builder_s *builder);
|
||||||
|
pac_ast_status_e pac_ast_handle_reference_with_equals_sign (pac_ast_builder_s *builder);
|
||||||
|
pac_ast_status_e pac_ast_handle_missing_item_delimiter (pac_ast_builder_s *builder);
|
||||||
|
pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header (pac_ast_builder_s *builder);
|
||||||
|
|
||||||
#endif // PARCEL_AST_H
|
#endif // PARCEL_AST_H
|
||||||
|
|
|
@ -48,9 +48,6 @@ struct pac_tlist
|
||||||
char *source;
|
char *source;
|
||||||
usz_t num_tokens;
|
usz_t num_tokens;
|
||||||
pac_token_s *tokens;
|
pac_token_s *tokens;
|
||||||
|
|
||||||
// cursor: An index into the 'tokens'-array; used in later stages.
|
|
||||||
usz_t cursor;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
pac_token_e pac_word_to_token_type (char *word, usz_t length);
|
pac_token_e pac_word_to_token_type (char *word, usz_t length);
|
||||||
|
|
|
@ -6,41 +6,41 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
#define CURSTR &grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset]
|
#define CURRENT_STRING &builder->token_list->source[builder->token_list->tokens[builder->cursor].offset]
|
||||||
#define CURTOK (grower->token_list->tokens[grower->token_list->cursor])
|
#define CURRENT_TOKEN (builder->token_list->tokens[builder->cursor])
|
||||||
#define CURSOR (grower->token_list->cursor)
|
#define TOKEN_CURSOR (builder->cursor)
|
||||||
#define SKIP_TOKEN ++grower->token_list->cursor
|
#define SKIP_TOKEN ++builder->cursor
|
||||||
#define REWIND_TOKEN --grower->token_list->cursor
|
#define REWIND_TOKEN --builder->cursor
|
||||||
#define TOKEN_AT(index) (grower->token_list->tokens[index])
|
#define TOKEN_AT(index) (builder->token_list->tokens[(index)])
|
||||||
#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens)
|
#define END_REACHED (builder->cursor >= builder->token_list->num_tokens)
|
||||||
|
|
||||||
|
|
||||||
i32_t pac_grow_reference(pac_ast_grower_s *grower, pac_ast_reference_s *reference)
|
pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_reference_s *reference)
|
||||||
{
|
{
|
||||||
if(CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG)
|
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG)
|
||||||
{
|
{
|
||||||
// This is only possible with malformed state.
|
// This is only possible with malformed state.
|
||||||
return -1;
|
return PAC_AST_STATUS_NOT_RECOVERABLE;
|
||||||
}
|
}
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
|
|
||||||
if(CURTOK.type != PAC_TOKEN_WORD)
|
if(CURRENT_TOKEN.type != PAC_TOKEN_WORD)
|
||||||
{
|
{
|
||||||
return pac_ast_handle_invalid_reference_name_token(grower);
|
return pac_ast_handle_invalid_reference_name_token(builder);
|
||||||
}
|
}
|
||||||
usz_t len_name = CURTOK.length;
|
usz_t len_name = CURRENT_TOKEN.length;
|
||||||
char *name = CURSTR;
|
char *name = CURRENT_STRING;
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
|
|
||||||
if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG)
|
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG)
|
||||||
{
|
{
|
||||||
return pac_ast_handle_missing_reference_close_tag(grower);
|
return pac_ast_handle_missing_reference_close_tag(builder);
|
||||||
}
|
}
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
|
|
||||||
if(CURTOK.type == PAC_TOKEN_SIGN_EQUALS)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS)
|
||||||
{
|
{
|
||||||
return pac_ast_handle_reference_with_equals_sign(grower);
|
return pac_ast_handle_reference_with_equals_sign(builder);
|
||||||
}
|
}
|
||||||
|
|
||||||
reference->len_name = len_name;
|
reference->len_name = len_name;
|
||||||
|
@ -48,52 +48,52 @@ i32_t pac_grow_reference(pac_ast_grower_s *grower, pac_ast_reference_s *referenc
|
||||||
pac_memory_copy(reference->name, name, len_name);
|
pac_memory_copy(reference->name, name, len_name);
|
||||||
reference->name[len_name] = 0x00;
|
reference->name[len_name] = 0x00;
|
||||||
|
|
||||||
return 3;
|
return PAC_AST_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
i32_t pac_grow_item(pac_ast_grower_s *grower, pac_ast_item_s *item)
|
pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item)
|
||||||
{
|
{
|
||||||
item->type = PAC_AST_ITEM_INVALID;
|
item->type = PAC_AST_ITEM_INVALID;
|
||||||
pac_memory_zero(item, sizeof(pac_ast_item_s));
|
pac_memory_zero(item, sizeof(pac_ast_item_s));
|
||||||
if(CURTOK.type == PAC_TOKEN_LIT_STRING)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_LIT_STRING)
|
||||||
{
|
{
|
||||||
item->type = PAC_AST_ITEM_LITERAL;
|
item->type = PAC_AST_ITEM_LITERAL;
|
||||||
item->data.literal.length = CURTOK.length;
|
item->data.literal.length = CURRENT_TOKEN.length;
|
||||||
item->data.literal.string = malloc(item->data.literal.length + 1);
|
item->data.literal.string = malloc(item->data.literal.length + 1);
|
||||||
pac_memory_copy(item->data.literal.string, CURSTR, CURTOK.length);
|
pac_memory_copy(item->data.literal.string, CURRENT_STRING, CURRENT_TOKEN.length);
|
||||||
item->data.literal.string[item->data.literal.length] = 0x00;
|
item->data.literal.string[item->data.literal.length] = 0x00;
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
return 1;
|
return PAC_AST_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(CURTOK.type == PAC_TOKEN_SIGN_OPEN_TAG)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_OPEN_TAG)
|
||||||
{
|
{
|
||||||
item->type = PAC_AST_ITEM_REFERENCE;
|
item->type = PAC_AST_ITEM_REFERENCE;
|
||||||
return pac_grow_reference(grower, &item->data.reference);
|
return pac_build_ast_reference(builder, &item->data.reference);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(CURTOK.type == PAC_TOKEN_KEYWORD_WORD)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_KEYWORD_WORD)
|
||||||
{
|
{
|
||||||
item->type = PAC_AST_ITEM_SET;
|
item->type = PAC_AST_ITEM_SET;
|
||||||
item->data.set = PAC_AST_SET_WORD;
|
item->data.set = PAC_AST_SET_WORD;
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
return 1;
|
return PAC_AST_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(CURTOK.type == PAC_TOKEN_KEYWORD_INTEGER)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_KEYWORD_INTEGER)
|
||||||
{
|
{
|
||||||
item->type = PAC_AST_ITEM_SET;
|
item->type = PAC_AST_ITEM_SET;
|
||||||
item->data.set = PAC_AST_SET_INTEGER;
|
item->data.set = PAC_AST_SET_INTEGER;
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
return 1;
|
return PAC_AST_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
return -1;
|
builder->failed = TRUE;
|
||||||
|
SKIP_TOKEN; // Skip to (probably) the next item.
|
||||||
|
return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
|
||||||
}
|
}
|
||||||
|
|
||||||
i32_t pac_grow_variant(pac_ast_grower_s *grower, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index)
|
pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index)
|
||||||
{
|
{
|
||||||
usz_t start_index = CURSOR;
|
|
||||||
|
|
||||||
pac_memory_zero(variant, sizeof(pac_ast_variant_s));
|
pac_memory_zero(variant, sizeof(pac_ast_variant_s));
|
||||||
|
|
||||||
usz_t items_capacity = 8;
|
usz_t items_capacity = 8;
|
||||||
|
@ -105,68 +105,93 @@ i32_t pac_grow_variant(pac_ast_grower_s *grower, pac_ast_variant_s *variant, cha
|
||||||
items_capacity *= 2;
|
items_capacity *= 2;
|
||||||
variant->items = calloc(sizeof(pac_ast_item_s), items_capacity);
|
variant->items = calloc(sizeof(pac_ast_item_s), items_capacity);
|
||||||
}
|
}
|
||||||
i32_t success = pac_grow_item(grower, &variant->items[variant->num_items]);
|
pac_ast_status_e status = pac_build_ast_item(builder, &variant->items[variant->num_items]);
|
||||||
++variant->num_items;
|
++variant->num_items;
|
||||||
|
|
||||||
if(success < 0)
|
if(status == PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER)
|
||||||
{
|
{
|
||||||
return success - 1;
|
continue;
|
||||||
}
|
|
||||||
if(
|
|
||||||
(CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
|
|
||||||
|| (CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
|
|
||||||
) {
|
|
||||||
return CURSOR - start_index;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if(CURTOK.type != PAC_TOKEN_SIGN_COMMA)
|
if(status != PAC_AST_STATUS_SUCCESS)
|
||||||
{
|
{
|
||||||
return -1;
|
return status;
|
||||||
|
}
|
||||||
|
if(
|
||||||
|
(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
|
||||||
|
|| (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
|
||||||
|
) {
|
||||||
|
return PAC_AST_STATUS_SUCCESS;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA)
|
||||||
|
{
|
||||||
|
pac_ast_status_e comma_missing_status = pac_ast_handle_missing_item_delimiter(builder);
|
||||||
|
if(comma_missing_status != PAC_AST_STATUS_ERROR_HANDLED) return comma_missing_status;
|
||||||
|
|
||||||
}
|
}
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
}
|
}
|
||||||
return -1;
|
return PAC_AST_STATUS_UNEXPECTED_FILE_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
i32_t pac_grow_rule(pac_ast_grower_s *grower, pac_ast_rule_s *rule)
|
|
||||||
|
|
||||||
|
// pac_skip_ast_rule_header: Moves the builder's cursor over a rule header,
// which is an opening tag, a word (the rule name), a closing tag and an
// equals sign. Whenever a token is not the expected one, the matching error
// handler is called and parsing only goes on if it reports
// PAC_AST_STATUS_ERROR_HANDLED.
//
// Returns:
// - PAC_AST_STATUS_SUCCESS once the cursor is behind the equals sign.
// - PAC_AST_STATUS_NOT_RECOVERABLE if the cursor isn't at an opening tag.
// - Otherwise the status of the handler which couldn't repair the header.
//
// NOTE(review): the cursor is not checked against the end of the token list
// between the steps - confirm that callers guarantee enough tokens.
pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder)
{
    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG)
    {
        // Invalid state
        return PAC_AST_STATUS_NOT_RECOVERABLE;
    }
    SKIP_TOKEN;

    pac_ast_status_e status;

    if(CURRENT_TOKEN.type != PAC_TOKEN_WORD)
    {
        status = pac_ast_handle_invalid_rule_name(builder);
        if(status != PAC_AST_STATUS_ERROR_HANDLED)
            return status;
    }
    SKIP_TOKEN;

    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG)
    {
        // The handler's status has to be stored before it is compared;
        // assigning the result of the comparison would hand a 0 or 1 to
        // the caller instead of the real status.
        status = pac_ast_handle_missing_rule_closing_sign(builder);
        if(status != PAC_AST_STATUS_ERROR_HANDLED)
            return status;
    }
    SKIP_TOKEN;

    if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_EQUALS)
    {
        status = pac_ast_handle_missing_equals_sign_after_rule_header(builder);
        if(status != PAC_AST_STATUS_ERROR_HANDLED)
            return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;

        // Cancels out the skip below, so that the cursor stays where the
        // handler has left it.
        REWIND_TOKEN;
    }
    SKIP_TOKEN;

    return PAC_AST_STATUS_SUCCESS;
}
|
||||||
|
|
||||||
|
pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s *rule)
|
||||||
{
|
{
|
||||||
pac_memory_zero(rule, sizeof(pac_ast_rule_s));
|
pac_memory_zero(rule, sizeof(pac_ast_rule_s));
|
||||||
|
|
||||||
// Parse the header
|
usz_t start_index = TOKEN_CURSOR;
|
||||||
|
pac_skip_ast_rule_header(builder);
|
||||||
usz_t start_index = CURSOR;
|
|
||||||
|
|
||||||
if(CURTOK.type != PAC_TOKEN_SIGN_OPEN_TAG)
|
|
||||||
{
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
SKIP_TOKEN;
|
|
||||||
|
|
||||||
if(CURTOK.type != PAC_TOKEN_WORD)
|
|
||||||
{
|
|
||||||
puts("A rule name must be a single word!");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
usz_t len_name = CURTOK.length;
|
|
||||||
usz_t name_in_source = CURSTR;
|
|
||||||
SKIP_TOKEN;
|
|
||||||
|
|
||||||
if(CURTOK.type != PAC_TOKEN_SIGN_CLOSE_TAG)
|
|
||||||
{
|
|
||||||
puts("Missing Tag closing sign!");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
SKIP_TOKEN;
|
|
||||||
|
|
||||||
if(CURTOK.type != PAC_TOKEN_SIGN_EQUALS)
|
|
||||||
return -1;
|
|
||||||
SKIP_TOKEN;
|
|
||||||
|
|
||||||
|
|
||||||
// Parse all variants
|
// Parse all variants
|
||||||
|
|
||||||
|
usz_t len_name = TOKEN_AT(start_index+1).length;
|
||||||
|
usz_t name_offset = TOKEN_AT(start_index+1).offset;
|
||||||
|
char *name_pointer = &builder->token_list->source[name_offset];
|
||||||
|
|
||||||
rule->name = malloc(len_name + 1);
|
rule->name = malloc(len_name + 1);
|
||||||
pac_memory_copy(rule->name, name_in_source, len_name);
|
pac_memory_copy(rule->name, name_pointer, len_name);
|
||||||
rule->name[len_name] = 0;
|
rule->name[len_name] = 0;
|
||||||
|
|
||||||
usz_t variants_capacity = 4;
|
usz_t variants_capacity = 4;
|
||||||
|
@ -178,68 +203,114 @@ i32_t pac_grow_rule(pac_ast_grower_s *grower, pac_ast_rule_s *rule)
|
||||||
variants_capacity *= 2;
|
variants_capacity *= 2;
|
||||||
rule->variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * variants_capacity);
|
rule->variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * variants_capacity);
|
||||||
}
|
}
|
||||||
i32_t success = pac_grow_variant(grower, &rule->variants[rule->num_variants], rule->name, rule->num_variants);
|
pac_ast_status_e status = pac_build_ast_variant(builder, &rule->variants[rule->num_variants], rule->name, rule->num_variants);
|
||||||
++rule->num_variants;
|
++rule->num_variants;
|
||||||
|
|
||||||
if(success < 0)
|
switch(status)
|
||||||
{
|
{
|
||||||
printf("Failed parsing a rule's variant!");
|
case PAC_AST_STATUS_SUCCESS:
|
||||||
|
break;
|
||||||
|
case PAC_AST_STATUS_NOT_RECOVERABLE:
|
||||||
|
return PAC_AST_STATUS_NOT_RECOVERABLE;
|
||||||
|
|
||||||
|
case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE:
|
||||||
|
puts("Failed parsing a rule's variant, recovering at next rule!");
|
||||||
|
return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
|
||||||
|
|
||||||
|
case PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER:
|
||||||
|
{
|
||||||
|
builder->failed = TRUE;
|
||||||
|
printf("Failed parsing a rule's variant, ");
|
||||||
while(!END_REACHED)
|
while(!END_REACHED)
|
||||||
{
|
{
|
||||||
if(CURTOK.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
|
||||||
{
|
{
|
||||||
printf("Continuing with next variant.\n");
|
puts("continuing with the next variant.");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
|
||||||
{
|
{
|
||||||
printf("Continuing with next rule.\n");
|
puts("continuing with next rule.");
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
return 2;
|
return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
|
||||||
}
|
}
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
}
|
}
|
||||||
continue;
|
} break;
|
||||||
|
|
||||||
|
case PAC_AST_STATUS_UNEXPECTED_FILE_END:
|
||||||
|
return PAC_AST_STATUS_UNEXPECTED_FILE_END;
|
||||||
|
|
||||||
|
default:
|
||||||
|
printf("Internal Error: An invalid status code was produced: %d\n", status);
|
||||||
|
return PAC_AST_STATUS_NOT_RECOVERABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(CURTOK.type == PAC_TOKEN_SIGN_SEMICOLON)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
|
||||||
{
|
{
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
return CURSOR - start_index;
|
return PAC_AST_STATUS_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
if(CURTOK.type != PAC_TOKEN_SIGN_VERTICAL_BAR)
|
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_VERTICAL_BAR)
|
||||||
{
|
{
|
||||||
return -1;
|
return PAC_AST_STATUS_NOT_RECOVERABLE;
|
||||||
}
|
}
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
}
|
}
|
||||||
return -1;
|
return PAC_AST_STATUS_UNEXPECTED_FILE_END;
|
||||||
}
|
}
|
||||||
|
|
||||||
pac_ast_s pac_grow_ast(pac_tlist_s tokens)
|
// pac_ast_find_next_rule: Skips tokens up to and including the next
// semicolon. If there is no semicolon left, the cursor ends up at the end of
// the token list.
void pac_ast_find_next_rule(pac_ast_builder_s *builder)
{
    for(;;)
    {
        if(END_REACHED) return;

        // The semicolon itself is consumed too, hence the check has to be
        // remembered across the skip.
        int was_semicolon = (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON);
        SKIP_TOKEN;
        if(was_semicolon) return;
    }
}
|
||||||
|
|
||||||
|
pac_ast_s pac_build_ast(pac_tlist_s tokens)
|
||||||
{
|
{
|
||||||
usz_t rules_capacity = 32;
|
usz_t rules_capacity = 32;
|
||||||
pac_ast_s ast;
|
pac_ast_s ast;
|
||||||
ast.num_rules = 0;
|
ast.num_rules = 0;
|
||||||
ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity);
|
ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity);
|
||||||
|
|
||||||
pac_ast_grower_s grower;
|
pac_ast_builder_s builder;
|
||||||
grower.logger = pac_create_logger();
|
builder.cursor = 0;
|
||||||
grower.token_list = &tokens;
|
builder.logger = pac_create_logger();
|
||||||
grower.failed = FALSE;
|
builder.token_list = &tokens;
|
||||||
|
builder.failed = FALSE;
|
||||||
|
|
||||||
while(tokens.cursor < tokens.num_tokens)
|
while(builder.cursor < tokens.num_tokens)
|
||||||
{
|
{
|
||||||
if(ast.num_rules >= rules_capacity)
|
if(ast.num_rules >= rules_capacity)
|
||||||
{
|
{
|
||||||
rules_capacity *= 2;
|
rules_capacity *= 2;
|
||||||
ast.rules = realloc(ast.rules, sizeof(pac_ast_rule_s) * rules_capacity);
|
ast.rules = realloc(ast.rules, sizeof(pac_ast_rule_s) * rules_capacity);
|
||||||
}
|
}
|
||||||
int success = pac_grow_rule(&grower, &ast.rules[ast.num_rules]);
|
|
||||||
if(success < 0)
|
pac_ast_status_e status = pac_build_ast_rule(&builder, &ast.rules[ast.num_rules]);
|
||||||
|
|
||||||
|
if(status == PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE)
|
||||||
|
pac_ast_find_next_rule(&builder);
|
||||||
|
|
||||||
|
if(status == PAC_AST_STATUS_UNEXPECTED_FILE_END)
|
||||||
{
|
{
|
||||||
printf("Failed parsing a rule at index: %lu!\n", tokens.cursor);
|
puts("File ended prematurely!");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(status != PAC_AST_STATUS_SUCCESS)
|
||||||
|
{
|
||||||
|
printf("Failed parsing a rule at index: %lu with status code: %d!\n", builder.cursor, status);
|
||||||
|
continue;
|
||||||
}
|
}
|
||||||
++ast.num_rules;
|
++ast.num_rules;
|
||||||
}
|
}
|
||||||
|
|
|
@ -2,21 +2,35 @@
|
||||||
#include <logger.h>
|
#include <logger.h>
|
||||||
#include <tokenizer.h>
|
#include <tokenizer.h>
|
||||||
|
|
||||||
#define CURSTR &grower->token_list->source[grower->token_list->tokens[grower->token_list->cursor].offset]
|
#define CURRENT_STRING &builder->token_list->source[builder->token_list->tokens[builder->cursor].offset]
|
||||||
#define CURTOK (grower->token_list->tokens[grower->token_list->cursor])
|
#define CURRENT_TOKEN (builder->token_list->tokens[builder->cursor])
|
||||||
#define CURSOR (grower->token_list->cursor)
|
#define TOKEN_CURSOR builder->cursor
|
||||||
#define SKIP_TOKEN ++grower->token_list->cursor
|
#define SKIP_TOKEN ++builder->cursor
|
||||||
#define REWIND_TOKEN --grower->token_list->cursor
|
#define REWIND_TOKEN --builder->cursor
|
||||||
#define TOKEN_AT(index) (grower->token_list->tokens[(index)])
|
#define TOKEN_AT(index) (builder->token_list->tokens[(index)])
|
||||||
#define END_REACHED (grower->token_list->cursor >= grower->token_list->num_tokens)
|
#define END_REACHED (builder->cursor >= builder->token_list->num_tokens)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// pac_ast_builder_forward_seek_token_type: Searches forward for a token of
// the given type, starting with the token under the cursor and inspecting at
// most 'max_steps' tokens.
//
// Returns TRUE with the cursor left on the found token. Returns FALSE with
// the cursor put back to where it was if no such token is found within
// 'max_steps' tokens or before the end of the token list.
bool_t pac_ast_builder_forward_seek_token_type(pac_ast_builder_s *builder, pac_token_e type, usz_t max_steps)
{
    usz_t cursor_backup = TOKEN_CURSOR;

    for(usz_t step_counter = 0; step_counter < max_steps; ++step_counter)
    {
        if(END_REACHED) break;
        if(CURRENT_TOKEN.type == type) return TRUE;

        // Go on to the next token; without this, the same token would be
        // checked again on every step.
        SKIP_TOKEN;
    }

    TOKEN_CURSOR = cursor_backup;
    return FALSE;
}
|
||||||
|
|
||||||
// pac_ast_builder_is_at_item_start: A utility function which returns whether
|
// pac_ast_builder_is_at_item_start: A utility function which returns whether
|
||||||
// the parser is at the start of an item in the current state.
|
// the parser is at the start of an item in the current state.
|
||||||
bool_t pac_ast_builder_is_at_item_start(pac_ast_grower_s *grower)
|
bool_t pac_ast_builder_is_at_item_start(pac_ast_builder_s *builder)
|
||||||
{
|
{
|
||||||
switch(CURTOK.type)
|
switch(CURRENT_TOKEN.type)
|
||||||
{
|
{
|
||||||
case PAC_TOKEN_LIT_STRING:
|
case PAC_TOKEN_LIT_STRING:
|
||||||
case PAC_TOKEN_SIGN_OPEN_TAG:
|
case PAC_TOKEN_SIGN_OPEN_TAG:
|
||||||
|
@ -28,88 +42,149 @@ bool_t pac_ast_builder_is_at_item_start(pac_ast_grower_s *grower)
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pac_ast_builder_forward_seek_item_start: Searches forward for a token at
// which an item can start (as judged by pac_ast_builder_is_at_item_start),
// starting with the token under the cursor and inspecting at most
// 'max_steps' tokens.
//
// Returns TRUE with the cursor left on the item start. Returns FALSE with
// the cursor put back to where it was if no item start is found within
// 'max_steps' tokens or before the end of the token list.
bool_t pac_ast_builder_forward_seek_item_start(pac_ast_builder_s *builder, usz_t max_steps)
{
    usz_t cursor_backup = TOKEN_CURSOR;

    for(usz_t step_counter = 0; step_counter < max_steps; ++step_counter)
    {
        if(END_REACHED) break;
        if(pac_ast_builder_is_at_item_start(builder)) return TRUE;

        // Go on to the next token; without this, the same token would be
        // checked again on every step.
        SKIP_TOKEN;
    }

    TOKEN_CURSOR = cursor_backup;
    return FALSE;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s *builder)
|
||||||
|
{
|
||||||
|
usz_t open_tag_src_offset = TOKEN_AT(TOKEN_CURSOR-1).offset;
|
||||||
usz_t len_reference_name = 0;
|
usz_t len_reference_name = 0;
|
||||||
|
|
||||||
// Find closing tag for getting the name of the word for the error message.
|
// Find closing tag for getting the name of the word for the error message.
|
||||||
usz_t tried_tokens = 0;
|
usz_t tried_tokens = 0;
|
||||||
while(tried_tokens < 3)
|
while(tried_tokens < 3)
|
||||||
{
|
{
|
||||||
if(TOKEN_AT(CURSOR + tried_tokens).type == PAC_TOKEN_SIGN_CLOSE_TAG)
|
if(TOKEN_AT(TOKEN_CURSOR + tried_tokens).type == PAC_TOKEN_SIGN_CLOSE_TAG)
|
||||||
{
|
{
|
||||||
len_reference_name = TOKEN_AT(CURSOR + tried_tokens).offset - open_tag_src_offset;
|
len_reference_name = TOKEN_AT(TOKEN_CURSOR + tried_tokens).offset - open_tag_src_offset;
|
||||||
++len_reference_name; // Take the closing tag into the name
|
++len_reference_name; // Take the closing tag into the name
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
++tried_tokens;
|
++tried_tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
// If no closing tag could be found, use the token after the opening tag.
|
// If no closing tag could be found, use the token after the opening tag.
|
||||||
if(len_reference_name == 0)
|
if(len_reference_name == 0)
|
||||||
len_reference_name = ((CURTOK.offset + CURTOK.length) - open_tag_src_offset);
|
{
|
||||||
|
len_reference_name = ((CURRENT_TOKEN.offset + CURRENT_TOKEN.length) - open_tag_src_offset);
|
||||||
|
}
|
||||||
|
char *given_rule_name = pac_log_alloc(&builder->logger, len_reference_name + 1);
|
||||||
|
pac_memory_copy(given_rule_name, &builder->token_list->source[open_tag_src_offset], len_reference_name);
|
||||||
pac_naming_error_s error;
|
pac_naming_error_s error;
|
||||||
error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME;
|
error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME;
|
||||||
error.line = CURTOK.line;
|
error.line = CURRENT_TOKEN.line;
|
||||||
error.column = CURTOK.column;
|
error.column = CURRENT_TOKEN.column;
|
||||||
error.specifics.invalid_rule_name.given_rule_name = pac_log_alloc(&grower->logger, len_reference_name + 1);
|
error.specifics.invalid_rule_name.given_rule_name = given_rule_name;
|
||||||
pac_memory_copy(error.specifics.invalid_rule_name.given_rule_name, &grower->token_list->source[open_tag_src_offset], len_reference_name);
|
pac_log_naming_error(&builder->logger, error);
|
||||||
pac_log_naming_error(&grower->logger, error);
|
|
||||||
|
|
||||||
|
|
||||||
grower->failed = TRUE;
|
builder->failed = TRUE;
|
||||||
return TRUE;
|
|
||||||
|
if(!pac_ast_builder_forward_seek_item_start(builder, 4))
|
||||||
|
return PAC_AST_STATUS_NOT_RECOVERABLE;
|
||||||
|
return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns whether the parser can continue to catch errors
|
pac_ast_status_e pac_ast_handle_missing_reference_close_tag(pac_ast_builder_s *builder)
|
||||||
bool_t pac_ast_handle_missing_reference_close_tag(pac_ast_grower_s *grower)
|
|
||||||
{
|
{
|
||||||
pac_syntax_error_s error;
|
pac_syntax_error_s error;
|
||||||
error.type = PAC_SYNTAX_ERROR_MISSING_TOKEN;
|
error.type = PAC_SYNTAX_ERROR_MISSING_TOKEN;
|
||||||
error.line = CURTOK.line;
|
error.line = CURRENT_TOKEN.line;
|
||||||
error.column = CURTOK.column;
|
error.column = CURRENT_TOKEN.column;
|
||||||
error.specifics.missing_token.hint = NULL;
|
error.specifics.missing_token.hint = NULL;
|
||||||
error.specifics.missing_token.wanted_token = "Tag Closer (>)";
|
error.specifics.missing_token.wanted_token = "Tag Closer (>)";
|
||||||
pac_log_syntax_error(&grower->logger, error);
|
pac_log_syntax_error(&builder->logger, error);
|
||||||
|
|
||||||
return TRUE;
|
builder->failed = TRUE;
|
||||||
|
return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns whether the parser can continue to catch errors
|
pac_ast_status_e pac_ast_handle_reference_with_equals_sign(pac_ast_builder_s *builder)
|
||||||
bool_t pac_ast_handle_reference_with_equals_sign(pac_ast_grower_s *grower)
|
|
||||||
{
|
{
|
||||||
pac_syntax_error_s error;
|
pac_syntax_error_s error;
|
||||||
error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
|
error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
|
||||||
error.line = CURTOK.line;
|
error.line = CURRENT_TOKEN.line;
|
||||||
error.column = CURTOK.column;
|
error.column = CURRENT_TOKEN.column;
|
||||||
error.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago.";
|
error.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago.";
|
||||||
error.specifics.odd_token.num_valid_options = 1;
|
error.specifics.odd_token.num_valid_options = 1;
|
||||||
error.specifics.odd_token.valid_options[0] = "Any Rule Item";
|
error.specifics.odd_token.valid_options[0] = "Any Rule Item";
|
||||||
pac_log_syntax_error(&grower->logger, error);
|
pac_log_syntax_error(&builder->logger, error);
|
||||||
|
|
||||||
// TODO: Handle the tokens following this as a new rule.
|
builder->failed = TRUE;
|
||||||
|
return PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER;
|
||||||
return FALSE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Returns whether the parser can continue to catch errors
|
pac_ast_status_e pac_ast_handle_unknown_item_type(pac_ast_builder_s *builder)
|
||||||
bool_t pac_ast_handle_unknown_item_type(pac_ast_grower_s *grower)
|
|
||||||
{
|
{
|
||||||
pac_syntax_error_s error;
|
pac_syntax_error_s error;
|
||||||
error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
|
error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
|
||||||
error.line = CURTOK.line;
|
error.line = CURRENT_TOKEN.line;
|
||||||
error.column = CURTOK.column;
|
error.column = CURRENT_TOKEN.column;
|
||||||
error.specifics.odd_token.num_valid_options = 1;
|
error.specifics.odd_token.num_valid_options = 1;
|
||||||
error.specifics.odd_token.valid_options[0] = "Any Rule Item";
|
error.specifics.odd_token.valid_options[0] = "Any Rule Item";
|
||||||
pac_log_syntax_error(&grower->logger, error);
|
pac_log_syntax_error(&builder->logger, error);
|
||||||
while(!pac_ast_builder_is_at_item_start(grower))
|
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
|
if(!pac_ast_builder_forward_seek_item_start(builder, 3))
|
||||||
|
return PAC_AST_STATUS_NOT_RECOVERABLE;
|
||||||
|
|
||||||
return TRUE;
|
builder->failed = TRUE;
|
||||||
|
return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
|
||||||
|
}
|
||||||
|
|
||||||
|
// pac_ast_handle_missing_item_delimiter: Called when two items aren't
// separated by a delimiter. Marks the build as failed, prints a message and
// moves the cursor one token forward.
//
// Always returns PAC_AST_STATUS_ERROR_HANDLED.
//
// NOTE(review): the variant parser skips another token after this handler
// returns - confirm that advancing two tokens in total is intended.
pac_ast_status_e pac_ast_handle_missing_item_delimiter(pac_ast_builder_s *builder)
{
    builder->failed = TRUE;
    puts("Missing the item delimiter!");

    SKIP_TOKEN;

    return PAC_AST_STATUS_ERROR_HANDLED;
}
|
||||||
|
|
||||||
|
// pac_ast_handle_missing_equals_sign_after_rule_header: Called when a rule
// header isn't followed by an equals sign. Prints a message, marks the build
// as failed and looks for the start of the rule's first item within the next
// 2 steps.
//
// Returns PAC_AST_STATUS_ERROR_HANDLED if an item start was found and
// PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE otherwise.
pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header(pac_ast_builder_s *builder)
{
    puts("Missing Equals sign after rule header!");
    builder->failed = TRUE;

    // With an item start nearby, the rule can still be parsed.
    if(pac_ast_builder_forward_seek_item_start(builder, 2))
    {
        return PAC_AST_STATUS_ERROR_HANDLED;
    }
    return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
}
|
||||||
|
|
||||||
|
// pac_ast_handle_invalid_rule_name: Called when the name in a rule header
// isn't a word. Marks the build as failed and looks for the header's equals
// sign within the next 3 steps.
//
// Returns PAC_AST_STATUS_ERROR_HANDLED if an equals sign was found and
// PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE otherwise.
pac_ast_status_e pac_ast_handle_invalid_rule_name(pac_ast_builder_s *builder)
{
    builder->failed = TRUE;

    if(pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3))
    {
        return PAC_AST_STATUS_ERROR_HANDLED;
    }
    return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
}
|
||||||
|
|
||||||
|
// pac_ast_handle_missing_rule_closing_sign: Called when the name in a rule
// header isn't followed by a closing tag. Prints a message, marks the build
// as failed and looks for the header's equals sign within the next 2 steps.
//
// Returns PAC_AST_STATUS_ERROR_HANDLED if an equals sign was found and
// PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE otherwise.
pac_ast_status_e pac_ast_handle_missing_rule_closing_sign(pac_ast_builder_s *builder)
{
    puts("Missing Tag closing sign!");
    builder->failed = TRUE;

    if(pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 2))
    {
        // The calling function advances by one token before it checks for
        // the equals sign; stepping one back lets it find the sign again.
        REWIND_TOKEN;
        return PAC_AST_STATUS_ERROR_HANDLED;
    }

    // No equals sign nearby; give up on this rule.
    return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
}
|
||||||
|
|
|
@ -10,7 +10,7 @@ pac_grammar_s pac_convert_grammar(char *source)
|
||||||
{
|
{
|
||||||
usz_t len_source = strlen(source);
|
usz_t len_source = strlen(source);
|
||||||
pac_tlist_s tokens = pac_tokenize_grammar(source, len_source);
|
pac_tlist_s tokens = pac_tokenize_grammar(source, len_source);
|
||||||
pac_ast_s ast = pac_grow_ast(tokens);
|
pac_ast_s ast = pac_build_ast(tokens);
|
||||||
pac_grammar_s grammar = pac_link_grammar(ast);
|
pac_grammar_s grammar = pac_link_grammar(ast);
|
||||||
|
|
||||||
return grammar;
|
return grammar;
|
||||||
|
|
|
@ -48,7 +48,6 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
|
||||||
usz_t tokens_capacity = 1024;
|
usz_t tokens_capacity = 1024;
|
||||||
|
|
||||||
pac_tlist_s list;
|
pac_tlist_s list;
|
||||||
list.cursor = 0;
|
|
||||||
list.source = source;
|
list.source = source;
|
||||||
list.num_tokens = 0;
|
list.num_tokens = 0;
|
||||||
list.tokens = calloc(sizeof(pac_token_s), tokens_capacity);
|
list.tokens = calloc(sizeof(pac_token_s), tokens_capacity);
|
||||||
|
|
Loading…
Reference in New Issue