Cleaned up code and fixed memory leaks

This commit is contained in:
Eric-Paul Ickhorn 2023-11-29 14:00:43 +01:00
parent b8f4fd9feb
commit 3251632dbe
12 changed files with 457 additions and 149 deletions

View File

@ -4,7 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
char * load_file(char *path)
char * load_file(char *path, long *len)
{
FILE *file = fopen(path, "r");
if(file == NULL)
@ -20,6 +20,9 @@ char * load_file(char *path)
char *content = malloc(length+1);
content[length] = 0x00;
fread(content, 1, length, file);
if(len != NULL)
(*len) = length;
fclose(file);
return content;
@ -32,12 +35,18 @@ int main(int argc, char **argv)
printf("Usage: %s <filename>\n", argv[0]);
return -1;
}
char *source = load_file(argv[1]);
usz_t len_source;
char *source = load_file(argv[1], &len_source);
if(source == NULL)
{
puts("Stopping due to previous error!");
return -2;
}
pac_convert_grammar(source);
pac_grammar_s grammar = pac_convert_grammar(source, len_source);
free(source);
pac_display_grammar(grammar);
pac_delete_grammar(grammar);
return 0;
}

View File

@ -23,9 +23,7 @@ char * load_file(char *path, long *len)
fread(content, 1, length, file);
if(len != NULL)
{
(*len) = length;
}
fclose(file);
return content;
@ -46,6 +44,8 @@ int main(int argc, char **argv)
return -2;
}
pac_tlist_s tokens = pac_tokenize_grammar(source, len_source);
free(source);
pac_display_tlist(tokens);
return 0;

View File

@ -37,6 +37,7 @@ typedef enum
PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER,
PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER,
PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER,
PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_VARIANT,
PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE,
PAC_AST_STATUS_NOT_RECOVERABLE,
PAC_AST_STATUS_UNEXPECTED_FILE_END
@ -46,7 +47,7 @@ typedef enum
typedef struct pac_ast pac_ast_s;
typedef struct pac_ast_rule pac_ast_rule_s;
typedef struct pac_ast_variant pac_ast_variant_s;
typedef struct pac_ast_literal pac_ast_literal_s;
typedef struct pac_ast_string_literal pac_ast_string_literal_s;
typedef struct pac_ast_reference pac_ast_reference_s;
typedef struct pac_ast_item pac_ast_item_s;
@ -54,6 +55,7 @@ struct pac_ast
{
usz_t num_rules;
pac_ast_rule_s *rules;
pac_arena_s string_arena;
};
struct pac_ast_rule
@ -78,7 +80,7 @@ struct pac_ast_reference
char *name;
};
struct pac_ast_literal
struct pac_ast_string_literal
{
usz_t length;
char *string;
@ -89,7 +91,7 @@ struct pac_ast_item
pac_ast_item_e type;
union pac_ast_item_data
{
pac_ast_literal_s literal;
pac_ast_string_literal_s string_literal;
pac_ast_set_e set;
pac_ast_reference_s reference;
} data;
@ -101,16 +103,22 @@ typedef struct pac_ast_builder
pac_tlist_s *token_list;
bool_t failed;
pac_logger_s logger;
pac_logger_s *logger;
pac_arena_s string_arena;
} pac_ast_builder_s;
pac_ast_s pac_build_ast (pac_tlist_s tokens);
pac_ast_s pac_build_ast (pac_tlist_s tokens, pac_logger_s *logger);
void pac_delete_ast (pac_ast_s ast);
pac_ast_status_e pac_ast_handle_invalid_reference_name_token (pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_missing_reference_close_tag (pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_missing_rule_header_closing_sign (pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_reference_with_equals_sign (pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_missing_item_delimiter (pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_missing_item_delimiter (pac_ast_builder_s *builder, char *rule_name, usz_t variant_index, usz_t item_index);
pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header (pac_ast_builder_s *builder);
pac_ast_status_e pac_ast_handle_unknown_item_type (pac_ast_builder_s *builder, char *rule_name, usz_t variant_index, usz_t item_index);
char * pac_ast_stringify_status (pac_ast_status_e status);
#endif // PARCEL_AST_H

View File

@ -8,6 +8,7 @@ typedef enum
{
PAC_SYNTAX_ERROR,
PAC_NAMING_ERROR,
PAC_VALIDATION_ERROR,
PAC_INTERNAL_ERROR
} pac_error_e;
@ -17,20 +18,52 @@ typedef enum
PAC_SYNTAX_ERROR_UNSPECIFIED,
PAC_SYNTAX_ERROR_STRAY,
PAC_SYNTAX_ERROR_MISSING_TOKEN,
PAC_SYNTAX_ERROR_ODD_TOKEN
PAC_SYNTAX_ERROR_ODD_TOKEN,
PAC_SYNTAX_ERROR_INVALID_ITEM
} pac_syntax_error_e;
// Sub-kinds of naming errors.
typedef enum
{
// Set by pac_ast_handle_invalid_reference_name_token when the token
// following an opening tag cannot be used as a reference name.
PAC_NAMING_ERROR_INVALID_REFERENCE_NAME,
// NOTE(review): presumably the counterpart for rule headers; no user of
// this value is visible in this view - confirm against the AST builder.
PAC_NAMING_ERROR_INVALID_RULE_NAME
} pac_naming_error_e;
typedef struct pac_syntax_error
// Sub-kinds of validation errors; stored in pac_validation_error_s.type.
// NOTE(review): the per-value meanings below are read off the names and the
// matching 'specifics' members only - no producer of these errors is visible
// in this view.
typedef enum
{
// Presumably: a referenced rule name could not be found (see unknown_rule.searched_name).
PAC_VALIDATION_ERROR_UNKNOWN_RULE,
// Presumably: two rules share a name (see rule_naming_conflict).
PAC_VALIDATION_ERROR_RULE_NAMING_CONFLICT,
// Presumably: a rule contains the same variant twice (see variant_doubling).
PAC_VALIDATION_ERROR_VARIANT_DUPLICATION,
// Presumably: rules reference each other in a cycle; there is no matching
// member in pac_validation_error_specifics for this value.
PAC_VALIDATION_ERROR_CIRCULAR_REFERENCE
} pac_validation_error_e;
// Sub-kinds of internal errors, i.e. failures of the library itself rather
// than of the grammar being processed; stored in pac_internal_error_s.type.
typedef enum
{
// Pairs with pac_internal_error_s.specifics.invalid_arguments.
PAC_INTERNAL_ERROR_INVALID_ARGUMENTS,
// A callee handed back a status the caller has no case for; used by the
// default branch of the status switch in pac_build_ast_rule.
PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS,
// Pairs with pac_internal_error_s.specifics.invalid_function_entry
// (a function was entered while the builder was in an unexpected state).
PAC_INTERNAL_ERROR_INVALID_FUNCTION_ENTRY,
// Pairs with pac_internal_error_s.specifics.not_implemented.
PAC_INTERNAL_ERROR_NOT_IMPLEMENTED
} pac_internal_error_e;
// Position an error refers to; embedded in pac_syntax_error_s as 'location'.
typedef struct pac_source_location
{
// NOTE(review): presumably the path of the grammar file; no visible code
// assigns this field - confirm who owns the string.
char *file;
// Line and column of the offending token; the AST error handlers copy
// these from the token under the cursor.
usz_t line;
usz_t column;
// NOTE(review): presumably the name of the rule being parsed plus the
// indices of the variant and item inside it; no visible code assigns
// these three fields yet.
char *rule;
usz_t variant_index;
usz_t item_index;
} pac_source_location_s;
typedef struct pac_syntax_error
{
pac_source_location_s location;
pac_syntax_error_e type;
union pac_syntax_error_specifics
@ -50,10 +83,17 @@ typedef struct pac_syntax_error
{
usz_t num_valid_options;
char *valid_options[16];
char *found_token;
char *present_token;
char *hint;
} odd_token;
struct pac_syntax_error_invalid_item
{
char *present_construct;
char *hint;
} invalid_item;
} specifics;
} pac_syntax_error_s;
@ -74,6 +114,80 @@ typedef struct pac_naming_error
} pac_naming_error_s;
// One validation error as stored by pac_log_validation_error.
// 'type' selects which member of 'specifics' is meaningful.
//
// NOTE(review): the nested struct tags carry the prefix 'pac_naming_error_'
// although they belong to the validation error, and the third member is
// called 'variant_doubling' while its enumerator is
// PAC_VALIDATION_ERROR_VARIANT_DUPLICATION. There is no member for
// PAC_VALIDATION_ERROR_CIRCULAR_REFERENCE.
typedef struct pac_validation_error
{
// NOTE(review): presumably the rule (and variant index) in which the
// problem was detected - no producer is visible in this view.
char *rule;
usz_t variant;
pac_validation_error_e type;
union pac_validation_error_specifics
{
// For PAC_VALIDATION_ERROR_UNKNOWN_RULE.
struct pac_naming_error_unknown_rule
{
char *searched_name;
} unknown_rule;
// For PAC_VALIDATION_ERROR_RULE_NAMING_CONFLICT.
struct pac_naming_error_rule_naming_conflict
{
char *rule_name;
usz_t original_line;
usz_t duplicate_line;
} rule_naming_conflict;
// For PAC_VALIDATION_ERROR_VARIANT_DUPLICATION.
// NOTE(review): 'original' and 'duplicate' are presumably variant
// indices within 'rule_name' - confirm.
struct pac_naming_error_variant_duplication
{
char *rule_name;
usz_t original;
usz_t duplicate;
} variant_doubling;
} specifics;
} pac_validation_error_s;
// One internal (library-side) error as stored by pac_log_internal_error.
// 'type' selects which member of 'specifics' is meaningful.
typedef struct pac_internal_error
{
pac_internal_error_e type;
union pac_internal_error_specifics
{
// For PAC_INTERNAL_ERROR_INVALID_ARGUMENTS.
// NOTE(review): presumably the argument's name, its textual value and
// a description of what is wrong with it - no producer is visible.
struct pac_internal_error_invalid_arguments
{
char *argument;
char *value;
char *error_description;
} invalid_arguments;
// For PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS. As filled in by
// pac_build_ast_rule: 'function' names the function reporting the
// problem, 'immediate_returner' the callee that returned the status
// and 'value' the stringified status.
struct pac_internal_error_invalid_returned_status
{
char *function;
char *immediate_returner;
char *value;
} invalid_returned_status;
// For PAC_INTERNAL_ERROR_INVALID_FUNCTION_ENTRY: the function that was
// entered and a description of the violated entry condition.
struct pac_internal_error_invalid_function_entry
{
char *function;
char *error_description;
} invalid_function_entry;
// For PAC_INTERNAL_ERROR_NOT_IMPLEMENTED.
struct pac_internal_error_not_implemented
{
char *feature_name;
char *feature_description;
} not_implemented;
} specifics;
} pac_internal_error_s;
typedef struct pac_error
{
@ -82,6 +196,9 @@ typedef struct pac_error
{
pac_syntax_error_s syntax_error;
pac_naming_error_s naming_error;
pac_validation_error_s validation_error;
pac_internal_error_s internal_error;
} specifics;
} pac_error_s;
@ -97,8 +214,12 @@ typedef struct pac_logger
} pac_logger_s;
pac_logger_s pac_create_logger ();
void pac_delete_logger (pac_logger_s logger);
void pac_log_syntax_error (pac_logger_s *logger, pac_syntax_error_s error);
void pac_log_naming_error (pac_logger_s *logger, pac_naming_error_s error);
void pac_log_validation_error (pac_logger_s *logger, pac_validation_error_s error);
void pac_log_internal_error (pac_logger_s *logger, pac_internal_error_s error);
void * pac_log_alloc (pac_logger_s *logger, usz_t num_bytes);

View File

@ -3,6 +3,7 @@
#define PARCEL_H
#include <utility.h>
#include <logger.h>
typedef struct pac_grammar pac_grammar_s;
typedef struct pac_rule pac_rule_s;
@ -71,11 +72,14 @@ struct pac_grammar
usz_t num_rules;
pac_rule_s *rules;
pac_logger_s log;
pac_arena_s result_arena;
};
pac_grammar_s pac_convert_grammar (char *source);
pac_grammar_s pac_convert_grammar (char *source, usz_t len_source);
void pac_delete_grammar (pac_grammar_s grammar);
void pac_display_log (pac_grammar_s grammar);

View File

@ -50,9 +50,11 @@ struct pac_tlist
pac_token_s *tokens;
};
pac_token_e pac_word_to_token_type (char *word, usz_t length);
pac_tlist_s pac_tokenize_grammar (char *source, usz_t len_source);
char * pac_stringify_token_type (pac_token_e type);
void pac_delete_token_list (pac_tlist_s list);
void pac_display_tlist (pac_tlist_s list);
pac_token_e pac_word_to_token_type (char *word, usz_t length);
char * pac_stringify_token_type (pac_token_e type);
#endif

View File

@ -1,6 +1,4 @@
#include <ast.h>
#include <logger.h>
#include <tokenizer.h>
#include <string.h>
#include <stdio.h>
@ -11,15 +9,22 @@
#define TOKEN_CURSOR (builder->cursor)
#define SKIP_TOKEN ++builder->cursor
#define REWIND_TOKEN --builder->cursor
#define TOKEN_AT(index) (builder->token_list->tokens[(index)])
#define TOKEN_AT(index) (builder->token_list->tokens[index])
#define STRING_AT(index) &builder->token_list->source[builder->token_list->tokens[index].offset]
#define END_REACHED (builder->cursor >= builder->token_list->num_tokens)
pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_reference_s *reference)
{
usz_t start_cursor = TOKEN_CURSOR;
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG)
{
// This is only possible with malformed state.
pac_internal_error_s error;
error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
error.specifics.invalid_function_entry.function = "__FILE__:pac_build_ast_reference";
error.specifics.invalid_function_entry.error_description = "The function must be entered while at an opening tag token.";
pac_log_internal_error(builder->logger, error);
return PAC_AST_STATUS_NOT_RECOVERABLE;
}
SKIP_TOKEN;
@ -28,8 +33,6 @@ pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_ref
{
return pac_ast_handle_invalid_reference_name_token(builder);
}
usz_t len_name = CURRENT_TOKEN.length;
char *name = CURRENT_STRING;
SKIP_TOKEN;
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG)
@ -38,32 +41,41 @@ pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_ref
}
SKIP_TOKEN;
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS)
{
return pac_ast_handle_reference_with_equals_sign(builder);
}
// Test if this reference is followed by an equals sign. That equals sign shouldn't
// be there anyways and it being there can be a hint for a missing semicolon making
// this reference actually be a rule header.
if(!END_REACHED)
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_EQUALS)
return pac_ast_handle_reference_with_equals_sign(builder);
reference->len_name = len_name;
reference->name = malloc(len_name+1);
pac_memory_copy(reference->name, name, len_name);
reference->name[len_name] = 0x00;
reference->len_name = TOKEN_AT(TOKEN_CURSOR-2).length;
reference->name = pac_arena_alloc(&builder->string_arena, reference->len_name + 1);
reference->name[reference->len_name] = 0x00;
pac_memory_copy(reference->name, STRING_AT(start_cursor+1), reference->len_name);
return PAC_AST_STATUS_SUCCESS;
}
pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item)
// Turns the string-literal token under the cursor into an AST string literal.
//
// The token's text is copied, NUL-terminated, into memory taken from the
// builder's string arena; 'literal' receives that copy together with its
// length (terminator not counted). The cursor is advanced past the token.
// Always returns PAC_AST_STATUS_SUCCESS.
pac_ast_status_e pac_build_ast_string_literal(pac_ast_builder_s *builder, pac_ast_string_literal_s *literal)
{
    char *token_text = CURRENT_STRING;
    usz_t num_chars = CURRENT_TOKEN.length;

    literal->length = num_chars;
    literal->string = pac_arena_alloc(&builder->string_arena, num_chars + 1);

    // Terminate first, then fill in the characters in front of the terminator.
    literal->string[num_chars] = 0x00;
    pac_memory_copy(literal->string, token_text, num_chars);

    SKIP_TOKEN;

    return PAC_AST_STATUS_SUCCESS;
}
pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *item, char *rule_name, usz_t variant_index, usz_t item_index)
{
item->type = PAC_AST_ITEM_INVALID;
pac_memory_zero(item, sizeof(pac_ast_item_s));
if(CURRENT_TOKEN.type == PAC_TOKEN_LIT_STRING)
{
item->type = PAC_AST_ITEM_LITERAL;
item->data.literal.length = CURRENT_TOKEN.length;
item->data.literal.string = malloc(item->data.literal.length + 1);
pac_memory_copy(item->data.literal.string, CURRENT_STRING, CURRENT_TOKEN.length);
item->data.literal.string[item->data.literal.length] = 0x00;
SKIP_TOKEN;
return PAC_AST_STATUS_SUCCESS;
return pac_build_ast_string_literal(builder, &item->data.string_literal);
}
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_OPEN_TAG)
@ -87,9 +99,7 @@ pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *
SKIP_TOKEN;
return PAC_AST_STATUS_SUCCESS;
}
builder->failed = TRUE;
SKIP_TOKEN; // Skip to (probably) the next item.
return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
return pac_ast_handle_unknown_item_type(builder, rule_name, variant_index, item_index);
}
pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_variant_s *variant, char *rule_name, usz_t variant_index)
@ -105,18 +115,18 @@ pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_varia
items_capacity *= 2;
variant->items = calloc(sizeof(pac_ast_item_s), items_capacity);
}
pac_ast_status_e status = pac_build_ast_item(builder, &variant->items[variant->num_items]);
++variant->num_items;
pac_ast_status_e status =
pac_build_ast_item(builder, &variant->items[variant->num_items], rule_name, variant->num_items, variant->num_items);
if(status == PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER)
{
continue;
}
if(status != PAC_AST_STATUS_SUCCESS)
{
return status;
}
++variant->num_items;
// Test for the end of this variant (and possibly the rule it belongs to).
if(
(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
|| (CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
@ -124,11 +134,14 @@ pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_varia
return PAC_AST_STATUS_SUCCESS;
}
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA)
{
pac_ast_status_e comma_missing_status = pac_ast_handle_missing_item_delimiter(builder);
if(comma_missing_status != PAC_AST_STATUS_ERROR_HANDLED) return comma_missing_status;
// Test if there is no delimiter and, if three is none, handle the error.
if(
(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA)
&& (CURRENT_TOKEN.type != PAC_TOKEN_SIGN_HYPHEN)
) {
status = pac_ast_handle_missing_item_delimiter(builder, rule_name, variant_index, variant->num_items);
if(status != PAC_AST_STATUS_ERROR_HANDLED) return status;
continue;
}
SKIP_TOKEN;
}
@ -141,7 +154,12 @@ pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder)
{
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG)
{
// Invalid state
pac_internal_error_s error;
error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
error.specifics.invalid_function_entry.function = "__FILE__:pac_skip_ast_rule_header";
error.specifics.invalid_function_entry.error_description = "The function must be entered while at an opening tag token.";
pac_log_internal_error(builder->logger, error);
return PAC_AST_STATUS_NOT_RECOVERABLE;
}
SKIP_TOKEN;
@ -158,7 +176,7 @@ pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder)
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG)
{
if((status = pac_ast_handle_missing_rule_closing_sign(builder) != PAC_AST_STATUS_ERROR_HANDLED))
if((status = pac_ast_handle_missing_rule_header_closing_sign(builder) != PAC_AST_STATUS_ERROR_HANDLED))
return status;
}
SKIP_TOKEN;
@ -184,13 +202,11 @@ pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s *
usz_t start_index = TOKEN_CURSOR;
pac_skip_ast_rule_header(builder);
// Parse all variants
usz_t len_name = TOKEN_AT(start_index+1).length;
usz_t name_offset = TOKEN_AT(start_index+1).offset;
char *name_pointer = &builder->token_list->source[name_offset];
rule->name = malloc(len_name + 1);
rule->name = pac_arena_alloc(&builder->string_arena, len_name + 1);
pac_memory_copy(rule->name, name_pointer, len_name);
rule->name[len_name] = 0;
@ -210,41 +226,39 @@ pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s *
{
case PAC_AST_STATUS_SUCCESS:
break;
case PAC_AST_STATUS_NOT_RECOVERABLE:
return PAC_AST_STATUS_NOT_RECOVERABLE;
case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE:
puts("Failed parsing a rule's variant, recovering at next rule!");
return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
case PAC_AST_STATUS_UNEXPECTED_FILE_END:
return status;
case PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER:
{
builder->failed = TRUE;
printf("Failed parsing a rule's variant, ");
while(!END_REACHED)
{
// For continuing with the next variant of this rule.
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
{
puts("continuing with the next variant.");
break;
}
// For exiting this rule and continuing with a possible next one.
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
{
puts("continuing with next rule.");
SKIP_TOKEN;
return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
}
SKIP_TOKEN;
}
} break;
} return PAC_AST_STATUS_UNEXPECTED_FILE_END;
case PAC_AST_STATUS_UNEXPECTED_FILE_END:
return PAC_AST_STATUS_UNEXPECTED_FILE_END;
default:
printf("Internal Error: An invalid status code was produced: %d\n", status);
pac_internal_error_s error;
error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
error.specifics.invalid_returned_status.function = "__FILE__:pac_build_ast_rule";
error.specifics.invalid_returned_status.immediate_returner = "__FILE__:pac_build_ast_variant";
error.specifics.invalid_returned_status.value = pac_ast_stringify_status(status);
pac_log_internal_error(builder->logger, error);
return PAC_AST_STATUS_NOT_RECOVERABLE;
}
// It only goes here in the case of PAC_AST_STATUS_SUCCESS
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_SEMICOLON)
{
@ -261,7 +275,7 @@ pac_ast_status_e pac_build_ast_rule(pac_ast_builder_s *builder, pac_ast_rule_s *
return PAC_AST_STATUS_UNEXPECTED_FILE_END;
}
void pac_ast_find_next_rule(pac_ast_builder_s *builder)
void pac_ast_skip_to_next_rule(pac_ast_builder_s *builder)
{
while(!END_REACHED)
{
@ -275,7 +289,7 @@ void pac_ast_find_next_rule(pac_ast_builder_s *builder)
return;
}
pac_ast_s pac_build_ast(pac_tlist_s tokens)
pac_ast_s pac_build_ast(pac_tlist_s tokens, pac_logger_s *logger)
{
usz_t rules_capacity = 32;
pac_ast_s ast;
@ -284,9 +298,10 @@ pac_ast_s pac_build_ast(pac_tlist_s tokens)
pac_ast_builder_s builder;
builder.cursor = 0;
builder.logger = pac_create_logger();
builder.token_list = &tokens;
builder.failed = FALSE;
builder.string_arena = pac_create_arena(1024 * 256); // 262144 (2^18)
builder.logger = logger;
while(builder.cursor < tokens.num_tokens)
{
@ -299,20 +314,65 @@ pac_ast_s pac_build_ast(pac_tlist_s tokens)
pac_ast_status_e status = pac_build_ast_rule(&builder, &ast.rules[ast.num_rules]);
if(status == PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE)
pac_ast_find_next_rule(&builder);
pac_ast_skip_to_next_rule(&builder);
if(status == PAC_AST_STATUS_UNEXPECTED_FILE_END)
{
puts("File ended prematurely!");
break;
}
if(status != PAC_AST_STATUS_SUCCESS)
{
printf("Failed parsing a rule at index: %lu with status code: %d!\n", builder.cursor, status);
continue;
}
break;
++ast.num_rules;
}
ast.string_arena = builder.string_arena;
return ast;
}
// Releases the item array owned by a single AST variant.
// The variant structure itself is not freed here.
void pac_delete_ast_variant(pac_ast_variant_s *variant)
{
    pac_ast_item_s *owned_items = variant->items;

    free(owned_items);
}
// Releases everything a single AST rule owns on the heap: the item array of
// each of its variants and then the variant array itself.
// The rule structure itself is not freed here.
void pac_delete_ast_rule(pac_ast_rule_s *rule)
{
    for(usz_t index = 0; index < rule->num_variants; ++index)
    {
        pac_delete_ast_variant(&rule->variants[index]);
    }

    free(rule->variants);
}
// Tears down a whole AST: every rule's heap data, the rule array and finally
// the string arena stored in the AST.
// 'ast' is passed by value, so the caller's copy keeps its (now dangling)
// pointers and must not be used afterwards.
void pac_delete_ast(pac_ast_s ast)
{
    for(usz_t index = 0; index < ast.num_rules; ++index)
    {
        pac_delete_ast_rule(&ast.rules[index]);
    }

    free(ast.rules);
    pac_delete_arena(ast.string_arena);
}
// Maps an AST builder status code to a short, static, human-readable label.
// Values without a label of their own yield " - ".
// The returned string is a literal and must not be modified or freed.
char * pac_ast_stringify_status(pac_ast_status_e status)
{
    char *label = " - ";

    switch(status)
    {
        case PAC_AST_STATUS_SUCCESS:
            label = "SUCCESS";
            break;

        case PAC_AST_STATUS_ERROR_HANDLED:
            label = "ERROR_HANDLED";
            break;

        case PAC_AST_STATUS_STOP_ESCALATING_IN_ITEM_PARSER:
            label = "ESCALATE_TO_ITEM_PARSER";
            break;

        case PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER:
            label = "ESCALATE_TO_VARIANT_PARSER";
            break;

        case PAC_AST_STATUS_STOP_ESCALATING_IN_RULE_PARSER:
            label = "ESCALATE_TO_RULE_PARSER";
            break;

        case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_VARIANT:
            label = "FIND_NEXT_VARIANT";
            break;

        case PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE:
            label = "FIND_NEXT_RULE";
            break;

        case PAC_AST_STATUS_NOT_RECOVERABLE:
            label = "NOT_RECOVERABLE";
            break;

        case PAC_AST_STATUS_UNEXPECTED_FILE_END:
            label = "UNEXPECTED_END";
            break;
    }

    return label;
}

View File

@ -12,6 +12,15 @@
// Names the grammar construct that starts at the token under the cursor.
// Only an opening tag is recognised so far ("RULE_IDENTIFIER"); every other
// token type yields "UNKNOWN". The returned string is a literal.
char * pac_identify_present_construct(pac_ast_builder_s *builder)
{
    if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_OPEN_TAG)
    {
        return "RULE_IDENTIFIER";
    }

    return "UNKNOWN";
}
bool_t pac_ast_builder_forward_seek_token_type(pac_ast_builder_s *builder, pac_token_e type, usz_t max_steps)
{
usz_t cursor_backup = TOKEN_CURSOR;
@ -58,6 +67,8 @@ bool_t pac_ast_builder_forward_seek_item_start(pac_ast_builder_s *builder, usz_t
pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s *builder)
{
usz_t open_tag_src_offset = TOKEN_AT(TOKEN_CURSOR-1).offset;
@ -84,7 +95,7 @@ pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s *
char *given_rule_name = pac_log_alloc(&builder->logger, len_reference_name + 1);
pac_memory_copy(given_rule_name, &builder->token_list->source[open_tag_src_offset], len_reference_name);
pac_naming_error_s error;
error.type = PAC_NAMING_ERROR_INVALID_RULE_NAME;
error.type = PAC_NAMING_ERROR_INVALID_REFERENCE_NAME;
error.line = CURRENT_TOKEN.line;
error.column = CURRENT_TOKEN.column;
error.specifics.invalid_rule_name.given_rule_name = given_rule_name;
@ -94,22 +105,23 @@ pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s *
builder->failed = TRUE;
if(!pac_ast_builder_forward_seek_item_start(builder, 4))
return PAC_AST_STATUS_NOT_RECOVERABLE;
return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
}
pac_ast_status_e pac_ast_handle_missing_reference_close_tag(pac_ast_builder_s *builder)
{
builder->failed = TRUE;
pac_syntax_error_s error;
error.type = PAC_SYNTAX_ERROR_MISSING_TOKEN;
error.line = CURRENT_TOKEN.line;
error.column = CURRENT_TOKEN.column;
error.location.line = CURRENT_TOKEN.line;
error.location.column = CURRENT_TOKEN.column;
error.specifics.missing_token.hint = NULL;
error.specifics.missing_token.wanted_token = "Tag Closer (>)";
pac_log_syntax_error(&builder->logger, error);
builder->failed = TRUE;
return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
}
@ -117,8 +129,8 @@ pac_ast_status_e pac_ast_handle_reference_with_equals_sign(pac_ast_builder_s *bu
{
pac_syntax_error_s error;
error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
error.line = CURRENT_TOKEN.line;
error.column = CURRENT_TOKEN.column;
error.location.line = CURRENT_TOKEN.line;
error.location.column = CURRENT_TOKEN.column;
error.specifics.odd_token.hint = "There might be a semicolon missing 4 tokens ago.";
error.specifics.odd_token.num_valid_options = 1;
error.specifics.odd_token.valid_options[0] = "Any Rule Item";
@ -129,62 +141,117 @@ pac_ast_status_e pac_ast_handle_reference_with_equals_sign(pac_ast_builder_s *bu
}
pac_ast_status_e pac_ast_handle_unknown_item_type(pac_ast_builder_s *builder)
pac_ast_status_e pac_ast_handle_missing_item_delimiter(pac_ast_builder_s *builder, char *rule_name, usz_t variant_index, usz_t item_index)
{
builder->failed = TRUE;
pac_syntax_error_s error;
error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
error.line = CURRENT_TOKEN.line;
error.column = CURRENT_TOKEN.column;
error.specifics.odd_token.num_valid_options = 1;
error.specifics.odd_token.valid_options[0] = "Any Rule Item";
error.location.line = CURRENT_TOKEN.line;
error.location.column = CURRENT_TOKEN.column;
error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
error.specifics.odd_token.num_valid_options = 2;
error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_COMMA);
error.specifics.odd_token.valid_options[1] = pac_stringify_token_type(PAC_TOKEN_SIGN_HYPHEN);
pac_log_syntax_error(&builder->logger, error);
SKIP_TOKEN;
if(!pac_ast_builder_forward_seek_item_start(builder, 3))
return PAC_AST_STATUS_NOT_RECOVERABLE;
builder->failed = TRUE;
return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
}
pac_ast_status_e pac_ast_handle_missing_item_delimiter(pac_ast_builder_s *builder)
{
SKIP_TOKEN;
puts("Missing the item delimiter!");
builder->failed = TRUE;
return PAC_AST_STATUS_ERROR_HANDLED;
}
// Handles a rule header that is not followed by an equals sign.
//
// Prints a notice, marks the build as failed and logs an ODD_TOKEN syntax
// error that names the token actually found and the expected equals sign.
// It then looks at most 2 tokens ahead for the start of an item.
//
// Returns PAC_AST_STATUS_ERROR_HANDLED when an item start was found, otherwise
// PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE.
pac_ast_status_e pac_ast_handle_missing_equals_sign_after_rule_header(pac_ast_builder_s *builder)
{
    puts("Missing Equals sign after rule header!");
    builder->failed = TRUE;

    // Zero-initialised so that fields which are not filled in below (file,
    // rule, indices, unused options) are logged as NULL/0 rather than garbage.
    pac_syntax_error_s error = {0};
    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    error.location.line = CURRENT_TOKEN.line;
    error.location.column = CURRENT_TOKEN.column;
    error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
    error.specifics.odd_token.num_valid_options = 1;
    error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_EQUALS);

    // Continue at first item (if possible)
    if(!pac_ast_builder_forward_seek_item_start(builder, 2))
    {
        error.specifics.odd_token.hint = "Missing equals sign after rule header. Continuing at next rule.";
        // builder->logger already is a pointer to the logger; it is passed as-is.
        pac_log_syntax_error(builder->logger, error);
        return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
    }

    error.specifics.odd_token.hint = "Missing equals sign after rule header. Continuing at next item.";
    pac_log_syntax_error(builder->logger, error);
    return PAC_AST_STATUS_ERROR_HANDLED;
}
// Handles a rule header whose name token is not a word.
//
// Marks the build as failed and logs an ODD_TOKEN syntax error that names the
// token actually found and the expected word token. It then looks at most
// 3 tokens ahead for an equals sign.
//
// Returns PAC_AST_STATUS_ERROR_HANDLED when an equals sign was found,
// otherwise PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE.
pac_ast_status_e pac_ast_handle_invalid_rule_name(pac_ast_builder_s *builder)
{
    builder->failed = TRUE;

    // Zero-initialised so that fields which are not filled in below (file,
    // rule, indices, unused options) are logged as NULL/0 rather than garbage.
    pac_syntax_error_s error = {0};
    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    error.location.line = CURRENT_TOKEN.line;
    error.location.column = CURRENT_TOKEN.column;
    error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
    error.specifics.odd_token.num_valid_options = 1;
    error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_WORD);

    if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3))
    {
        error.specifics.odd_token.hint = "Invalid rule name. Continuing at next rule due to missing equals sign.";
        // builder->logger already is a pointer to the logger; it is passed as-is.
        pac_log_syntax_error(builder->logger, error);
        return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
    }

    error.specifics.odd_token.hint = "Invalid rule name. Continuing after the equals sign.";
    pac_log_syntax_error(builder->logger, error);
    return PAC_AST_STATUS_ERROR_HANDLED;
}
pac_ast_status_e pac_ast_handle_missing_rule_closing_sign(pac_ast_builder_s *builder)
pac_ast_status_e pac_ast_handle_missing_rule_header_closing_sign(pac_ast_builder_s *builder)
{
puts("Missing Tag closing sign!");
builder->failed = TRUE;
builder->failed = TRUE;
// Continue at equals sign (if possible)
if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 2))
return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
// The next step in the calling function is to advance and check for the equals sign,
// thus, it is necessary to go one back for it to find it again when advancing.
REWIND_TOKEN;
return PAC_AST_STATUS_ERROR_HANDLED;
pac_syntax_error_s error;
error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
error.location.line = CURRENT_TOKEN.line;
error.location.column = CURRENT_TOKEN.column;
error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
error.specifics.odd_token.num_valid_options = 1;
error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_CLOSE_TAG);
// Continue at equals sign (if possible)
if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3))
{
return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
}
error.specifics.odd_token.hint = "Continuing parser process after the next equals sign.";
// The next step in the calling function is to advance and check for the equals sign,
// thus, it is necessary to go one back for it to find it again when advancing.
REWIND_TOKEN;
return PAC_AST_STATUS_ERROR_HANDLED;
}
// Handles a token that cannot start any kind of rule item.
//
// Marks the build as failed, logs an ODD_TOKEN syntax error for the token
// under the cursor and then tries to find a point to resume parsing at:
//   - an item start within the next 5 tokens
//       -> PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER
//   - otherwise a vertical bar within the next 5 tokens
//       -> PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_VARIANT
//   - otherwise
//       -> PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE
//
// variant_index and item_index are recorded in the error's location.
pac_ast_status_e pac_ast_handle_unknown_item_type(pac_ast_builder_s *builder, char *rule_name, usz_t variant_index, usz_t item_index)
{
    builder->failed = TRUE;

    // NOTE(review): rule_name is deliberately not stored in the error. It
    // appears to point into the AST's string arena, which pac_delete_ast
    // releases while the logger lives on; copy it with pac_log_alloc before
    // recording it - confirm against the callers.
    (void) rule_name;

    // Zero-initialised so that everything not filled in below (file, rule,
    // hint, unused options) is logged as NULL/0 rather than garbage.
    pac_syntax_error_s error = {0};
    error.type = PAC_SYNTAX_ERROR_ODD_TOKEN;
    error.location.line = CURRENT_TOKEN.line;
    error.location.column = CURRENT_TOKEN.column;
    error.location.variant_index = variant_index;
    error.location.item_index = item_index;
    error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
    error.specifics.odd_token.num_valid_options = 1;
    // An item was expected at this position, not a closing tag.
    error.specifics.odd_token.valid_options[0] = "Any Rule Item";
    error.specifics.odd_token.hint = NULL;

    // Log before seeking, while the cursor still points at the odd token.
    // builder->logger already is a pointer to the logger; it is passed as-is.
    pac_log_syntax_error(builder->logger, error);

    if(!pac_ast_builder_forward_seek_item_start(builder, 5))
    {
        if(pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_VERTICAL_BAR, 5))
            return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_VARIANT;

        return PAC_AST_STATUS_CONTINUE_AFTER_FINDING_NEXT_RULE;
    }

    return PAC_AST_STATUS_STOP_ESCALATING_IN_VARIANT_PARSER;
}

View File

@ -4,14 +4,21 @@
#include <string.h>
extern pac_grammar_s pac_link_grammar(pac_ast_s ast);
extern void pac_link_grammar(pac_grammar_s *grammar, pac_ast_s ast);
pac_grammar_s pac_convert_grammar(char *source)
pac_grammar_s pac_convert_grammar(char *source, usz_t len_source)
{
usz_t len_source = strlen(source);
pac_logger_s logger = pac_create_logger();
pac_grammar_s grammar;
grammar.result_arena = pac_create_arena(1024 * 1024);
pac_tlist_s tokens = pac_tokenize_grammar(source, len_source);
pac_ast_s ast = pac_build_ast(tokens);
pac_grammar_s grammar = pac_link_grammar(ast);
pac_ast_s ast = pac_build_ast(tokens, &logger);
grammar.log = logger;
pac_link_grammar(&grammar, ast);
pac_delete_ast(ast);
pac_delete_token_list(tokens);
return grammar;
}
@ -23,5 +30,6 @@ void pac_display_grammar(pac_grammar_s grammar)
// Releases the resources held by a grammar: its error log and the arena
// stored in 'result_arena'.
// 'grammar' is passed by value, so the caller's copy must not be used afterwards.
void pac_delete_grammar(pac_grammar_s grammar)
{
    pac_arena_s owned_arena = grammar.result_arena;
    pac_logger_s owned_log = grammar.log;

    pac_delete_arena(owned_arena);
    pac_delete_logger(owned_log);
}

View File

@ -14,7 +14,7 @@ void pac_copy_empty_rules_from_ast_to_grammar(pac_grammar_s *grammar, pac_ast_s
pac_ast_rule_s ast_rule = ast->rules[rule_index];
pac_rule_s *rule = &grammar->rules[rule_index];
usz_t len_rule_name = strlen(ast_rule.name);
rule->name = malloc(len_rule_name + 1);
rule->name = pac_arena_alloc(&grammar->result_arena, len_rule_name + 1);
pac_memory_copy(rule->name, ast_rule.name, len_rule_name);
rule->name[len_rule_name] = 0x00;
}
@ -46,7 +46,7 @@ pac_rule_s * pac_find_rule(pac_grammar_s *grammar, char *name)
void pac_copy_single_variant(pac_grammar_s *grammar, pac_variant_s *variant, pac_ast_variant_s *ast_variant)
{
variant->num_items = ast_variant->num_items;
variant->items = malloc(sizeof(pac_item_s) * variant->num_items);
variant->items = pac_arena_alloc(&grammar->result_arena, sizeof(pac_item_s) * variant->num_items);
for(usz_t item_index = 0; item_index < variant->num_items; ++item_index)
{
pac_ast_item_s ast_item = ast_variant->items[item_index];
@ -61,8 +61,8 @@ void pac_copy_single_variant(pac_grammar_s *grammar, pac_variant_s *variant, pac
case PAC_AST_ITEM_LITERAL:
{
item->type = PAC_ITEM_LITERAL;
item->data.literal.length = ast_item.data.literal.length;
item->data.literal.string = ast_item.data.literal.string; // TODO: Copy this into a grammar-owned arena!
item->data.literal.length = ast_item.data.string_literal.length;
item->data.literal.string = ast_item.data.string_literal.string; // TODO: Copy this into a grammar-owned arena!
} break;
case PAC_AST_ITEM_REFERENCE:
@ -88,23 +88,21 @@ void pac_copy_variants_and_link_references(pac_grammar_s *grammar, pac_ast_s *as
pac_ast_rule_s ast_rule = ast->rules[rule_index];
pac_rule_s *rule = &grammar->rules[rule_index];
rule->num_variants = ast_rule.num_variants;
rule->variants = malloc(sizeof(pac_variant_s) * rule->num_variants);
rule->variants = pac_arena_alloc(&grammar->result_arena, sizeof(pac_variant_s) * rule->num_variants);
for(usz_t variant_index = 0; variant_index < rule->num_variants; ++variant_index)
{
pac_copy_single_variant(grammar, &rule->variants[variant_index], &ast_rule.variants[variant_index]);
}
}
}
pac_grammar_s pac_link_grammar(pac_ast_s ast)
void pac_link_grammar(pac_grammar_s *grammar, pac_ast_s ast)
{
pac_grammar_s grammar;
grammar.num_rules = ast.num_rules;
grammar.rules = malloc(sizeof(pac_rule_s) * ast.num_rules);
grammar->num_rules = ast.num_rules;
grammar->rules = pac_arena_alloc(&grammar->result_arena, sizeof(pac_rule_s) * ast.num_rules);
pac_copy_empty_rules_from_ast_to_grammar(&grammar, &ast);
pac_copy_variants_and_link_references(&grammar, &ast);
pac_copy_empty_rules_from_ast_to_grammar(grammar, &ast);
pac_copy_variants_and_link_references(grammar, &ast);
return grammar;
}

View File

@ -13,6 +13,12 @@ pac_logger_s pac_create_logger()
return logger;
}
// Releases the resources held by a logger: the array of recorded errors and
// the logger's string arena.
// 'logger' is passed by value, so the caller's copy must not be used afterwards.
void pac_delete_logger(pac_logger_s logger)
{
    pac_error_s *recorded_errors = logger.errors;

    free(recorded_errors);
    pac_delete_arena(logger.string_arena);
}
void pac_resize_log_if_needed(pac_logger_s *logger)
{
if(logger->num_errors >= logger->allocated_errors)
@ -26,16 +32,32 @@ void pac_resize_log_if_needed(pac_logger_s *logger)
// Appends a syntax error to the log.
// pac_resize_log_if_needed is called first; the error is then copied by value
// into the next free slot and the error count is increased by one.
void pac_log_syntax_error(pac_logger_s *logger, pac_syntax_error_s error)
{
    pac_resize_log_if_needed(logger);

    pac_error_s *slot = &logger->errors[logger->num_errors];
    slot->type = PAC_SYNTAX_ERROR;
    slot->specifics.syntax_error = error;

    ++logger->num_errors;
}
// Appends a naming error to the log.
// pac_resize_log_if_needed is called first; the error is then copied by value
// into the next free slot and the error count is increased by one.
void pac_log_naming_error(pac_logger_s *logger, pac_naming_error_s error)
{
    pac_resize_log_if_needed(logger);

    pac_error_s *slot = &logger->errors[logger->num_errors];
    slot->type = PAC_NAMING_ERROR;
    slot->specifics.naming_error = error;

    ++logger->num_errors;
}
// Appends a validation error to the log.
// pac_resize_log_if_needed is called first; the error is then copied by value
// into the next free slot and the error count is increased by one.
void pac_log_validation_error(pac_logger_s *logger, pac_validation_error_s error)
{
    pac_resize_log_if_needed(logger);

    pac_error_s *slot = &logger->errors[logger->num_errors];
    slot->type = PAC_VALIDATION_ERROR;
    slot->specifics.validation_error = error;

    ++logger->num_errors;
}
// Appends an internal error to the log.
// pac_resize_log_if_needed is called first; the error is then copied by value
// into the next free slot and the error count is increased by one.
void pac_log_internal_error(pac_logger_s *logger, pac_internal_error_s error)
{
    pac_resize_log_if_needed(logger);

    pac_error_s *slot = &logger->errors[logger->num_errors];
    slot->type = PAC_INTERNAL_ERROR;
    slot->specifics.internal_error = error;

    ++logger->num_errors;
}

View File

@ -48,9 +48,11 @@ pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
usz_t tokens_capacity = 1024;
pac_tlist_s list;
list.source = source;
list.num_tokens = 0;
list.tokens = calloc(sizeof(pac_token_s), tokens_capacity);
list.source = malloc(len_source + 1);
pac_memory_copy(list.source, source, len_source);
list.source[len_source] = 0;
usz_t line = 1;
usz_t column = 1;
@ -257,4 +259,11 @@ void pac_display_tlist(pac_tlist_s list)
content
);
}
}
}
// Releases a token list: the list's own copy of the source text and the
// token array.
// 'list' is passed by value, so the final wipe only clears this function's
// local copy; the caller's struct keeps its (now dangling) pointers and must
// not be used afterwards.
void pac_delete_token_list(pac_tlist_s list)
{
    free(list.source);
    free(list.tokens);

    pac_memory_fill(&list, sizeof(pac_tlist_s), 0);
}