Enhanced Tokenizer to support more special signs
This commit is contained in:
parent
e70e9c5cef
commit
c5e21b4dc8
|
@ -1,4 +1,4 @@
|
||||||
#include <parcel.h>
|
#include <grammar.h>
|
||||||
#include <ast.h>
|
#include <ast.h>
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#include <parcel.h>
|
#include <grammar.h>
|
||||||
#include <tokenizer.h>
|
#include <tokenizer.h>
|
||||||
#include <ast.h>
|
#include <ast.h>
|
||||||
|
|
||||||
|
|
|
@ -21,15 +21,23 @@ typedef enum
|
||||||
PAC_TOKEN_LIT_RUNE, // TODO
|
PAC_TOKEN_LIT_RUNE, // TODO
|
||||||
PAC_TOKEN_LIT_INTEGER, // TODO
|
PAC_TOKEN_LIT_INTEGER, // TODO
|
||||||
|
|
||||||
PAC_TOKEN_SIGN_OPEN_TAG,
|
PAC_TOKEN_SIGN_OPENING_TAG,
|
||||||
PAC_TOKEN_SIGN_CLOSE_TAG,
|
PAC_TOKEN_SIGN_CLOSING_TAG,
|
||||||
|
PAC_TOKEN_SIGN_OPENING_BRACKET,
|
||||||
|
PAC_TOKEN_SIGN_CLOSING_BRACKET,
|
||||||
PAC_TOKEN_SIGN_EQUALS,
|
PAC_TOKEN_SIGN_EQUALS,
|
||||||
|
PAC_TOKEN_SIGN_SEMICOLON,
|
||||||
PAC_TOKEN_SIGN_COLON,
|
PAC_TOKEN_SIGN_COLON,
|
||||||
PAC_TOKEN_SIGN_COMMA,
|
PAC_TOKEN_SIGN_COMMA,
|
||||||
PAC_TOKEN_SIGN_HYPHEN,
|
|
||||||
PAC_TOKEN_SIGN_UNDERSCORE,
|
PAC_TOKEN_SIGN_UNDERSCORE,
|
||||||
PAC_TOKEN_SIGN_VERTICAL_BAR,
|
PAC_TOKEN_SIGN_VERTICAL_BAR,
|
||||||
PAC_TOKEN_SIGN_SEMICOLON
|
PAC_TOKEN_SIGN_AMPERSAND,
|
||||||
|
PAC_TOKEN_SIGN_DOLLAR,
|
||||||
|
PAC_TOKEN_SIGN_SLASH,
|
||||||
|
PAC_TOKEN_SIGN_HASH,
|
||||||
|
PAC_TOKEN_SIGN_AT,
|
||||||
|
PAC_TOKEN_SIGN_PLUS,
|
||||||
|
PAC_TOKEN_SIGN_MINUS,
|
||||||
|
|
||||||
} pac_token_e;
|
} pac_token_e;
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,7 @@ pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_ref
|
||||||
{
|
{
|
||||||
usz_t start_cursor = TOKEN_CURSOR;
|
usz_t start_cursor = TOKEN_CURSOR;
|
||||||
|
|
||||||
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG)
|
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPENING_TAG)
|
||||||
{
|
{
|
||||||
pac_internal_error_s error;
|
pac_internal_error_s error;
|
||||||
error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
|
error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
|
||||||
|
@ -35,7 +35,7 @@ pac_ast_status_e pac_build_ast_reference(pac_ast_builder_s *builder, pac_ast_ref
|
||||||
}
|
}
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
|
|
||||||
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG)
|
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSING_TAG)
|
||||||
{
|
{
|
||||||
return pac_ast_handle_missing_reference_close_tag(builder);
|
return pac_ast_handle_missing_reference_close_tag(builder);
|
||||||
}
|
}
|
||||||
|
@ -78,7 +78,7 @@ pac_ast_status_e pac_build_ast_item(pac_ast_builder_s *builder, pac_ast_item_s *
|
||||||
return pac_build_ast_string_literal(builder, &item->data.string_literal);
|
return pac_build_ast_string_literal(builder, &item->data.string_literal);
|
||||||
}
|
}
|
||||||
|
|
||||||
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_OPEN_TAG)
|
if(CURRENT_TOKEN.type == PAC_TOKEN_SIGN_OPENING_TAG)
|
||||||
{
|
{
|
||||||
item->type = PAC_AST_ITEM_REFERENCE;
|
item->type = PAC_AST_ITEM_REFERENCE;
|
||||||
return pac_build_ast_reference(builder, &item->data.reference);
|
return pac_build_ast_reference(builder, &item->data.reference);
|
||||||
|
@ -137,7 +137,7 @@ pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_varia
|
||||||
// Test if there is no separator and, if there is none, handle the error.
|
// Test if there is no separator and, if there is none, handle the error.
|
||||||
if(
|
if(
|
||||||
(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA)
|
(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_COMMA)
|
||||||
&& (CURRENT_TOKEN.type != PAC_TOKEN_SIGN_HYPHEN)
|
&& (CURRENT_TOKEN.type != PAC_TOKEN_SIGN_MINUS)
|
||||||
) {
|
) {
|
||||||
status = pac_ast_handle_missing_item_separator(builder, rule_name, variant_index, variant->num_items);
|
status = pac_ast_handle_missing_item_separator(builder, rule_name, variant_index, variant->num_items);
|
||||||
if(status != PAC_AST_STATUS_ERROR_HANDLED) return status;
|
if(status != PAC_AST_STATUS_ERROR_HANDLED) return status;
|
||||||
|
@ -152,7 +152,7 @@ pac_ast_status_e pac_build_ast_variant(pac_ast_builder_s *builder, pac_ast_varia
|
||||||
|
|
||||||
pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder)
|
pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder)
|
||||||
{
|
{
|
||||||
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPEN_TAG)
|
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_OPENING_TAG)
|
||||||
{
|
{
|
||||||
pac_internal_error_s error;
|
pac_internal_error_s error;
|
||||||
error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
|
error.type = PAC_INTERNAL_ERROR_INVALID_RETURNED_STATUS;
|
||||||
|
@ -174,7 +174,7 @@ pac_ast_status_e pac_skip_ast_rule_header(pac_ast_builder_s *builder)
|
||||||
}
|
}
|
||||||
SKIP_TOKEN;
|
SKIP_TOKEN;
|
||||||
|
|
||||||
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSE_TAG)
|
if(CURRENT_TOKEN.type != PAC_TOKEN_SIGN_CLOSING_TAG)
|
||||||
{
|
{
|
||||||
if((status = pac_ast_handle_missing_rule_header_closing_sign(builder) != PAC_AST_STATUS_ERROR_HANDLED))
|
if((status = pac_ast_handle_missing_rule_header_closing_sign(builder) != PAC_AST_STATUS_ERROR_HANDLED))
|
||||||
return status;
|
return status;
|
||||||
|
|
|
@ -16,7 +16,7 @@ char * pac_identify_present_construct(pac_ast_builder_s *builder)
|
||||||
{
|
{
|
||||||
switch(CURRENT_TOKEN.type)
|
switch(CURRENT_TOKEN.type)
|
||||||
{
|
{
|
||||||
case PAC_TOKEN_SIGN_OPEN_TAG: return "RULE_IDENTIFIER";
|
case PAC_TOKEN_SIGN_OPENING_TAG: return "RULE_IDENTIFIER";
|
||||||
default: return "UNKNOWN";
|
default: return "UNKNOWN";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -42,7 +42,7 @@ bool_t pac_ast_builder_is_at_item_start(pac_ast_builder_s *builder)
|
||||||
switch(CURRENT_TOKEN.type)
|
switch(CURRENT_TOKEN.type)
|
||||||
{
|
{
|
||||||
case PAC_TOKEN_LIT_STRING:
|
case PAC_TOKEN_LIT_STRING:
|
||||||
case PAC_TOKEN_SIGN_OPEN_TAG:
|
case PAC_TOKEN_SIGN_OPENING_TAG:
|
||||||
case PAC_TOKEN_KEYWORD_WORD:
|
case PAC_TOKEN_KEYWORD_WORD:
|
||||||
case PAC_TOKEN_KEYWORD_INTEGER:
|
case PAC_TOKEN_KEYWORD_INTEGER:
|
||||||
return TRUE;
|
return TRUE;
|
||||||
|
@ -78,7 +78,7 @@ pac_ast_status_e pac_ast_handle_invalid_reference_name_token(pac_ast_builder_s *
|
||||||
usz_t tried_tokens = 0;
|
usz_t tried_tokens = 0;
|
||||||
while(tried_tokens < 3)
|
while(tried_tokens < 3)
|
||||||
{
|
{
|
||||||
if(TOKEN_AT(TOKEN_CURSOR + tried_tokens).type == PAC_TOKEN_SIGN_CLOSE_TAG)
|
if(TOKEN_AT(TOKEN_CURSOR + tried_tokens).type == PAC_TOKEN_SIGN_CLOSING_TAG)
|
||||||
{
|
{
|
||||||
len_reference_name = TOKEN_AT(TOKEN_CURSOR + tried_tokens).offset - open_tag_src_offset;
|
len_reference_name = TOKEN_AT(TOKEN_CURSOR + tried_tokens).offset - open_tag_src_offset;
|
||||||
++len_reference_name; // Take the closing tag into the name
|
++len_reference_name; // Take the closing tag into the name
|
||||||
|
@ -152,7 +152,7 @@ pac_ast_status_e pac_ast_handle_missing_item_separator(pac_ast_builder_s *builde
|
||||||
error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
|
error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
|
||||||
error.specifics.odd_token.num_valid_options = 2;
|
error.specifics.odd_token.num_valid_options = 2;
|
||||||
error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_COMMA);
|
error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_COMMA);
|
||||||
error.specifics.odd_token.valid_options[1] = pac_stringify_token_type(PAC_TOKEN_SIGN_HYPHEN);
|
error.specifics.odd_token.valid_options[1] = pac_stringify_token_type(PAC_TOKEN_SIGN_MINUS);
|
||||||
pac_log_syntax_error(builder->logger, error);
|
pac_log_syntax_error(builder->logger, error);
|
||||||
|
|
||||||
return PAC_AST_STATUS_ERROR_HANDLED;
|
return PAC_AST_STATUS_ERROR_HANDLED;
|
||||||
|
@ -219,7 +219,7 @@ pac_ast_status_e pac_ast_handle_missing_rule_header_closing_sign(pac_ast_builder
|
||||||
error.location.column = CURRENT_TOKEN.column;
|
error.location.column = CURRENT_TOKEN.column;
|
||||||
error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
|
error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
|
||||||
error.specifics.odd_token.num_valid_options = 1;
|
error.specifics.odd_token.num_valid_options = 1;
|
||||||
error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_CLOSE_TAG);
|
error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_CLOSING_TAG);
|
||||||
|
|
||||||
// Continue at equals sign (if possible)
|
// Continue at equals sign (if possible)
|
||||||
if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3))
|
if(!pac_ast_builder_forward_seek_token_type(builder, PAC_TOKEN_SIGN_EQUALS, 3))
|
||||||
|
@ -244,7 +244,7 @@ pac_ast_status_e pac_ast_handle_unknown_item_type(pac_ast_builder_s *builder, ch
|
||||||
error.location.column = CURRENT_TOKEN.column;
|
error.location.column = CURRENT_TOKEN.column;
|
||||||
error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
|
error.specifics.odd_token.present_token = pac_stringify_token_type(CURRENT_TOKEN.type);
|
||||||
error.specifics.odd_token.num_valid_options = 1;
|
error.specifics.odd_token.num_valid_options = 1;
|
||||||
error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_CLOSE_TAG);
|
error.specifics.odd_token.valid_options[0] = pac_stringify_token_type(PAC_TOKEN_SIGN_CLOSING_TAG);
|
||||||
|
|
||||||
if(!pac_ast_builder_forward_seek_item_start(builder, 5))
|
if(!pac_ast_builder_forward_seek_item_start(builder, 5))
|
||||||
{
|
{
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
#include <grammar_parser.h>
|
#include <grammar.h>
|
||||||
#include <ast.h>
|
#include <ast.h>
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -32,17 +32,6 @@ pac_set_e pac_convert_ast_set_to_grammar_set(pac_ast_set_e set)
|
||||||
return PAC_SET_INVALID;
|
return PAC_SET_INVALID;
|
||||||
}
|
}
|
||||||
|
|
||||||
pac_rule_s * pac_find_rule(pac_grammar_s *grammar, char *name)
|
|
||||||
{
|
|
||||||
for(usz_t index = 0; index < grammar->num_rules; ++index)
|
|
||||||
{
|
|
||||||
if(!strcmp(grammar->rules[index].name, name))
|
|
||||||
return &grammar->rules[index];
|
|
||||||
}
|
|
||||||
printf("Couldn't find refernced rule: %s\n", name);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
void pac_copy_single_variant(pac_grammar_s *grammar, pac_variant_s *variant, pac_ast_variant_s *ast_variant)
|
void pac_copy_single_variant(pac_grammar_s *grammar, pac_variant_s *variant, pac_ast_variant_s *ast_variant)
|
||||||
{
|
{
|
||||||
variant->num_items = ast_variant->num_items;
|
variant->num_items = ast_variant->num_items;
|
||||||
|
|
|
@ -30,15 +30,23 @@ pac_token_e pac_convert_sign_to_token_type(rune_t sign)
|
||||||
{
|
{
|
||||||
switch(sign)
|
switch(sign)
|
||||||
{
|
{
|
||||||
case '<': return PAC_TOKEN_SIGN_OPEN_TAG;
|
case '<': return PAC_TOKEN_SIGN_OPENING_TAG;
|
||||||
case '>': return PAC_TOKEN_SIGN_CLOSE_TAG;
|
case '>': return PAC_TOKEN_SIGN_CLOSING_TAG;
|
||||||
|
case '[': return PAC_TOKEN_SIGN_OPENING_BRACKET;
|
||||||
|
case ']': return PAC_TOKEN_SIGN_CLOSING_BRACKET;
|
||||||
case '=': return PAC_TOKEN_SIGN_EQUALS;
|
case '=': return PAC_TOKEN_SIGN_EQUALS;
|
||||||
|
case ';': return PAC_TOKEN_SIGN_SEMICOLON;
|
||||||
case ':': return PAC_TOKEN_SIGN_COLON;
|
case ':': return PAC_TOKEN_SIGN_COLON;
|
||||||
case ',': return PAC_TOKEN_SIGN_COMMA;
|
case ',': return PAC_TOKEN_SIGN_COMMA;
|
||||||
case '-': return PAC_TOKEN_SIGN_HYPHEN;
|
|
||||||
case '_': return PAC_TOKEN_SIGN_UNDERSCORE;
|
case '_': return PAC_TOKEN_SIGN_UNDERSCORE;
|
||||||
case '|': return PAC_TOKEN_SIGN_VERTICAL_BAR;
|
case '|': return PAC_TOKEN_SIGN_VERTICAL_BAR;
|
||||||
case ';': return PAC_TOKEN_SIGN_SEMICOLON;
|
case '&': return PAC_TOKEN_SIGN_AMPERSAND;
|
||||||
|
case '$': return PAC_TOKEN_SIGN_DOLLAR;
|
||||||
|
case '/': return PAC_TOKEN_SIGN_SLASH;
|
||||||
|
case '#': return PAC_TOKEN_SIGN_HASH;
|
||||||
|
case '@': return PAC_TOKEN_SIGN_AT;
|
||||||
|
case '+': return PAC_TOKEN_SIGN_PLUS;
|
||||||
|
case '-': return PAC_TOKEN_SIGN_MINUS;
|
||||||
}
|
}
|
||||||
return PAC_TOKEN_STRAY;
|
return PAC_TOKEN_STRAY;
|
||||||
}
|
}
|
||||||
|
@ -204,28 +212,36 @@ char * pac_stringify_token_type(pac_token_e type)
|
||||||
{
|
{
|
||||||
switch(type)
|
switch(type)
|
||||||
{
|
{
|
||||||
case PAC_TOKEN_STRAY: return " - ";
|
case PAC_TOKEN_STRAY: return "STRAY";
|
||||||
|
|
||||||
case PAC_TOKEN_WORD: return "Word";
|
case PAC_TOKEN_WORD: return "Word";
|
||||||
case PAC_TOKEN_KEYWORD_TRUE: return "Keyword: \"true\"";
|
case PAC_TOKEN_KEYWORD_TRUE: return "Keyword: \"true\"";
|
||||||
case PAC_TOKEN_KEYWORD_FALSE: return "Keyword: \"false\"";
|
case PAC_TOKEN_KEYWORD_FALSE: return "Keyword: \"false\"";
|
||||||
case PAC_TOKEN_KEYWORD_ALPHA: return "Keyword: \"alpha\"";
|
case PAC_TOKEN_KEYWORD_ALPHA: return "Keyword: \"alpha\"";
|
||||||
case PAC_TOKEN_KEYWORD_WORD: return "Keyword: \"word\"";
|
case PAC_TOKEN_KEYWORD_WORD: return "Keyword: \"word\"";
|
||||||
case PAC_TOKEN_KEYWORD_INTEGER: return "Keyword: \"integer\"";
|
case PAC_TOKEN_KEYWORD_INTEGER: return "Keyword: \"integer\"";
|
||||||
|
|
||||||
case PAC_TOKEN_SIGN_OPEN_TAG: return "Sign: <";
|
case PAC_TOKEN_SIGN_OPENING_TAG: return "Sign: <";
|
||||||
case PAC_TOKEN_SIGN_CLOSE_TAG: return "Sign: >";
|
case PAC_TOKEN_SIGN_CLOSING_TAG: return "Sign: >";
|
||||||
case PAC_TOKEN_SIGN_EQUALS: return "Sign: =";
|
case PAC_TOKEN_SIGN_OPENING_BRACKET: return "Sign: [";
|
||||||
case PAC_TOKEN_SIGN_COLON: return "Sign: :";
|
case PAC_TOKEN_SIGN_CLOSING_BRACKET: return "Sign: ]";
|
||||||
case PAC_TOKEN_SIGN_COMMA: return "Sign: ,";
|
case PAC_TOKEN_SIGN_EQUALS: return "Sign: =";
|
||||||
case PAC_TOKEN_SIGN_HYPHEN: return "Sign: -";
|
case PAC_TOKEN_SIGN_SEMICOLON: return "Sign: ;";
|
||||||
case PAC_TOKEN_SIGN_UNDERSCORE: return "Sign: _";
|
case PAC_TOKEN_SIGN_COLON: return "Sign: :";
|
||||||
case PAC_TOKEN_SIGN_VERTICAL_BAR: return "Sign: |";
|
case PAC_TOKEN_SIGN_COMMA: return "Sign: ,";
|
||||||
case PAC_TOKEN_SIGN_SEMICOLON: return "Sign: ;";
|
case PAC_TOKEN_SIGN_UNDERSCORE: return "Sign: _";
|
||||||
|
case PAC_TOKEN_SIGN_VERTICAL_BAR: return "Sign: |";
|
||||||
case PAC_TOKEN_LIT_STRING: return "String";
|
case PAC_TOKEN_SIGN_AMPERSAND: return "Sign: &";
|
||||||
case PAC_TOKEN_LIT_RUNE: return "Rune";
|
case PAC_TOKEN_SIGN_DOLLAR: return "Sign: $";
|
||||||
case PAC_TOKEN_LIT_INTEGER: return "Integer";
|
case PAC_TOKEN_SIGN_SLASH: return "Sign: /";
|
||||||
|
case PAC_TOKEN_SIGN_HASH: return "Sign: #";
|
||||||
|
case PAC_TOKEN_SIGN_AT: return "Sign: @";
|
||||||
|
case PAC_TOKEN_SIGN_PLUS: return "Sign: +";
|
||||||
|
case PAC_TOKEN_SIGN_MINUS: return "Sign: -";
|
||||||
|
|
||||||
|
case PAC_TOKEN_LIT_STRING: return "String";
|
||||||
|
case PAC_TOKEN_LIT_RUNE: return "Rune";
|
||||||
|
case PAC_TOKEN_LIT_INTEGER: return "Integer";
|
||||||
}
|
}
|
||||||
return "Invalid";
|
return "Invalid";
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue