diff --git a/.gitignore b/.gitignore index 550d80b..9ce33f8 100644 --- a/.gitignore +++ b/.gitignore @@ -1,122 +1,12 @@ -# ---> C -# Prerequisites -*.d -# Object files -*.o -*.ko -*.obj +# Binary Files on *nix systems *.elf - -# Linker output -*.ilk -*.map -*.exp - -# Precompiled Headers -*.gch -*.pch - -# Libraries -*.lib -*.a -*.la -*.lo - -# Shared objects (inc. Windows DLLs) -*.dll -*.so -*.so.* -*.dylib - -# Executables -*.exe -*.out -*.app -*.i*86 -*.x86_64 -*.hex - -# Debug files -*.dSYM/ -*.su -*.idb -*.pdb - -# Kernel Module Compile Results -*.mod* -*.cmd -.tmp_versions/ -modules.order -Module.symvers -Mkfile.old -dkms.conf - -# ---> C++ -# Prerequisites -*.d - -# Compiled Object files -*.slo -*.lo *.o -*.obj - -# Precompiled Headers -*.gch -*.pch - -# Compiled Dynamic libraries -*.so -*.dylib -*.dll - -# Fortran module files -*.mod -*.smod - -# Compiled Static libraries -*.lai -*.la *.a -*.lib +*.so -# Executables -*.exe -*.out -*.app - -# ---> Archives -# It's better to unpack these files and commit the raw source because -# git has its own built in compression methods. 
-*.7z
-*.jar
-*.rar
-*.zip
-*.gz
-*.gzip
-*.tgz
-*.bzip
-*.bzip2
-*.bz2
-*.xz
-*.lzma
-*.cab
-*.xar
-
-# Packing-only formats
-*.iso
-*.tar
-
-# Package management formats
-*.dmg
-*.xpi
-*.gem
-*.egg
-*.deb
-*.rpm
-*.msi
-*.msm
-*.msp
-*.txz
+# Stuff for Microsoft Visual Studio Code
+*.vscode/
+# Stuff for GNU nano
+*.swp
\ No newline at end of file
diff --git a/build.bash b/build.bash
new file mode 100755
index 0000000..e23a901
--- /dev/null
+++ b/build.bash
@@ -0,0 +1,4 @@
+#!/usr/bin/env bash
+
+gcc -o carrot.elf src/*.c -I inc/
+
diff --git a/inc/defgen.h b/inc/defgen.h
new file mode 100644
index 0000000..b2c8cd9
--- /dev/null
+++ b/inc/defgen.h
@@ -0,0 +1,91 @@
+
+#ifndef CARROT_DEFINITION_GENERATOR_H
+#define CARROT_DEFINITION_GENERATOR_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <tokenizer.h>
+
+typedef struct carrot_definition carrot_definition_s;
+typedef struct carrot_rule carrot_rule_s;
+typedef struct carrot_compound carrot_compound_s;
+typedef struct carrot_element carrot_element_s;
+typedef struct carrot_end_node carrot_end_node_s;
+
+struct carrot_definition
+{
+    uint32_t num_rules; // Number of entries in 'rules'
+    carrot_rule_s *rules;
+};
+
+struct carrot_rule
+{
+    uint32_t len_name; // Length of 'name', not counting its terminator
+    char *name;
+
+    uint32_t num_elements; // Number of entries in 'elements'
+    carrot_element_s *elements;
+};
+
+struct carrot_compound
+{
+    carrot_rule_s *up;
+
+    uint32_t num_rules;
+    carrot_rule_s *rules;
+};
+
+typedef enum
+{
+    CARROT_ELEMENT_RULE_INSERTION, // Uses 'specific.nonterminal'
+    CARROT_ELEMENT_PATTERN,        // Uses 'specific.pattern'
+    CARROT_ELEMENT_RAW_LITERAL     // Uses 'specific.raw_literal'
+
+} carrot_element_e;
+
+typedef enum
+{
+    CARROT_PATTERN_INVALID,
+    CARROT_PATTERN_WORD,    // Keyword: "|word|"
+    CARROT_PATTERN_KEYWORD, // Keyword: "|keyword|" (belonging to a list of user-defined keywords)
+    CARROT_PATTERN_NUMBER   // Keyword: "|number|"
+
+} carrot_pattern_e;
+
+struct carrot_element
+{
+    // char *text;
+
+    carrot_element_e type; // Selects the member of 'specific' that is in use
+    union carrot_specific_literal
+    {
+        struct carrot_tag_name
+        {
+            uint32_t len_name;
+            char *name;
+
+        } nonterminal;
+
+        struct carrot_pattern
+        {
+            carrot_pattern_e value;
+
+        } pattern;
+
+        struct carrot_raw_literal
+        {
+            uint32_t num_bytes;
+            char *bytes;
+        } raw_literal;
+
+    } specific;
+};
+
+carrot_definition_s carrot_parse_definition (carrot_token_stream_s *stream);
+
+#endif // CARROT_DEFINITION_GENERATOR_H
+
diff --git a/inc/print_def.h b/inc/print_def.h
new file mode 100644
index 0000000..878bce5
--- /dev/null
+++ b/inc/print_def.h
@@ -0,0 +1,11 @@
+
+#ifndef CARROT_PRINT_DEFINITION_H
+#define CARROT_PRINT_DEFINITION_H
+
+#include <defgen.h>
+
+void carrot_print_definition (carrot_definition_s *definition);
+void carrot_print_element (carrot_element_s *element);
+void carrot_print_rule (carrot_rule_s *rule);
+
+#endif
diff --git a/inc/tokenizer.h b/inc/tokenizer.h
new file mode 100644
index 0000000..8336a10
--- /dev/null
+++ b/inc/tokenizer.h
@@ -0,0 +1,62 @@
+
+#ifndef CARROT_TOKENIZER_H
+#define CARROT_TOKENIZER_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+
+typedef enum
+{
+    CARROT_TOKEN_WORD,
+    CARROT_TOKEN_SPECIAL, // A special character without a token type of its own
+    CARROT_TOKEN_SPECIAL_POINT,
+    CARROT_TOKEN_SPECIAL_COMMA,
+    CARROT_TOKEN_SPECIAL_COLON,
+    CARROT_TOKEN_SPECIAL_SEMICOLON,
+    CARROT_TOKEN_SPECIAL_EQUALS_SIGN,
+    CARROT_TOKEN_SPECIAL_AMPERSAND,
+    CARROT_TOKEN_SPECIAL_VERTICAL_BAR,
+    CARROT_TOKEN_SPECIAL_OPENING_ARROW,
+    CARROT_TOKEN_SPECIAL_CLOSING_ARROW,
+    CARROT_TOKEN_SPECIAL_OPENING_CURLY_BRACE,
+    CARROT_TOKEN_SPECIAL_CLOSING_CURLY_BRACE,
+    CARROT_TOKEN_SPECIAL_OPENING_PARENTHESIS,
+    CARROT_TOKEN_SPECIAL_CLOSING_PARENTHESIS,
+    CARROT_TOKEN_SPECIAL_OPENING_SQUARE_BRACKET,
+    CARROT_TOKEN_SPECIAL_CLOSING_SQUARE_BRACKET,
+    CARROT_TOKEN_LITERAL_NUMERIC,
+    CARROT_TOKEN_LITERAL_STRING,
+    CARROT_TOKEN_LITERAL_CHARACTER,
+    CARROT_TOKEN_STREAM_END // Always the last token of a stream
+
+} carrot_token_e;
+
+typedef struct
+{
+    carrot_token_e type;
+    uint32_t length;     // Number of bytes of the token's text
+    uint32_t char_index; // Index of the text's first byte in the stream's source
+
+    uint32_t line_index; // Counted from 1
+    uint32_t column_index;
+
+} carrot_token_s;
+
+typedef struct
+{
+    uint32_t num_tokens; // Number of entries in 'tokens'
+    carrot_token_s *tokens;
+
+    uint32_t len_source; // Length of 'source', not counting its terminator
+    char *source;
+
+} carrot_token_stream_s;
+
+carrot_token_stream_s carrot_tokenize (char *source, uint32_t len_source);
+void carrot_print_tokens (carrot_token_stream_s *stream);
+
+#endif
+
diff --git a/src/defgen.c b/src/defgen.c
new file mode 100644
index 0000000..75c8aba
--- /dev/null
+++ b/src/defgen.c
@@ -0,0 +1,245 @@
+#include <defgen.h>
+
+#define TOKEN_AT(index) (stream->tokens[(index)])
+#define TOKEN_TEXT_AT(index) (&stream->source[stream->tokens[(index)].char_index])
+
+// A "<name>" tag and a "|pattern|" are both exactly three tokens long
+#define CARROT_TOKENS_PER_ELEMENT 3
+// Number of element slots added whenever a rule's element array is full
+#define CARROT_ELEMENTS_GROWTH 64
+
+// Type of the token at 'index'. Positions behind the last token read as
+// CARROT_TOKEN_STREAM_END, which keeps every lookahead inside the array.
+static carrot_token_e carrot_token_type_at(carrot_token_stream_s *stream, uint32_t index)
+{
+    if(index >= stream->num_tokens) return CARROT_TOKEN_STREAM_END;
+    return TOKEN_AT(index).type;
+}
+
+// Checks for the token sequence: 'opening', a word, 'closing'.
+static bool carrot_word_is_enclosed_here(carrot_token_stream_s *stream, uint32_t index, carrot_token_e opening, carrot_token_e closing)
+{
+    if(carrot_token_type_at(stream, index) != opening) return false;
+    if(carrot_token_type_at(stream, index + 1) != CARROT_TOKEN_WORD) return false;
+    if(carrot_token_type_at(stream, index + 2) != closing) return false;
+    return true;
+}
+
+// Returns a NUL-terminated heap copy of the token's text, or NULL if out of memory.
+// The caller owns the copy.
+static char * carrot_copy_token_text(carrot_token_stream_s *stream, uint32_t index)
+{
+    uint32_t len_text = TOKEN_AT(index).length;
+    char *text = malloc((size_t) len_text + 1);
+    if(text == NULL) return NULL;
+
+    memcpy(text, TOKEN_TEXT_AT(index), len_text);
+    text[len_text] = 0x00;
+    return text;
+}
+
+// Frees everything a (possibly half-built) rule owns.
+static void carrot_discard_rule(carrot_rule_s *rule)
+{
+    for(uint32_t index = 0; index < rule->num_elements; ++index)
+    {
+        // Only rule insertions carry heap memory; patterns are plain enum values
+        if(rule->elements[index].type == CARROT_ELEMENT_RULE_INSERTION)
+        {
+            free(rule->elements[index].specific.nonterminal.name);
+        }
+    }
+    free(rule->elements);
+    free(rule->name);
+}
+
+// True if the tokens at 'index' spell a tag: "<" word ">".
+bool carrot_tag_name_starts_here(carrot_token_stream_s *stream, uint32_t index)
+{
+    return carrot_word_is_enclosed_here(stream, index, CARROT_TOKEN_SPECIAL_OPENING_ARROW, CARROT_TOKEN_SPECIAL_CLOSING_ARROW);
+}
+
+// True if the tokens at 'index' spell a pattern: "|" word "|".
+bool carrot_pattern_starts_here(carrot_token_stream_s *stream, uint32_t index)
+{
+    return carrot_word_is_enclosed_here(stream, index, CARROT_TOKEN_SPECIAL_VERTICAL_BAR, CARROT_TOKEN_SPECIAL_VERTICAL_BAR);
+}
+
+// Parses "<" word ">" into a rule-insertion element that owns a copy of the word.
+// Returns the number of tokens consumed, or -1 if there is no tag at 'index'
+// or the copy could not be allocated.
+int32_t carrot_parse_tag_name(carrot_token_stream_s *stream, uint32_t index, carrot_element_s *out_element)
+{
+    if(!carrot_tag_name_starts_here(stream, index)) return -1;
+
+    uint32_t tag_name_index = index + 1; // The word between the two arrows
+    char *name = carrot_copy_token_text(stream, tag_name_index);
+    if(name == NULL) return -1;
+
+    out_element->type = CARROT_ELEMENT_RULE_INSERTION;
+    out_element->specific.nonterminal.len_name = TOKEN_AT(tag_name_index).length;
+    out_element->specific.nonterminal.name = name;
+
+    return CARROT_TOKENS_PER_ELEMENT;
+}
+
+// Maps a pattern keyword to its enum value; unknown keywords map to CARROT_PATTERN_INVALID.
+carrot_pattern_e carrot_resolve_keyword_to_enum_value(const char *keyword)
+{
+    if(strcmp(keyword, "word") == 0) return CARROT_PATTERN_WORD;
+    if(strcmp(keyword, "keyword") == 0) return CARROT_PATTERN_KEYWORD;
+    if(strcmp(keyword, "number") == 0) return CARROT_PATTERN_NUMBER;
+    return CARROT_PATTERN_INVALID;
+}
+
+// Parses "|" word "|" into a pattern element.
+// Returns the number of tokens consumed, or -1 if there is no pattern at 'index'.
+// An unknown word still parses; its value becomes CARROT_PATTERN_INVALID.
+int32_t carrot_parse_pattern(carrot_token_stream_s *stream, uint32_t index, carrot_element_s *out_element)
+{
+    if(!carrot_pattern_starts_here(stream, index)) return -1;
+
+    uint32_t word_index = index + 1; // The word between the two vertical bars
+    uint32_t len_word = TOKEN_AT(word_index).length;
+
+    // "keyword" is the longest pattern name, so anything longer cannot match
+    // and never has to be copied into the fixed-size buffer.
+    char word[sizeof("keyword")] = {0};
+    carrot_pattern_e value = CARROT_PATTERN_INVALID;
+    if(len_word < sizeof(word))
+    {
+        memcpy(word, TOKEN_TEXT_AT(word_index), len_word);
+        value = carrot_resolve_keyword_to_enum_value(word);
+    }
+
+    out_element->type = CARROT_ELEMENT_PATTERN;
+    out_element->specific.pattern.value = value;
+
+    return CARROT_TOKENS_PER_ELEMENT;
+}
+
+// True if a tag or a pattern starts at 'index'.
+bool carrot_element_starts_here(carrot_token_stream_s *stream, uint32_t index)
+{
+    if(carrot_tag_name_starts_here(stream, index)) return true;
+    if(carrot_pattern_starts_here(stream, index)) return true;
+    return false;
+}
+
+// Parses the tag or pattern at 'index'.
+// Returns the number of tokens consumed, or -1 if neither could be parsed.
+int32_t carrot_parse_element(carrot_token_stream_s *stream, uint32_t index, carrot_element_s *out_element)
+{
+    if(carrot_tag_name_starts_here(stream, index))
+    {
+        return carrot_parse_tag_name(stream, index, out_element);
+    }
+    if(carrot_pattern_starts_here(stream, index))
+    {
+        return carrot_parse_pattern(stream, index, out_element);
+    }
+    return -1;
+}
+
+// Parses one rule: "<" name ">" "=" elements... ";"
+// Returns the number of tokens consumed (including the semicolon) and fills
+// 'out_rule', which then owns its name and elements. On failure 'out_rule' is
+// left untouched and a negative code is returned:
+//   -1     no "<" name at 'index', or out of memory
+//   -2     no ">" behind the name
+//   -3     no "=" behind the tag
+//   -1024  a token that neither starts an element nor ends the rule
+//   -(n+1) the element with the zero-based index n failed to parse
+int32_t carrot_parse_rule(carrot_token_stream_s *stream, uint32_t index, carrot_rule_s *out_rule)
+{
+    uint32_t first_index = index;
+
+    if(carrot_token_type_at(stream, index) != CARROT_TOKEN_SPECIAL_OPENING_ARROW) return -1;
+    if(carrot_token_type_at(stream, index + 1) != CARROT_TOKEN_WORD) return -1;
+    if(carrot_token_type_at(stream, index + 2) != CARROT_TOKEN_SPECIAL_CLOSING_ARROW) return -2;
+    if(carrot_token_type_at(stream, index + 3) != CARROT_TOKEN_SPECIAL_EQUALS_SIGN) return -3;
+
+    carrot_rule_s rule = {0};
+    rule.len_name = TOKEN_AT(index + 1).length;
+    rule.name = carrot_copy_token_text(stream, index + 1);
+    if(rule.name == NULL) return -1;
+
+    index += 4; // Skip the opening arrow, the name, the closing arrow and the equals sign
+
+    uint32_t max_elements_in_rule = 0;
+    while(carrot_token_type_at(stream, index) != CARROT_TOKEN_SPECIAL_SEMICOLON)
+    {
+        if(!carrot_element_starts_here(stream, index))
+        {
+            printf("non-element at token: %u\n", (unsigned) index);
+            carrot_discard_rule(&rule);
+            return -1024;
+        }
+        if(rule.num_elements >= max_elements_in_rule)
+        {
+            max_elements_in_rule += CARROT_ELEMENTS_GROWTH;
+
+            // Keep the old array reachable so that it can be freed if growing fails
+            carrot_element_s *grown = realloc(rule.elements, sizeof(carrot_element_s) * max_elements_in_rule);
+            if(grown == NULL)
+            {
+                carrot_discard_rule(&rule);
+                return -1;
+            }
+            rule.elements = grown;
+        }
+        int32_t len_element = carrot_parse_element(stream, index, &rule.elements[rule.num_elements]);
+
+        if(len_element < 1)
+        {
+            printf("Failed parsing element #%u.\n", (unsigned) rule.num_elements);
+            int32_t error = -((int32_t) rule.num_elements) - 1;
+            carrot_discard_rule(&rule);
+            return error;
+        }
+        index += (uint32_t) len_element;
+        ++rule.num_elements;
+    }
+    ++index; // Skip the semicolon, so that the next rule starts right behind it
+
+    (*out_rule) = rule;
+
+    return (int32_t) (index - first_index);
+}
+
+// Parses rules until the end of the stream or until the first rule that fails.
+// The returned definition owns its rules; 'num_rules' counts only the rules
+// that were parsed completely.
+carrot_definition_s carrot_parse_definition(carrot_token_stream_s *stream)
+{
+    uint32_t rules_capacity = 64;
+
+    carrot_definition_s definition;
+    definition.num_rules = 0;
+    definition.rules = calloc(rules_capacity, sizeof(carrot_rule_s));
+    if(definition.rules == NULL) return definition;
+
+    uint32_t token_index = 0;
+    while(carrot_token_type_at(stream, token_index) != CARROT_TOKEN_STREAM_END)
+    {
+        puts("Getting a rule of a definition.");
+        if(definition.num_rules >= rules_capacity)
+        {
+            carrot_rule_s *grown = realloc(definition.rules, sizeof(carrot_rule_s) * rules_capacity * 2);
+            if(grown == NULL) return definition;
+
+            definition.rules = grown;
+            rules_capacity *= 2;
+        }
+        // Signed on purpose: failures are reported as negative lengths
+        int32_t len_rule_in_tokens = carrot_parse_rule(stream, token_index, &definition.rules[definition.num_rules]);
+        printf("Rule is %d tokens long.\n", (int) len_rule_in_tokens);
+        if(len_rule_in_tokens < 1) return definition;
+
+        token_index += (uint32_t) len_rule_in_tokens;
+        ++definition.num_rules;
+    }
+
+    return definition;
+}
+
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..db57a4f
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,76 @@
+#include <tokenizer.h>
+#include <defgen.h>
+#include <print_def.h>
+
+// Reads the whole file at 'path' into a NUL-terminated heap buffer owned by the caller.
+// The number of bytes read is stored in 'out_len_file' unless that pointer is NULL.
+// Returns NULL if the file cannot be opened, measured, read or held in memory.
+char * carrot_load_file(char *path, uint32_t *out_len_file)
+{
+    // Binary mode: in text mode the size from ftell may differ from the bytes fread delivers
+    FILE *file = fopen(path, "rb");
+
+    if(file == NULL) return NULL;
+
+    char *content = NULL;
+    long len_file = -1;
+
+    if(fseek(file, 0, SEEK_END) == 0) len_file = ftell(file);
+
+    // ftell reports failure as -1; lengths are handed on as uint32_t
+    bool measured = (len_file >= 0) && ((unsigned long) len_file < UINT32_MAX);
+
+    if(measured && (fseek(file, 0, SEEK_SET) == 0))
+    {
+        content = malloc((size_t) len_file + 1);
+    }
+    if(content != NULL)
+    {
+        size_t num_read = fread(content, 1, (size_t) len_file, file);
+
+        if(ferror(file))
+        {
+            free(content);
+            content = NULL;
+        }
+        else
+        {
+            // Terminate behind what was actually read
+            content[num_read] = 0x00;
+            if(out_len_file != NULL) (*out_len_file) = (uint32_t) num_read;
+        }
+    }
+    fclose(file);
+
+    return content;
+}
+
+int main(int argc, char **argv)
+{
+    if(argc != 2)
+    {
+        puts("Early Build. Please only give the script file's name!");
+        return -1;
+    }
+
+    uint32_t len_file_content = 0;
+    char *file_content = carrot_load_file(argv[1], &len_file_content);
+
+    if(file_content == NULL)
+    {
+        printf("Unable to load file at path: '%s'\n", argv[1]);
+        return -2;
+    }
+
+    printf("len_file_content: %u\n", (unsigned) len_file_content);
+
+    carrot_token_stream_s stream = carrot_tokenize(file_content, len_file_content);
+    free(file_content); // The stream works on its own copy of the source
+    carrot_print_tokens(&stream);
+
+    carrot_definition_s definition = carrot_parse_definition(&stream);
+    carrot_print_definition(&definition);
+
+    return 0;
+}
+
diff --git a/src/print_def.c b/src/print_def.c
new file mode 100644
index 0000000..ebf0673
--- /dev/null
+++ b/src/print_def.c
@@ -0,0 +1,80 @@
+#include <print_def.h>
+
+// Prints every rule of the definition.
+void carrot_print_definition(carrot_definition_s *definition)
+{
+    for(uint32_t index = 0; index < definition->num_rules; ++index)
+    {
+        carrot_print_rule(&definition->rules[index]);
+    }
+}
+
+// Human-readable name of a pattern kind.
+const char * carrot_constringify_pattern_name(carrot_pattern_e pattern)
+{
+    switch(pattern)
+    {
+        case CARROT_PATTERN_INVALID: return "Invalid";
+        case CARROT_PATTERN_WORD: return "Word";
+        case CARROT_PATTERN_KEYWORD: return "Keyword";
+        case CARROT_PATTERN_NUMBER: return "Number";
+    }
+    return "Unknown";
+}
+
+// Human-readable name of an element type.
+const char * carrot_constringify_element_type(carrot_element_e element)
+{
+    switch(element)
+    {
+        case CARROT_ELEMENT_RULE_INSERTION: return "Rule Insertion";
+        case CARROT_ELEMENT_PATTERN: return "Pattern";
+        case CARROT_ELEMENT_RAW_LITERAL: return "Raw Literal";
+    }
+    // Reached for values outside the enum; falling off the end would be undefined behaviour
+    return "Unknown";
+}
+
+// Prints one line per element: its type, followed by a short description of its payload.
+void carrot_print_element(carrot_element_s *element)
+{
+    // Zero-filled, so an unknown element type prints an empty description
+    char buffer[72] = {0};
+
+    // snprintf truncates to the buffer and always terminates it; the lengths
+    // double as precisions, so the payloads do not have to be NUL-terminated.
+    switch(element->type)
+    {
+        case CARROT_ELEMENT_RULE_INSERTION:
+            snprintf(buffer, sizeof(buffer), "%u:\"%.*s\"",
+                (unsigned) element->specific.nonterminal.len_name,
+                (int) element->specific.nonterminal.len_name,
+                element->specific.nonterminal.name);
+            break;
+
+        case CARROT_ELEMENT_PATTERN:
+            snprintf(buffer, sizeof(buffer), "%d:\"%s\"",
+                (int) element->specific.pattern.value,
+                carrot_constringify_pattern_name(element->specific.pattern.value));
+            break;
+
+        case CARROT_ELEMENT_RAW_LITERAL:
+            snprintf(buffer, sizeof(buffer), "%u:\"%.*s\"",
+                (unsigned) element->specific.raw_literal.num_bytes,
+                (int) element->specific.raw_literal.num_bytes,
+                element->specific.raw_literal.bytes);
+            break;
+    }
+
+    printf("Element: %s %s\n", carrot_constringify_element_type(element->type), buffer);
+}
+
+// Prints every element of the rule.
+void carrot_print_rule(carrot_rule_s *rule)
+{
+    for(uint32_t index = 0; index < rule->num_elements; ++index)
+    {
+        carrot_print_element(&rule->elements[index]);
+    }
+}
+
diff --git a/src/tokenizer.c b/src/tokenizer.c
new file mode 100644
index 0000000..98d7100
--- /dev/null
+++ b/src/tokenizer.c
@@ -0,0 +1,296 @@
+#include <tokenizer.h>
+
+const char * carrot_stringify_token_type(carrot_token_e type);
+
+// Number of token slots the stream starts with
+#define CARROT_TOKENS_INITIAL_CAPACITY 512
+// Number of token slots added whenever the token array is full
+#define CARROT_TOKENS_GROWTH 2048
+
+// Space or tab
+bool carrot_is_blank(char rune)
+{
+    return (rune == ' ') || (rune == '\t');
+}
+
+bool carrot_is_lower(char rune)
+{
+    return (rune >= 'a') && (rune <= 'z');
+}
+
+bool carrot_is_upper(char rune)
+{
+    return (rune >= 'A') && (rune <= 'Z');
+}
+
+bool carrot_is_alpha(char rune)
+{
+    return carrot_is_lower(rune) || carrot_is_upper(rune);
+}
+
+bool carrot_is_digit(char rune)
+{
+    return (rune >= '0') && (rune <= '9');
+}
+
+// The ASCII code ranges 33-47, 58-64, 91-96 and 123-127: every code from 33
+// to 127 that is neither a letter nor a digit.
+bool carrot_is_special_1(char rune)
+{
+    return (rune >= 33) && (rune <= 47);
+}
+
+bool carrot_is_special_2(char rune)
+{
+    return (rune >= 58) && (rune <= 64);
+}
+
+bool carrot_is_special_3(char rune)
+{
+    return (rune >= 91) && (rune <= 96);
+}
+
+bool carrot_is_special_4(char rune)
+{
+    return (rune >= 123) && (rune <= 127);
+}
+
+bool carrot_is_special(char rune)
+{
+    return carrot_is_special_1(rune) || carrot_is_special_2(rune)
+        || carrot_is_special_3(rune) || carrot_is_special_4(rune);
+}
+
+// Maps a special character to its token type; characters without a type
+// of their own become CARROT_TOKEN_SPECIAL.
+carrot_token_e carrot_resolve_special_char_token_type(char rune)
+{
+    switch(rune)
+    {
+        case '.': return CARROT_TOKEN_SPECIAL_POINT;
+        case ',': return CARROT_TOKEN_SPECIAL_COMMA;
+        case ':': return CARROT_TOKEN_SPECIAL_COLON;
+        case ';': return CARROT_TOKEN_SPECIAL_SEMICOLON;
+        case '=': return CARROT_TOKEN_SPECIAL_EQUALS_SIGN;
+        case '&': return CARROT_TOKEN_SPECIAL_AMPERSAND;
+        case '|': return CARROT_TOKEN_SPECIAL_VERTICAL_BAR;
+        case '<': return CARROT_TOKEN_SPECIAL_OPENING_ARROW;
+        case '>': return CARROT_TOKEN_SPECIAL_CLOSING_ARROW;
+        case '{': return CARROT_TOKEN_SPECIAL_OPENING_CURLY_BRACE;
+        case '}': return CARROT_TOKEN_SPECIAL_CLOSING_CURLY_BRACE;
+        case '(': return CARROT_TOKEN_SPECIAL_OPENING_PARENTHESIS;
+        case ')': return CARROT_TOKEN_SPECIAL_CLOSING_PARENTHESIS;
+        case '[': return CARROT_TOKEN_SPECIAL_OPENING_SQUARE_BRACKET;
+        case ']': return CARROT_TOKEN_SPECIAL_CLOSING_SQUARE_BRACKET;
+    }
+    return CARROT_TOKEN_SPECIAL;
+}
+
+// Appends 'token' to the stream and grows the token array if it is full.
+// Returns false, leaving the stream as it was, if the array cannot grow.
+static bool carrot_push_token(carrot_token_stream_s *stream, uint32_t *capacity, carrot_token_s token)
+{
+    if(stream->num_tokens >= (*capacity))
+    {
+        uint32_t new_capacity = (*capacity) + CARROT_TOKENS_GROWTH;
+        carrot_token_s *grown = realloc(stream->tokens, sizeof(carrot_token_s) * new_capacity);
+        if(grown == NULL) return false;
+
+        stream->tokens = grown;
+        (*capacity) = new_capacity;
+    }
+    stream->tokens[stream->num_tokens] = token;
+    ++stream->num_tokens;
+    return true;
+}
+
+// Splits 'source' (of 'len_source' bytes) into tokens. The stream receives its
+// own NUL-terminated copy of the source; tokens refer to it by index and length.
+// The last token is always CARROT_TOKEN_STREAM_END, unless memory runs out:
+// then the stream ends early, and it is empty (all fields zero) if not even
+// the copy of the source could be allocated.
+carrot_token_stream_s carrot_tokenize(char *source, uint32_t len_source)
+{
+    carrot_token_stream_s stream = {0};
+    uint32_t token_capacity = CARROT_TOKENS_INITIAL_CAPACITY;
+
+    stream.source = malloc((size_t) len_source + 1);
+    stream.tokens = calloc(token_capacity, sizeof(carrot_token_s));
+    if((stream.source == NULL) || (stream.tokens == NULL))
+    {
+        free(stream.source);
+        free(stream.tokens);
+        stream.source = NULL;
+        stream.tokens = NULL;
+        return stream;
+    }
+    stream.len_source = len_source;
+    memcpy(stream.source, source, len_source);
+    stream.source[len_source] = 0x00;
+
+    uint32_t index = 0;
+    uint32_t line_index = 1;
+    uint32_t line_start = 0; // Index of the first rune of the current line
+    bool out_of_memory = false;
+
+    while((index < len_source) && !out_of_memory)
+    {
+        char rune = source[index];
+
+        if(rune == '\n')
+        {
+            ++line_index;
+            ++index;
+            line_start = index;
+            continue;
+        }
+
+        // Every token is stamped with the position of its first rune
+        carrot_token_s token;
+        token.line_index = line_index;
+        token.column_index = (index - line_start) + 1;
+
+        if(carrot_is_alpha(rune) || (rune == '_'))
+        {
+            token.type = CARROT_TOKEN_WORD;
+            token.char_index = index;
+            while((index < len_source) && (carrot_is_alpha(source[index]) || (source[index] == '_')))
+            {
+                ++index;
+            }
+            token.length = index - token.char_index;
+        }
+        else if(carrot_is_digit(rune))
+        {
+            token.type = CARROT_TOKEN_LITERAL_NUMERIC;
+            token.char_index = index;
+            while((index < len_source) && carrot_is_digit(source[index]))
+            {
+                ++index;
+            }
+            token.length = index - token.char_index;
+        }
+        else if(rune == '"')
+        {
+            ++index; // Skip the opening quote; it is not part of the token's text
+            token.type = CARROT_TOKEN_LITERAL_STRING;
+            token.char_index = index;
+            while((index < len_source) && (source[index] != '"'))
+            {
+                // A backslash keeps the next rune, even a quote, inside the literal
+                if((source[index] == '\\') && ((index + 1) < len_source)) ++index;
+
+                if(source[index] == '\n')
+                {
+                    ++line_index;
+                    line_start = index + 1;
+                }
+                ++index;
+            }
+            token.length = index - token.char_index;
+
+            // An unterminated literal ends with the source and has no closing quote to skip
+            if(index < len_source) ++index;
+        }
+        else if(rune == '\'')
+        {
+            ++index; // Skip the opening quote
+            token.type = CARROT_TOKEN_LITERAL_CHARACTER;
+            token.char_index = index;
+
+            // The literal is one rune, or a backslash followed by one rune
+            if((index < len_source) && (source[index] == '\\')) ++index;
+            if(index < len_source) ++index;
+            token.length = index - token.char_index;
+
+            if((index < len_source) && (source[index] == '\'')) ++index; // Skip the closing quote
+        }
+        else if(carrot_is_special(rune))
+        {
+            token.type = carrot_resolve_special_char_token_type(rune);
+            token.char_index = index;
+            token.length = 1;
+            ++index;
+
+            printf("Rune: '%c' resolves to Token-Type:> \"%s\"\n", rune, carrot_stringify_token_type(token.type));
+        }
+        else
+        {
+            // Blanks, and runes that belong to no class, separate tokens but produce none
+            ++index;
+            continue;
+        }
+        out_of_memory = !carrot_push_token(&stream, &token_capacity, token);
+    }
+
+    carrot_token_s last_token;
+    last_token.type = CARROT_TOKEN_STREAM_END;
+    last_token.length = 0;
+    last_token.char_index = 0;
+    last_token.line_index = line_index;
+    last_token.column_index = (index - line_start) + 1;
+
+    // If this fails as well, the stream simply ends without the end marker
+    (void) carrot_push_token(&stream, &token_capacity, last_token);
+
+    return stream;
+}
+
+// Human-readable name of a token type.
+const char * carrot_stringify_token_type(carrot_token_e type)
+{
+    switch(type)
+    {
+        case CARROT_TOKEN_WORD: return "Word";
+        case CARROT_TOKEN_SPECIAL: return "Unknown Special";
+        case CARROT_TOKEN_LITERAL_NUMERIC: return "Literal Numeric";
+        case CARROT_TOKEN_LITERAL_STRING: return "Literal String";
+        case CARROT_TOKEN_LITERAL_CHARACTER: return "Literal Character";
+        case CARROT_TOKEN_STREAM_END: return "End of Stream";
+        case CARROT_TOKEN_SPECIAL_POINT: return "Special: Point ( . )";
+        case CARROT_TOKEN_SPECIAL_COMMA: return "Special: Comma ( , )";
+        case CARROT_TOKEN_SPECIAL_COLON: return "Special: Colon ( : )";
+        case CARROT_TOKEN_SPECIAL_SEMICOLON: return "Special: Semicolon ( ; )";
+        case CARROT_TOKEN_SPECIAL_EQUALS_SIGN: return "Special: Equals ( = )";
+        case CARROT_TOKEN_SPECIAL_AMPERSAND: return "Special: Ampersand ( & )";
+        case CARROT_TOKEN_SPECIAL_VERTICAL_BAR: return "Special: Vertical Bar ( | )";
+        case CARROT_TOKEN_SPECIAL_OPENING_ARROW: return "Special: <";
+        case CARROT_TOKEN_SPECIAL_CLOSING_ARROW: return "Special: >";
+        case CARROT_TOKEN_SPECIAL_OPENING_CURLY_BRACE: return "Special: {";
+        case CARROT_TOKEN_SPECIAL_CLOSING_CURLY_BRACE: return "Special: }";
+        case CARROT_TOKEN_SPECIAL_OPENING_PARENTHESIS: return "Special: (";
+        case CARROT_TOKEN_SPECIAL_CLOSING_PARENTHESIS: return "Special: )";
+        case CARROT_TOKEN_SPECIAL_OPENING_SQUARE_BRACKET: return "Special: [";
+        case CARROT_TOKEN_SPECIAL_CLOSING_SQUARE_BRACKET: return "Special: ]";
+    }
+    return "Unknown";
+}
+
+// Prints one token as: type (padded to 31 columns), text, length.
+void carrot_print_single_token(carrot_token_stream_s *stream, uint32_t index)
+{
+    carrot_token_s token = stream->tokens[index];
+    if(token.type == CARROT_TOKEN_STREAM_END)
+    {
+        puts("Stream End!");
+        return;
+    }
+
+    // The precision bounds the text to the token, so no temporary copy is needed
+    printf("T=%-31s '%.*s':%u\n",
+        carrot_stringify_token_type(token.type),
+        (int) token.length, &stream->source[token.char_index],
+        (unsigned) token.length);
+}
+
+// Prints the number of tokens, then every token of the stream.
+void carrot_print_tokens(carrot_token_stream_s *stream)
+{
+    printf("Number of Tokens: %u\n", (unsigned) stream->num_tokens);
+
+    for(uint32_t index = 0; index < stream->num_tokens; ++index)
+    {
+        carrot_print_single_token(stream, index);
+    }
+}
+
diff --git a/test-sources/simple/simple-00.txt b/test-sources/simple/simple-00.txt
new file mode 100644
index 0000000..ab56cdc
--- /dev/null
+++ b/test-sources/simple/simple-00.txt
@@ -0,0 +1,6 @@
+ = ;
+ = | ;
+ = | ;
+ = "if" ;
+ = "1" | "0";
+ = "{" "}";