Initial Stuff
This commit is contained in:
parent
442c0ffb5c
commit
a7f93cabaa
|
@ -1,122 +1,12 @@
|
|||
# ---> C
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
||||
# Object files
|
||||
*.o
|
||||
*.ko
|
||||
*.obj
|
||||
# Binary Files on *nix systems
|
||||
*.elf
|
||||
|
||||
# Linker output
|
||||
*.ilk
|
||||
*.map
|
||||
*.exp
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Libraries
|
||||
*.lib
|
||||
*.a
|
||||
*.la
|
||||
*.lo
|
||||
|
||||
# Shared objects (inc. Windows DLLs)
|
||||
*.dll
|
||||
*.so
|
||||
*.so.*
|
||||
*.dylib
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
*.i*86
|
||||
*.x86_64
|
||||
*.hex
|
||||
|
||||
# Debug files
|
||||
*.dSYM/
|
||||
*.su
|
||||
*.idb
|
||||
*.pdb
|
||||
|
||||
# Kernel Module Compile Results
|
||||
*.mod*
|
||||
*.cmd
|
||||
.tmp_versions/
|
||||
modules.order
|
||||
Module.symvers
|
||||
Mkfile.old
|
||||
dkms.conf
|
||||
|
||||
# ---> C++
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
||||
# Compiled Object files
|
||||
*.slo
|
||||
*.lo
|
||||
*.o
|
||||
*.obj
|
||||
|
||||
# Precompiled Headers
|
||||
*.gch
|
||||
*.pch
|
||||
|
||||
# Compiled Dynamic libraries
|
||||
*.so
|
||||
*.dylib
|
||||
*.dll
|
||||
|
||||
# Fortran module files
|
||||
*.mod
|
||||
*.smod
|
||||
|
||||
# Compiled Static libraries
|
||||
*.lai
|
||||
*.la
|
||||
*.a
|
||||
*.lib
|
||||
*.so
|
||||
|
||||
# Executables
|
||||
*.exe
|
||||
*.out
|
||||
*.app
|
||||
|
||||
# ---> Archives
|
||||
# It's better to unpack these files and commit the raw source because
|
||||
# git has its own built in compression methods.
|
||||
*.7z
|
||||
*.jar
|
||||
*.rar
|
||||
*.zip
|
||||
*.gz
|
||||
*.gzip
|
||||
*.tgz
|
||||
*.bzip
|
||||
*.bzip2
|
||||
*.bz2
|
||||
*.xz
|
||||
*.lzma
|
||||
*.cab
|
||||
*.xar
|
||||
|
||||
# Packing-only formats
|
||||
*.iso
|
||||
*.tar
|
||||
|
||||
# Package management formats
|
||||
*.dmg
|
||||
*.xpi
|
||||
*.gem
|
||||
*.egg
|
||||
*.deb
|
||||
*.rpm
|
||||
*.msi
|
||||
*.msm
|
||||
*.msp
|
||||
*.txz
|
||||
# Stuff for Microsoft Visual Studio Code
|
||||
*.vscode/
|
||||
|
||||
# Stuff for GNU nano
|
||||
*.swp
|
|
@ -0,0 +1,4 @@
|
|||
#!/usr/bin/env bash
# Compiles every C file under src/ into carrot.elf; project headers are
# looked up in inc/.

# -Wall -Wextra makes the compiler report mistakes such as missing return
# statements and non-boolean values returned from bool functions.
gcc -Wall -Wextra -o carrot.elf src/*.c -I inc/
|
||||
|
|
@ -0,0 +1,91 @@
|
|||
|
||||
#ifndef CARROT_DEFINITION_GENERATOR_H
|
||||
#define CARROT_DEFINITION_GENERATOR_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <tokenizer.h>
|
||||
|
||||
typedef struct carrot_definition carrot_definition_s;
|
||||
typedef struct carrot_rule carrot_rule_s;
|
||||
typedef struct carrot_compound carrot_compound_s;
|
||||
typedef struct carrot_element carrot_element_s;
|
||||
typedef struct carrot_end_node carrot_end_node_s;
|
||||
|
||||
struct carrot_definition
|
||||
{
|
||||
uint32_t num_rules;
|
||||
carrot_rule_s *rules;
|
||||
};
|
||||
|
||||
struct carrot_rule
|
||||
{
|
||||
uint32_t len_name;
|
||||
char *name;
|
||||
|
||||
uint32_t num_elements;
|
||||
carrot_element_s *elements;
|
||||
};
|
||||
|
||||
struct carrot_compound
|
||||
{
|
||||
carrot_rule_s *up;
|
||||
|
||||
uint32_t num_rules;
|
||||
carrot_rule_s *rules;
|
||||
};
|
||||
|
||||
// Discriminator for carrot_element: tells which union member is in use.
typedef enum {
    CARROT_ELEMENT_RULE_INSERTION,  // element names another rule
    CARROT_ELEMENT_PATTERN,         // element is a built-in pattern
    CARROT_ELEMENT_RAW_LITERAL      // element is literal bytes
} carrot_element_e;
|
||||
|
||||
// Built-in patterns that may appear between vertical bars in a rule.
typedef enum {
    CARROT_PATTERN_INVALID,  // keyword was not recognised
    CARROT_PATTERN_WORD,     // Keyword: "|word|"
    CARROT_PATTERN_KEYWORD,  // Keyword: "|keyword|" (belonging to a list of user-defined keywords)
    CARROT_PATTERN_NUMBER    // Keyword: "|number|"
} carrot_pattern_e;
|
||||
|
||||
struct carrot_element
|
||||
{
|
||||
// char *text;
|
||||
|
||||
carrot_element_e type;
|
||||
union carrot_specific_literal
|
||||
{
|
||||
struct carrot_tag_name
|
||||
{
|
||||
uint32_t len_name;
|
||||
char *name;
|
||||
|
||||
} nonterminal;
|
||||
|
||||
struct carrot_pattern
|
||||
{
|
||||
carrot_pattern_e value;
|
||||
|
||||
} pattern;
|
||||
|
||||
struct carrot_raw_literal
|
||||
{
|
||||
uint32_t num_bytes;
|
||||
char *bytes;
|
||||
} raw_literal;
|
||||
|
||||
} specific;
|
||||
};
|
||||
|
||||
carrot_definition_s carrot_parse_definition (carrot_token_stream_s *stream);
|
||||
|
||||
#endif // CARROT_DEFINITION_GENERATOR_H
|
||||
|
|
@ -0,0 +1,11 @@
|
|||
|
||||
#ifndef CARROT_PRINT_DEFINITION_H
|
||||
#define CARROT_PRINT_DEFINITION_H
|
||||
|
||||
#include <defgen.h>
|
||||
|
||||
void carrot_print_definition (carrot_definition_s *definition);
|
||||
void carrot_print_element (carrot_element_s *element);
|
||||
void carrot_print_rule (carrot_rule_s *rule);
|
||||
|
||||
#endif
|
|
@ -0,0 +1,62 @@
|
|||
|
||||
#ifndef CARROT_TOKENIZER_H
|
||||
#define CARROT_TOKENIZER_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Every kind of token the tokenizer can emit.
typedef enum {
    CARROT_TOKEN_WORD,     // run of letters and underscores
    CARROT_TOKEN_SPECIAL,  // special character without a dedicated kind

    // Special characters that have a dedicated kind.
    CARROT_TOKEN_SPECIAL_POINT,
    CARROT_TOKEN_SPECIAL_COMMA,
    CARROT_TOKEN_SPECIAL_COLON,
    CARROT_TOKEN_SPECIAL_SEMICOLON,
    CARROT_TOKEN_SPECIAL_EQUALS_SIGN,
    CARROT_TOKEN_SPECIAL_AMPERSAND,
    CARROT_TOKEN_SPECIAL_VERTICAL_BAR,
    CARROT_TOKEN_SPECIAL_OPENING_ARROW,
    CARROT_TOKEN_SPECIAL_CLOSING_ARROW,
    CARROT_TOKEN_SPECIAL_OPENING_CURLY_BRACE,
    CARROT_TOKEN_SPECIAL_CLOSING_CURLY_BRACE,
    CARROT_TOKEN_SPECIAL_OPENING_PARENTHESIS,
    CARROT_TOKEN_SPECIAL_CLOSING_PARENTHESIS,
    CARROT_TOKEN_SPECIAL_OPENING_SQUARE_BRACKET,
    CARROT_TOKEN_SPECIAL_CLOSING_SQUARE_BRACKET,

    // Literals.
    CARROT_TOKEN_LITERAL_NUMERIC,
    CARROT_TOKEN_LITERAL_STRING,
    CARROT_TOKEN_LITERAL_CHARACTER,

    // Appended once after the last real token.
    CARROT_TOKEN_STREAM_END
} carrot_token_e;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
carrot_token_e type;
|
||||
uint32_t length;
|
||||
uint32_t char_index;
|
||||
|
||||
uint32_t line_index;
|
||||
uint32_t column_index;
|
||||
|
||||
} carrot_token_s;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
uint32_t num_tokens;
|
||||
carrot_token_s *tokens;
|
||||
|
||||
uint32_t len_source;
|
||||
char *source;
|
||||
|
||||
} carrot_token_stream_s;
|
||||
|
||||
carrot_token_stream_s carrot_tokenize (char *source, uint32_t len_source);
|
||||
void carrot_print_tokens (carrot_token_stream_s *stream);
|
||||
|
||||
#endif
|
||||
|
|
@ -0,0 +1,239 @@
|
|||
#include <defgen.h>
|
||||
|
||||
#define TOKEN_AT(index) (stream->tokens[(index)])
|
||||
#define TOKEN_TEXT_AT(index) (&stream->source[stream->tokens[(index)].char_index])
|
||||
|
||||
/*
|
||||
bool carrot_token_text_equals(carrot_token_stream_s *stream, int index, char *text)
|
||||
{
|
||||
|
||||
}
|
||||
*/
|
||||
|
||||
// int32_t carrot_parse_compound
|
||||
|
||||
// Tells whether the tokens at `index`, `index + 1` and `index + 2` are
// '<', a word and '>' in that order. Only token types are looked at, and
// a later token is only examined when the earlier ones matched.
bool carrot_tag_name_starts_here(carrot_token_stream_s *stream, uint32_t index)
{
    return TOKEN_AT(index).type     == CARROT_TOKEN_SPECIAL_OPENING_ARROW
        && TOKEN_AT(index + 1).type == CARROT_TOKEN_WORD
        && TOKEN_AT(index + 2).type == CARROT_TOKEN_SPECIAL_CLOSING_ARROW;
}
|
||||
|
||||
// Tells whether the tokens at `index`, `index + 1` and `index + 2` are
// '|', a word and '|' in that order. A later token is only examined when
// the earlier ones matched.
bool carrot_pattern_starts_here(carrot_token_stream_s *stream, uint32_t index)
{
    static const carrot_token_e expected[3] = {
        CARROT_TOKEN_SPECIAL_VERTICAL_BAR,
        CARROT_TOKEN_WORD,
        CARROT_TOKEN_SPECIAL_VERTICAL_BAR
    };

    for(uint32_t offset = 0; offset < 3; ++offset)
    {
        if(TOKEN_AT(index + offset).type != expected[offset]) return false;
    }
    return true;
}
|
||||
|
||||
// Parses '<' WORD '>' starting at token `index` into `out_element`.
//
// On success the element becomes a CARROT_ELEMENT_RULE_INSERTION whose
// nonterminal name is a freshly malloc'd, NUL-terminated copy of the word
// (the caller owns it), and 3 - the number of tokens consumed - is returned.
// Returns -1 without touching `out_element` when the tokens do not have
// that shape or the name cannot be allocated.
int32_t carrot_parse_tag_name(carrot_token_stream_s *stream, uint32_t index, carrot_element_s *out_element)
{
    if(TOKEN_AT(index).type != CARROT_TOKEN_SPECIAL_OPENING_ARROW)
    {
        return -1;
    }
    ++index; // Skip the opening arrow

    if(TOKEN_AT(index).type != CARROT_TOKEN_WORD)
    {
        return -1;
    }
    uint32_t tag_name_index = index;
    ++index; // Skip the name itself, so the next check sees the closing arrow

    if(TOKEN_AT(index).type != CARROT_TOKEN_SPECIAL_CLOSING_ARROW)
    {
        return -1;
    }

    uint32_t len_name = TOKEN_AT(tag_name_index).length;
    char *name = malloc((size_t) len_name + 1);
    if(name == NULL)
    {
        return -1;
    }
    memcpy(name, TOKEN_TEXT_AT(tag_name_index), len_name);
    name[len_name] = 0x00;

    out_element->type = CARROT_ELEMENT_RULE_INSERTION;
    out_element->specific.nonterminal.len_name = len_name;
    out_element->specific.nonterminal.name = name;

    return 3;
}
|
||||
|
||||
carrot_pattern_e carrot_resolve_keyword_to_enum_value(char *keyword)
|
||||
{
|
||||
if(strcpy(keyword, "word")) return CARROT_PATTERN_WORD;
|
||||
if(strcpy(keyword, "keyword")) return CARROT_PATTERN_KEYWORD;
|
||||
if(strcpy(keyword, "number")) return CARROT_PATTERN_NUMBER;
|
||||
return CARROT_PATTERN_INVALID;
|
||||
}
|
||||
|
||||
// Parses '|' WORD '|' starting at token `index` into `out_element`.
//
// On success the element becomes a CARROT_ELEMENT_PATTERN whose value is
// the pattern named by the word (CARROT_PATTERN_INVALID if the word is not
// a known pattern keyword), and 3 - the number of tokens consumed - is
// returned. Returns -1 without touching `out_element` when the tokens do
// not have that shape.
int32_t carrot_parse_pattern(carrot_token_stream_s *stream, uint32_t index, carrot_element_s *out_element)
{
    if(TOKEN_AT(index).type != CARROT_TOKEN_SPECIAL_VERTICAL_BAR)
    {
        return -1;
    }
    ++index; // Skip the opening vertical bar

    if(TOKEN_AT(index).type != CARROT_TOKEN_WORD)
    {
        return -1;
    }
    uint32_t word_index = index;
    ++index; // Skip the keyword, so the next check sees the closing bar

    if(TOKEN_AT(index).type != CARROT_TOKEN_SPECIAL_VERTICAL_BAR)
    {
        return -1;
    }

    // Every known keyword fits easily in this buffer; a longer word cannot
    // match any of them, so it is classified as invalid without copying.
    char word[16];
    uint32_t len_word = TOKEN_AT(word_index).length;
    carrot_pattern_e value = CARROT_PATTERN_INVALID;
    if(len_word < sizeof(word))
    {
        memcpy(word, TOKEN_TEXT_AT(word_index), len_word);
        word[len_word] = 0x00;
        value = carrot_resolve_keyword_to_enum_value(word);
    }

    out_element->type = CARROT_ELEMENT_PATTERN;
    out_element->specific.pattern.value = value;

    return 3;
}
|
||||
|
||||
// Tells whether a tag name (<name>) or a pattern (|keyword|) begins at
// token `index`.
bool carrot_element_starts_here(carrot_token_stream_s *stream, uint32_t index)
{
    if(carrot_tag_name_starts_here(stream, index))
    {
        return true;
    }

    if(carrot_pattern_starts_here(stream, index))
    {
        return true;
    }

    // Must be false: the former `return -1` converts to true in a bool
    // function, which made every token look like the start of an element.
    return false;
}
|
||||
|
||||
// Parses the element that begins at token `index` into `out_element`,
// trying a tag name first and a pattern second.
// Returns whatever the chosen sub-parser returns (its token count, or its
// negative error), or -1 when neither kind of element begins here.
int32_t carrot_parse_element(carrot_token_stream_s *stream, uint32_t index, carrot_element_s *out_element)
{
    if(carrot_tag_name_starts_here(stream, index))
    {
        return carrot_parse_tag_name(stream, index, out_element);
    }

    if(carrot_pattern_starts_here(stream, index))
    {
        return carrot_parse_pattern(stream, index, out_element);
    }

    return -1;
}
|
||||
|
||||
// Parses one rule of the form
//     '<' WORD '>' '=' element... ';'
// starting at token `index`.
//
// On success the finished rule is stored in `*out_rule` (its name and
// element array are heap-allocated and owned by the caller) and the number
// of tokens consumed, including the terminating semicolon, is returned.
//
// On failure `*out_rule` is left untouched, everything allocated here is
// released, and a negative value is returned:
//     -1     no opening arrow
//     -2     no rule name or no closing arrow
//     -3     no equals sign
//     -1024  a token that is neither an element nor ';', or out of memory
//     -(n+1) element number n (counted from 0) failed to parse
// NOTE(review): the last form overlaps with -1/-2/-3 for the first elements;
// the codes are kept as they were.
int32_t carrot_parse_rule(carrot_token_stream_s *stream, uint32_t index, carrot_rule_s *out_rule)
{
    uint32_t first_index = index;

    if(TOKEN_AT(index).type != CARROT_TOKEN_SPECIAL_OPENING_ARROW) return -1;
    ++index; // Skip the opening arrow in front of the rule-name

    if(TOKEN_AT(index).type != CARROT_TOKEN_WORD) return -2;
    uint32_t name_index = index;
    ++index; // Skip the name of the rule

    if(TOKEN_AT(index).type != CARROT_TOKEN_SPECIAL_CLOSING_ARROW) return -2;
    ++index; // Skip the closing arrow behind the rule-name

    if(TOKEN_AT(index).type != CARROT_TOKEN_SPECIAL_EQUALS_SIGN) return -3;
    ++index; // Skip the equals sign between the rule's identifier and the rule

    // The rule is assembled in a local and only published on success.
    int32_t status = -1024;
    uint32_t max_elements_in_rule = 64;

    carrot_rule_s rule;
    rule.len_name = TOKEN_AT(name_index).length;
    rule.num_elements = 0;
    rule.name = malloc((size_t) rule.len_name + 1);
    rule.elements = calloc(max_elements_in_rule, sizeof *rule.elements);
    if((rule.name == NULL) || (rule.elements == NULL)) goto fail;

    memcpy(rule.name, TOKEN_TEXT_AT(name_index), rule.len_name);
    rule.name[rule.len_name] = 0x00;

    for(;;)
    {
        if(TOKEN_AT(index).type == CARROT_TOKEN_SPECIAL_SEMICOLON)
        {
            printf("found semicolon\n");
            ++index; // Consume the semicolon so the next rule starts behind it
            break;
        }

        if(!carrot_element_starts_here(stream, index))
        {
            printf("non-element at token: %u\n", (unsigned) index);
            status = -1024;
            goto fail;
        }

        if(rule.num_elements >= max_elements_in_rule)
        {
            uint32_t grown = max_elements_in_rule + 64;
            carrot_element_s *resized = realloc(rule.elements, sizeof *resized * grown);
            if(resized == NULL) goto fail;
            rule.elements = resized;
            max_elements_in_rule = grown;
        }

        // Zeroed first so the cleanup below never sees indeterminate fields.
        carrot_element_s element;
        memset(&element, 0, sizeof element);
        int32_t len_element = carrot_parse_element(stream, index, &element);

        if(len_element < 1)
        {
            printf("Failed parsing element #%u.\n", (unsigned) rule.num_elements);
            status = -((int32_t) rule.num_elements) - 1;
            goto fail;
        }
        index += (uint32_t) len_element;

        rule.elements[rule.num_elements] = element;
        ++rule.num_elements;
    }

    (*out_rule) = rule;
    return (int32_t) (index - first_index);

fail:
    // Tag-name elements own a heap copy of their name.
    if(rule.elements != NULL)
    {
        for(uint32_t i = 0; i < rule.num_elements; ++i)
        {
            if(rule.elements[i].type == CARROT_ELEMENT_RULE_INSERTION)
            {
                free(rule.elements[i].specific.nonterminal.name);
            }
        }
    }
    free(rule.elements);
    free(rule.name);
    return status;
}
|
||||
|
||||
// Parses rules from the start of `stream` until the STREAM_END token is
// reached or a rule fails to parse.
//
// Returns the definition holding every rule parsed so far; `num_rules`
// counts only successfully parsed rules. `rules` is heap-allocated and
// owned by the caller; it is NULL (with `num_rules` 0) if the initial
// allocation fails.
carrot_definition_s carrot_parse_definition(carrot_token_stream_s *stream)
{
    uint32_t rules_capacity = 64;

    carrot_definition_s definition;
    definition.num_rules = 0;
    // Sized by the element type of the array (a rule), not by the definition.
    definition.rules = calloc(rules_capacity, sizeof *definition.rules);
    if(definition.rules == NULL) return definition;

    uint32_t token_index = 0;
    while((token_index < stream->num_tokens)
       && (TOKEN_AT(token_index).type != CARROT_TOKEN_STREAM_END))
    {
        puts("Getting a rule of a definition.");

        if(definition.num_rules >= rules_capacity)
        {
            uint32_t grown = rules_capacity * 2;
            carrot_rule_s *resized = realloc(definition.rules, sizeof *resized * grown);
            if(resized == NULL) return definition;

            // realloc leaves the added slots indeterminate; keep them zeroed
            // like the slots the initial calloc produced.
            memset(&resized[rules_capacity], 0, sizeof *resized * (grown - rules_capacity));
            definition.rules = resized;
            rules_capacity = grown;
        }

        // Signed on purpose: carrot_parse_rule reports errors as negatives.
        int32_t len_rule_in_tokens = carrot_parse_rule(stream, token_index, &definition.rules[definition.num_rules]);
        printf("Rule is %d tokens long.\n", (int) len_rule_in_tokens);
        if(len_rule_in_tokens < 1) return definition;

        token_index += (uint32_t) len_rule_in_tokens;
        ++definition.num_rules;
    }

    return definition;
}
|
||||
|
|
@ -0,0 +1,53 @@
|
|||
#include <tokenizer.h>
|
||||
#include <defgen.h>
|
||||
#include <print_def.h>
|
||||
|
||||
// Reads the whole file at `path` into a freshly malloc'd, NUL-terminated
// buffer that the caller must free.
//
// path          file to read
// out_len_file  if not NULL, receives the number of bytes read (the NUL is
//               not counted); written only on success
//
// Returns NULL when the file cannot be opened, measured, allocated for or
// read, or when it is too large for a uint32_t length.
char * carrot_load_file(char *path, uint32_t *out_len_file)
{
    // Binary mode, so the size reported by ftell matches what fread delivers.
    FILE *file = fopen(path, "rb");

    if(file == NULL) return NULL;

    char *content = NULL;
    long size = -1;

    if(fseek(file, 0, SEEK_END) == 0) size = ftell(file);

    if((size >= 0)
    && ((unsigned long) size < UINT32_MAX)
    && (fseek(file, 0, SEEK_SET) == 0))
    {
        content = malloc((size_t) size + 1);
    }

    if(content != NULL)
    {
        size_t num_read = fread(content, 1, (size_t) size, file);
        if(ferror(file))
        {
            free(content);
            content = NULL;
        }
        else
        {
            content[num_read] = 0x00;
            if(out_len_file != NULL) (*out_len_file) = (uint32_t) num_read;
        }
    }

    fclose(file);

    return content;
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if(argc != 2)
|
||||
{
|
||||
puts("Early Build. Please only give the script file's name!");
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t len_file_content = 0;
|
||||
char *file_content = carrot_load_file(argv[1], &len_file_content);
|
||||
|
||||
printf("len_file_content: %d\n", len_file_content);
|
||||
|
||||
if(file_content == NULL)
|
||||
{
|
||||
printf("Unable to load file at path: '%s'\n", argv[1]);
|
||||
return -2;
|
||||
}
|
||||
|
||||
carrot_token_stream_s stream = carrot_tokenize(file_content, len_file_content);
|
||||
carrot_print_tokens(&stream);
|
||||
|
||||
carrot_definition_s definition = carrot_parse_definition(&stream);
|
||||
carrot_print_definition(&definition);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,67 @@
|
|||
#include <print_def.h>
|
||||
|
||||
// Prints every rule of `definition` in order via carrot_print_rule.
void carrot_print_definition(carrot_definition_s *definition)
{
    for(uint32_t rule_number = 0; rule_number < definition->num_rules; ++rule_number)
    {
        carrot_print_rule(&definition->rules[rule_number]);
    }
}
|
||||
|
||||
// Returns a constant, human-readable name for `pattern`, or "Unknown" for
// a value outside the enumeration.
const char * carrot_constringify_pattern_name(carrot_pattern_e pattern)
{
    if(pattern == CARROT_PATTERN_INVALID) return "Invalid";
    if(pattern == CARROT_PATTERN_WORD)    return "Word";
    if(pattern == CARROT_PATTERN_KEYWORD) return "Keyword";
    if(pattern == CARROT_PATTERN_NUMBER)  return "Number";
    return "Unknown";
}
|
||||
|
||||
// Returns a constant, human-readable name for the element type `element`,
// or "Unknown" for a value outside the enumeration.
const char * carrot_constringify_element_type(carrot_element_e element)
{
    switch(element)
    {
        case CARROT_ELEMENT_RULE_INSERTION: return "Rule Insertion";
        case CARROT_ELEMENT_PATTERN:        return "Pattern";
        case CARROT_ELEMENT_RAW_LITERAL:    return "Raw Literal";
    }
    // Without this, control ran off the end of a non-void function whose
    // result the caller passes to printf.
    return "Unknown";
}
|
||||
|
||||
// Prints one element as
//     Element<T: 'TYPE'>: LENGTH:"TEXT"
// without a trailing newline. TEXT is the referenced rule name, the
// pattern's display name, or the literal bytes, and LENGTH is its length.
// Text that does not fit the 72-byte scratch buffer is truncated; an
// element of unknown type prints with an empty description.
void carrot_print_element(carrot_element_s *element)
{
    char buffer[72];
    buffer[0] = 0x00;

    // snprintf always NUL-terminates within the given size. Its return value
    // is the untruncated length, so it must not be used to index the buffer.
    switch(element->type)
    {
        case CARROT_ELEMENT_RULE_INSERTION:
        {
            const char *name = element->specific.nonterminal.name;
            snprintf(buffer, sizeof(buffer), "%u:\"%s\"",
                     (unsigned) element->specific.nonterminal.len_name,
                     (name != NULL) ? name : "");
            break;
        }

        case CARROT_ELEMENT_PATTERN:
        {
            // A pattern carries no length of its own; reading
            // raw_literal.num_bytes here would alias the pattern value.
            const char *name = carrot_constringify_pattern_name(element->specific.pattern.value);
            snprintf(buffer, sizeof(buffer), "%u:\"%s\"", (unsigned) strlen(name), name);
            break;
        }

        case CARROT_ELEMENT_RAW_LITERAL:
        {
            const char *bytes = element->specific.raw_literal.bytes;
            uint32_t num_bytes = (bytes != NULL) ? element->specific.raw_literal.num_bytes : 0;
            // The precision bounds the read; the literal need not be
            // NUL-terminated.
            int shown = (num_bytes < sizeof(buffer)) ? (int) num_bytes : (int) sizeof(buffer);
            snprintf(buffer, sizeof(buffer), "%u:\"%.*s\"",
                     (unsigned) num_bytes, shown, (bytes != NULL) ? bytes : "");
            break;
        }
    }

    printf("Element<T: '%s'>: %s", carrot_constringify_element_type(element->type), buffer);
}
|
||||
|
||||
// Prints every element of `rule` in order via carrot_print_element.
void carrot_print_rule(carrot_rule_s *rule)
{
    for(uint32_t element_number = 0; element_number < rule->num_elements; ++element_number)
    {
        carrot_print_element(&rule->elements[element_number]);
    }
}
|
||||
|
|
@ -0,0 +1,323 @@
|
|||
#include <tokenizer.h>
|
||||
|
||||
char * carrot_stringify_token_type(carrot_token_e type);
|
||||
|
||||
// True for a space or a horizontal tab; a newline is not blank.
bool carrot_is_blank(char rune)
{
    return (rune == ' ') || (rune == '\t');
}
|
||||
|
||||
// True when `rune` lies between 'a' and 'z' inclusive.
bool carrot_is_lower(char rune)
{
    return (rune >= 'a') && (rune <= 'z');
}
|
||||
|
||||
// True when `rune` lies between 'A' and 'Z' inclusive.
bool carrot_is_upper(char rune)
{
    return (rune >= 'A') && (rune <= 'Z');
}
|
||||
|
||||
// True when `rune` is a lower-case or an upper-case letter.
bool carrot_is_alpha(char rune)
{
    return carrot_is_lower(rune) || carrot_is_upper(rune);
}
|
||||
|
||||
// True when `rune` lies between '0' and '9' inclusive.
bool carrot_is_digit(char rune)
{
    return (rune >= '0') && (rune <= '9');
}
|
||||
|
||||
// True for character codes 33 through 47 ('!' through '/' in ASCII).
bool carrot_is_special_1(char rune)
{
    return (rune >= 33) && (rune <= 47);
}
|
||||
|
||||
// True for character codes 58 through 64 (':' through '@' in ASCII).
bool carrot_is_special_2(char rune)
{
    return (rune >= 58) && (rune <= 64);
}
|
||||
|
||||
// True for character codes 91 through 96 ('[' through '`' in ASCII).
bool carrot_is_special_3(char rune)
{
    return (rune >= 91) && (rune <= 96);
}
|
||||
|
||||
// True for character codes 123 through 127 ('{' through DEL in ASCII).
bool carrot_is_special_4(char rune)
{
    return (rune >= 123) && (rune <= 127);
}
|
||||
|
||||
// True when `rune` falls into any of the four special-character ranges
// tested by carrot_is_special_1 .. carrot_is_special_4.
bool carrot_is_special(char rune)
{
    return carrot_is_special_1(rune)
        || carrot_is_special_2(rune)
        || carrot_is_special_3(rune)
        || carrot_is_special_4(rune);
}
|
||||
|
||||
carrot_token_e carrot_resolve_special_char_token_type(char rune)
|
||||
{
|
||||
switch(rune)
|
||||
{
|
||||
case '.': return CARROT_TOKEN_SPECIAL_POINT;
|
||||
case ',': return CARROT_TOKEN_SPECIAL_COMMA;
|
||||
case ':': return CARROT_TOKEN_SPECIAL_COLON;
|
||||
case ';': return CARROT_TOKEN_SPECIAL_SEMICOLON;
|
||||
case '=': return CARROT_TOKEN_SPECIAL_EQUALS_SIGN;
|
||||
case '&': return CARROT_TOKEN_SPECIAL_AMPERSAND;
|
||||
case '|': return CARROT_TOKEN_SPECIAL_VERTICAL_BAR;
|
||||
case '<': return CARROT_TOKEN_SPECIAL_OPENING_ARROW;
|
||||
case '>': return CARROT_TOKEN_SPECIAL_CLOSING_ARROW;
|
||||
case '{': return CARROT_TOKEN_SPECIAL_OPENING_CURLY_BRACE;
|
||||
case '}': return CARROT_TOKEN_SPECIAL_CLOSING_CURLY_BRACE;
|
||||
case '(': return CARROT_TOKEN_SPECIAL_OPENING_PARENTHESIS;
|
||||
case ')': return CARROT_TOKEN_SPECIAL_CLOSING_PARENTHESIS;
|
||||
case '[': return CARROT_TOKEN_SPECIAL_OPENING_SQUARE_BRACKET;
|
||||
case ']': return CARROT_TOKEN_SPECIAL_CLOSING_SQUARE_BRACKET;
|
||||
}
|
||||
return CARROT_TOKEN_SPECIAL;
|
||||
}
|
||||
|
||||
carrot_token_stream_s carrot_tokenize(char *source, uint32_t len_source)
|
||||
{
|
||||
carrot_token_stream_s stream;
|
||||
stream.len_source = len_source;
|
||||
stream.source = malloc(len_source+1);
|
||||
memcpy(stream.source, source, len_source);
|
||||
stream.source[len_source] = 0x00;
|
||||
|
||||
uint32_t token_capacity = 512;
|
||||
stream.tokens = calloc(sizeof(carrot_token_s), token_capacity);
|
||||
|
||||
uint32_t token_index = 0;
|
||||
uint32_t index = 0;
|
||||
uint32_t line_index = 1;
|
||||
uint32_t column_index = 1;
|
||||
while(index < len_source)
|
||||
{
|
||||
char rune = source[index];
|
||||
if(carrot_is_blank(rune))
|
||||
{
|
||||
++index;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(rune == '\n')
|
||||
{
|
||||
++line_index;
|
||||
column_index = 1;
|
||||
++index;
|
||||
continue;
|
||||
}
|
||||
|
||||
if(token_index >= token_capacity)
|
||||
{
|
||||
token_capacity += 2048;
|
||||
stream.tokens = realloc(stream.tokens, sizeof(carrot_token_s) * token_capacity);
|
||||
}
|
||||
|
||||
if(carrot_is_alpha(rune) || (rune == '_'))
|
||||
{
|
||||
uint32_t start_index = index;
|
||||
while(index < len_source)
|
||||
{
|
||||
if(!carrot_is_alpha(rune) && (rune != '_')) break;
|
||||
++column_index;
|
||||
++index;
|
||||
rune = source[index];
|
||||
}
|
||||
carrot_token_s token;
|
||||
token.type = CARROT_TOKEN_WORD;
|
||||
token.char_index = start_index;
|
||||
token.length = index - start_index;
|
||||
token.line_index = line_index;
|
||||
token.column_index = column_index;
|
||||
|
||||
stream.tokens[token_index] = token;
|
||||
++token_index;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if(rune == '"')
|
||||
{
|
||||
++index;
|
||||
uint32_t start_index = index;
|
||||
while(index < len_source)
|
||||
{
|
||||
if(source[index] == '"')
|
||||
{
|
||||
++index;
|
||||
break;
|
||||
}
|
||||
if(source[index] == '\\')
|
||||
{
|
||||
++index;
|
||||
}
|
||||
++index;
|
||||
}
|
||||
carrot_token_s token;
|
||||
token.type = CARROT_TOKEN_LITERAL_STRING;
|
||||
token.char_index = start_index;
|
||||
token.length = (index - start_index) - 1;
|
||||
token.line_index = line_index;
|
||||
token.column_index = column_index;
|
||||
|
||||
stream.tokens[token_index] = token;
|
||||
++token_index;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if(rune == '\'')
|
||||
{
|
||||
++index;
|
||||
uint32_t start_index = index;
|
||||
|
||||
if(source[index] == '\\')
|
||||
{
|
||||
++index;
|
||||
}
|
||||
++index;
|
||||
|
||||
if(index >= len_source)
|
||||
{
|
||||
--index;
|
||||
}
|
||||
|
||||
carrot_token_s token;
|
||||
token.type = CARROT_TOKEN_LITERAL_CHARACTER;
|
||||
token.char_index = start_index;
|
||||
token.length = (index - start_index) - 1;
|
||||
token.line_index = line_index;
|
||||
token.column_index = column_index;
|
||||
|
||||
stream.tokens[token_index] = token;
|
||||
++token_index;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if(carrot_is_special(rune))
|
||||
{
|
||||
carrot_token_s token;
|
||||
token.type = carrot_resolve_special_char_token_type(source[index]);
|
||||
token.char_index = index;
|
||||
token.length = 1;
|
||||
token.line_index = line_index;
|
||||
token.column_index = column_index;
|
||||
|
||||
stream.tokens[token_index] = token;
|
||||
++token_index;
|
||||
|
||||
printf("Rune: '%c' resolves to Token-Type:> \"%s\"\n", rune, carrot_stringify_token_type(token.type));
|
||||
|
||||
++index;
|
||||
continue;
|
||||
}
|
||||
++index;
|
||||
}
|
||||
|
||||
// The edge case for when the array is full at the perfect timing.
|
||||
if(token_index >= token_capacity)
|
||||
{
|
||||
token_capacity += 1;
|
||||
stream.tokens = realloc(stream.tokens, sizeof(carrot_token_s) * token_capacity);
|
||||
}
|
||||
|
||||
carrot_token_s last_token;
|
||||
last_token.type = CARROT_TOKEN_STREAM_END;
|
||||
last_token.length = 0;
|
||||
last_token.char_index = 0;
|
||||
last_token.line_index = line_index;
|
||||
last_token.column_index = column_index;
|
||||
|
||||
stream.tokens[token_index] = last_token;
|
||||
++token_index;
|
||||
|
||||
stream.num_tokens = token_index;
|
||||
return stream;
|
||||
}
|
||||
|
||||
// Returns a constant, human-readable name for the token type `type`, or
// "Unknown" for a value outside the enumeration. The result must not be
// modified or freed.
char * carrot_stringify_token_type(carrot_token_e type)
{
    static char *const names[] = {
        [CARROT_TOKEN_WORD]                           = "Word",
        [CARROT_TOKEN_SPECIAL]                        = "Unknown Special",
        [CARROT_TOKEN_SPECIAL_POINT]                  = "Special: Point ( . )",
        [CARROT_TOKEN_SPECIAL_COMMA]                  = "Special: Comma ( , )",
        [CARROT_TOKEN_SPECIAL_COLON]                  = "Special: Colon ( : )",
        [CARROT_TOKEN_SPECIAL_SEMICOLON]              = "Special: Semicolon ( ; )",
        [CARROT_TOKEN_SPECIAL_EQUALS_SIGN]            = "Special: Equals ( = )",
        [CARROT_TOKEN_SPECIAL_AMPERSAND]              = "Special: Ampersand ( & )",
        [CARROT_TOKEN_SPECIAL_VERTICAL_BAR]           = "Special: Vertical Bar ( | )",
        [CARROT_TOKEN_SPECIAL_OPENING_ARROW]          = "Special: <",
        [CARROT_TOKEN_SPECIAL_CLOSING_ARROW]          = "Special: >",
        [CARROT_TOKEN_SPECIAL_OPENING_CURLY_BRACE]    = "Special: {",
        [CARROT_TOKEN_SPECIAL_CLOSING_CURLY_BRACE]    = "Special: }",
        [CARROT_TOKEN_SPECIAL_OPENING_PARENTHESIS]    = "Special: (",
        [CARROT_TOKEN_SPECIAL_CLOSING_PARENTHESIS]    = "Special: )",
        [CARROT_TOKEN_SPECIAL_OPENING_SQUARE_BRACKET] = "Special: [",
        [CARROT_TOKEN_SPECIAL_CLOSING_SQUARE_BRACKET] = "Special: ]",
        [CARROT_TOKEN_LITERAL_NUMERIC]                = "Literal Numeric",
        [CARROT_TOKEN_LITERAL_STRING]                 = "Literal String",
        [CARROT_TOKEN_LITERAL_CHARACTER]              = "Literal Character",
        [CARROT_TOKEN_STREAM_END]                     = "End of Stream"
    };

    size_t slot = (size_t) type;
    if((slot < sizeof(names) / sizeof(names[0])) && (names[slot] != NULL))
    {
        return names[slot];
    }
    return "Unknown";
}
|
||||
|
||||
// Prints the token at `index` as
//     T=<type name padded to 31 columns> '<text>':<length>
// or just "Stream End!" for the STREAM_END token.
void carrot_print_single_token(carrot_token_stream_s *stream, uint32_t index)
{
    const carrot_token_s *token = &stream->tokens[index];
    if(token->type == CARROT_TOKEN_STREAM_END)
    {
        puts("Stream End!");
        return;
    }

    // "%-31s" pads the type name and "%.*s" prints the token text straight
    // from the source buffer, so no stack copy sized by the token length and
    // no fixed-size name buffer is needed.
    printf("T=%-31s '%.*s':%u\n",
           carrot_stringify_token_type(token->type),
           (int) token->length,
           &stream->source[token->char_index],
           (unsigned) token->length);
}
|
||||
|
||||
// Prints the token count followed by one line per token of `stream`.
void carrot_print_tokens(carrot_token_stream_s *stream)
{
    printf("Number of Tokens: %d\n", stream->num_tokens);

    for(uint32_t token_number = 0; token_number < stream->num_tokens; ++token_number)
    {
        carrot_print_single_token(stream, token_number);
    }
}
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
<root> = <instruction_list>;
|
||||
<instruction_list> = <instruction> <instruction_list> | <instruction>;
|
||||
<instruction> = <if> | <scope>;
|
||||
<if> = "if" <condition> <scope>;
|
||||
<condition> = "1" | "0";
|
||||
<scope> = "{" <instruction_list> "}";
|
Loading…
Reference in New Issue