First code

This commit is contained in:
Eric-Paul Ickhorn 2023-11-26 19:49:54 +01:00
commit 816f613cc1
9 changed files with 761 additions and 0 deletions

71
code/inc/ast.h Normal file
View File

@ -0,0 +1,71 @@
#ifndef PARCEL_AST_H
#define PARCEL_AST_H
#include <utility.h>
// Forward declarations of all AST node types.
//
// Each struct gets a typedef with the suffix '_s', so that the struct
// definitions below can refer to each other by their short names.
typedef struct pac_ast pac_ast_s;
typedef struct pac_ast_rule pac_ast_rule_s;
typedef struct pac_ast_variant pac_ast_variant_s;
typedef struct pac_ast_literal pac_ast_literal_s;
typedef struct pac_ast_reference pac_ast_reference_s;
typedef struct pac_ast_item pac_ast_item_s;
// pac_ast: The root of the syntax tree; a counted array of rules.
struct pac_ast
{
// num_rules: The number of entries of 'rules' which are in use.
usz_t num_rules;
// rules: The array of rules, in the order in which they were parsed.
pac_ast_rule_s *rules;
};
// pac_ast_rule: A single named rule with all of its variants.
struct pac_ast_rule
{
// name: The rule's name as a zero-terminated string.
char *name;
// num_variants: The number of entries of 'variants' which are in use.
usz_t num_variants;
// variants: The alternatives of which the rule consists.
pac_ast_variant_s *variants;
};
// pac_ast_variant: One alternative of a rule; a counted sequence of items.
struct pac_ast_variant
{
// num_items: The number of entries of 'items' which are in use.
usz_t num_items;
// items: The items of this variant, in source order.
pac_ast_item_s *items;
};
// pac_ast_reference: Also called non-terminal, a reference is an item
// which represents all the contents of another rule.
//
// NOTE(review): 'pac_ast_item' has no member of this type yet, so a
// reference cannot be stored in an item at the moment.
struct pac_ast_reference
{
// len_name: Presumably the length of 'name' without a terminator;
// TODO confirm once references are actually being parsed.
usz_t len_name;
// name: The name of the rule which is being referred to.
char *name;
};
// pac_ast_literal: An item which stands for a fixed piece of text.
struct pac_ast_literal
{
// length: The number of bytes in 'string', not counting the terminator.
usz_t length;
// string: The literal's text as a zero-terminated string.
char *string;
};
// pac_outline_e: An enumeration of all outlines known to Parcel.
//
// Outlines are tokens of which only the rough format is known, like
// with variable names; the format is known, but the actual name isn't.
//
// PAC_OUTLINE_RUNE has the value 0, so an item which was cleared to
// all-zero bytes reads as a rune outline.
typedef enum
{
PAC_OUTLINE_RUNE,
PAC_OUTLINE_WORD,
PAC_OUTLINE_INTEGER,
PAC_OUTLINE_FLOAT
} pac_outline_e;
// pac_ast_item: A single element of a variant; either a literal
// or an outline.
struct pac_ast_item
{
// is_literal: Tells which member of 'data' is valid;
// TRUE means 'data.literal', FALSE means 'data.outline'.
bool_t is_literal;
union pac_item_data
{
pac_ast_literal_s literal;
pac_outline_e outline;
} data;
};
#endif // PARCEL_AST_H

32
code/inc/parcel.h Normal file
View File

@ -0,0 +1,32 @@
#ifndef PARCEL_H
#define PARCEL_H
#include <utility.h>
// Forward declarations of the public types; each struct gets a
// typedef with the suffix '_s'.
typedef struct pac_grammar pac_grammar_s;
typedef struct pac_log_entry pac_log_entry_s;
typedef struct pac_log pac_log_s;
// pac_log_entry: A single message of a log.
//
// NOTE(review): Nothing fills or reads log entries yet, so whether
// 'length' counts a terminator is not decided; confirm when used.
struct pac_log_entry
{
usz_t length;
char *text;
};
// pac_log: A counted array of log entries.
//
// NOTE(review): 'length' is presumably the number of entries in
// 'entries'; nothing uses this structure yet, so confirm when used.
struct pac_log
{
usz_t length;
pac_log_entry_s *entries;
};
// pac_grammar: The result of converting a grammar's source text.
//
// This is a placeholder without any members for now.
// NOTE(review): A struct without members is a compiler extension
// and not part of ISO C; give it a member before building with
// a strictly conforming compiler.
struct pac_grammar
{
};
// pac_convert_grammar: Converts the source text of a grammar.
//
// 'source' must be a zero-terminated string; its length is measured
// by the function itself.
pac_grammar_s pac_convert_grammar (char *source);
// There is no function for releasing a grammar yet:
// void pac_delete_grammar (pac_grammar_s grammar);
#endif // PARCEL_H

58
code/inc/tokenizer.h Normal file
View File

@ -0,0 +1,58 @@
#ifndef PARCEL_TOKENIZER_H
#define PARCEL_TOKENIZER_H
#include <utility.h>
// Forward declarations of the tokenizer's types; each struct gets
// a typedef with the suffix '_s'.
typedef struct pac_token pac_token_s;
typedef struct pac_tlist pac_tlist_s; // Token List
// pac_token_e: An enumeration of all kinds of tokens.
//
// The enumerators are grouped by their prefix:
//   PAC_TOKEN_KEYWORD_*  Reserved words
//   PAC_TOKEN_LIT_*      Literals
//   PAC_TOKEN_SIGN_*     Single-rune signs
typedef enum
{
// A sign for which there is no dedicated token type.
PAC_TOKEN_STRAY = 0,
// A word which isn't a keyword.
PAC_TOKEN_WORD,
PAC_TOKEN_KEYWORD_TRUE,
PAC_TOKEN_KEYWORD_FALSE,
PAC_TOKEN_KEYWORD_ALPHA,
PAC_TOKEN_KEYWORD_WORD,
PAC_TOKEN_KEYWORD_INTEGER,
PAC_TOKEN_LIT_STRING,
PAC_TOKEN_LIT_RUNE, // TODO
PAC_TOKEN_LIT_INTEGER, // TODO
PAC_TOKEN_SIGN_OPEN_TAG,
PAC_TOKEN_SIGN_CLOSE_TAG,
PAC_TOKEN_SIGN_EQUALS,
PAC_TOKEN_SIGN_COLON,
PAC_TOKEN_SIGN_COMMA,
PAC_TOKEN_SIGN_HYPHEN,
PAC_TOKEN_SIGN_UNDERSCORE,
PAC_TOKEN_SIGN_VERTICAL_BAR,
PAC_TOKEN_SIGN_SEMICOLON
} pac_token_e;
// pac_token: A typed slice of the source text.
//
// A token doesn't own any text; it only stores where in the
// source its text is located.
struct pac_token
{
pac_token_e type;
// offset: The index of the token's first byte in the source.
usz_t offset;
// length: The number of bytes which belong to the token.
usz_t length;
};
// pac_tlist: A list of tokens together with the text they point into.
struct pac_tlist
{
// source: The text into which the tokens' offsets point.
char *source;
// num_tokens: The number of entries of 'tokens' which are in use.
usz_t num_tokens;
pac_token_s *tokens;
// cursor: An index into the 'tokens'-array; used in later stages.
usz_t cursor;
};
// pac_convert_word_to_token_type: Returns the keyword token type of
// the 'length' bytes at 'word', or PAC_TOKEN_WORD if they aren't a keyword.
//
// (This declaration used to be spelled 'pac_word_to_token_type', which
// is a name no function is defined under.)
pac_token_e pac_convert_word_to_token_type (char *word, usz_t length);

// pac_tokenize_grammar: Splits the first 'len_source' bytes of 'source'
// into tokens. The returned list points into 'source' and doesn't copy it.
pac_tlist_s pac_tokenize_grammar (char *source, usz_t len_source);

// pac_stringify_token_type: Returns a constant, human-readable name
// for a token type; the returned string must not be modified or freed.
char * pac_stringify_token_type (pac_token_e type);

// pac_display_tlist: Prints all tokens of a list to the standard output.
void pac_display_tlist (pac_tlist_s list);
#endif

44
code/inc/utility.h Normal file
View File

@ -0,0 +1,44 @@
#ifndef TN_UTIL_TYPES_H
#define TN_UTIL_TYPES_H
// Short names for the basic number types; the number in each name
// is the intended width in bits.
//
// NOTE(review): 'long' is only 64 bits wide on some platforms
// (it is 32 bits wide on 64-bit Windows, for example); confirm
// that i64_t and u64_t have the intended width on all targets.
typedef signed char i8_t;
typedef signed short i16_t;
typedef signed int i32_t;
typedef signed long i64_t;
typedef unsigned char u8_t;
typedef unsigned short u16_t;
typedef unsigned int u32_t;
typedef unsigned long u64_t;
typedef float f32_t;
typedef double f64_t;
// rune_t: A single character, stored in 32 bits.
typedef u32_t rune_t;
// bool_t: A truth value; holds either TRUE or FALSE.
typedef u8_t bool_t;
// usz_t / isz_t: The unsigned and signed types used for sizes,
// lengths and indices. They are 32 bits wide if __TN_OLD_PROCESSOR__
// is defined and 64 bits wide otherwise.
#ifdef __TN_OLD_PROCESSOR__
typedef u32_t usz_t;
typedef i32_t isz_t;
#else
typedef u64_t usz_t;
typedef i64_t isz_t;
#endif
// TRUE / FALSE: The two values of a bool_t.
// NULL: The pointer which points at nothing.
//
// Each definition is skipped if the name is defined already, so that
// including a standard header (which brings its own NULL) before this
// one doesn't lead to a macro being redefined.
#ifndef TRUE
#define TRUE (1)
#endif
#ifndef FALSE
#define FALSE (0)
#endif
#ifndef NULL
#define NULL ((void *) 0)
#endif
// Rune classification; each function returns TRUE if the rune
// belongs to the named class and FALSE otherwise.

// 'a' to 'z'
bool_t pac_rune_is_lower_letter (rune_t rune);
// 'A' to 'Z'
bool_t pac_rune_is_upper_letter (rune_t rune);
// A lower or an upper letter
bool_t pac_rune_is_letter (rune_t rune);
// '0' to '9'
bool_t pac_rune_is_digit (rune_t rune);
// A space or a horizontal tab; a line break is NOT a blank.
bool_t pac_rune_is_blank (rune_t rune);
// A rune within 0x21-0x2f, 0x3a-0x40, 0x5b-0x60 or 0x7b-0x7e.
bool_t pac_rune_is_sign (rune_t rune);
#endif // Include Guard (TN_UTIL_TYPES_H)

206
code/src/ast.c Normal file
View File

@ -0,0 +1,206 @@
#include <parcel.h>
#include <ast.h>
#include <tokenizer.h>
#include <string.h>
#include <stdio.h>
#include <stdlib.h>
// Shorthands for walking through a token list.
//
// All of them expect a variable 'tlist' of the type 'pac_tlist_s *' to
// be in scope. CURRENT and CURRENT_STRING index the token array with
// the cursor without checking it; test END_REACHED before using them.

// CURRENT_STRING: Pointer to the first byte of the current token's text.
#define CURRENT_STRING (&tlist->source[tlist->tokens[tlist->cursor].offset])
// CURRENT: The token at the cursor.
#define CURRENT (tlist->tokens[tlist->cursor])
// SKIP_TOKEN: Moves the cursor to the next token.
#define SKIP_TOKEN (++tlist->cursor)
// TOKEN_AT: The token at an arbitrary index.
#define TOKEN_AT(index) (tlist->tokens[(index)])
// END_REACHED: True once the cursor has moved past the last token.
#define END_REACHED (tlist->cursor >= tlist->num_tokens)
// pac_grow_reference: Parses a reference (non-terminal) item.
//
// This isn't implemented yet: The function touches neither of its
// arguments and always returns -1 to report a failure. (The body used
// to be empty, which left the returned value undefined.)
i32_t pac_grow_reference(pac_tlist_s *tlist, pac_ast_reference_s *reference)
{
    (void) tlist;
    (void) reference;
    return -1;
}
// pac_grow_item: Parses the token at the cursor into a single item.
//
// tlist: The token list; the cursor is moved past the token on success
//        and stays where it is on failure.
// item:  The item to fill; it is cleared to all-zero bytes first.
//
// Returns 1 (the number of consumed tokens) on success and -1 if the
// end of the token list was reached, the token can't start an item
// or memory couldn't be allocated.
//
// The text of a string literal is copied into a newly allocated,
// zero-terminated buffer which is owned by the item.
i32_t pac_grow_item(pac_tlist_s *tlist, pac_ast_item_s *item)
{
    memset(item, 0x00, sizeof(pac_ast_item_s));
    item->is_literal = FALSE;

    // CURRENT must not be used once the cursor is past the last token.
    if(END_REACHED)
        return -1;

    switch(CURRENT.type)
    {
        case PAC_TOKEN_LIT_STRING:
        {
            usz_t length = CURRENT.length;
            char *string = malloc(length + 1);
            if(string == NULL)
                return -1;

            memcpy(string, CURRENT_STRING, length);
            string[length] = 0x00;

            item->is_literal = TRUE;
            item->data.literal.length = length;
            item->data.literal.string = string;
            SKIP_TOKEN;
        } return 1;

        case PAC_TOKEN_KEYWORD_WORD:
        {
            item->data.outline = PAC_OUTLINE_WORD;
            SKIP_TOKEN;
        } return 1;

        case PAC_TOKEN_KEYWORD_INTEGER:
        {
            item->data.outline = PAC_OUTLINE_INTEGER;
            SKIP_TOKEN;
        } return 1;

        default:
            break;
    }
    return -1;
}
// pac_grow_variant: Parses a comma-separated sequence of items.
//
// tlist:   The token list; parsing starts at the cursor.
// variant: The variant to fill; it is cleared first and receives a
//          newly allocated item array which it owns.
//
// A variant ends at a vertical bar or a semicolon; the cursor is left
// ON that token, so that the caller can tell the two apart.
//
// Returns the number of consumed tokens on success and a negative
// number on failure (an unparseable item, a missing separator, the end
// of the token list or a failed allocation). Only the items which were
// parsed successfully are counted in 'num_items'.
i32_t pac_grow_variant(pac_tlist_s *tlist, pac_ast_variant_s *variant)
{
    usz_t start_index = tlist->cursor;
    memset(variant, 0x00, sizeof(pac_ast_variant_s));

    usz_t items_capacity = 8;
    variant->items = calloc(items_capacity, sizeof(pac_ast_item_s));
    if(variant->items == NULL)
        return -1;

    while(!END_REACHED)
    {
        if(variant->num_items >= items_capacity)
        {
            // Grow with realloc(), so that the items which were parsed
            // so far are kept. The result goes into a temporary first;
            // the old array stays valid if the call fails.
            usz_t new_capacity = items_capacity * 2;
            pac_ast_item_s *grown_items = realloc(
                variant->items,
                sizeof(pac_ast_item_s) * new_capacity
            );
            if(grown_items == NULL)
                return -1;
            variant->items = grown_items;
            items_capacity = new_capacity;
        }

        i32_t success = pac_grow_item(tlist, &variant->items[variant->num_items]);
        if(success < 0)
            return success - 1;
        ++variant->num_items;

        // The item may have been the very last token of the list.
        if(END_REACHED)
            break;

        if(CURRENT.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
            return (i32_t) (tlist->cursor - start_index);
        if(CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON)
            return (i32_t) (tlist->cursor - start_index);
        if(CURRENT.type != PAC_TOKEN_SIGN_COMMA)
            return -1;
        SKIP_TOKEN;
    }
    // The token list ended before the variant was terminated.
    return -1;
}
i32_t pac_grow_rule(pac_tlist_s *tlist, pac_ast_rule_s *rule)
{
memset(rule, 0x00, sizeof(pac_ast_rule_s));
// Parse the header
usz_t start_index = tlist->cursor;
if(CURRENT.type != PAC_TOKEN_SIGN_OPEN_TAG)
return -1;
SKIP_TOKEN;
if(CURRENT.type != PAC_TOKEN_WORD)
{
puts("A rule name must be a single word!");
return -1;
}
usz_t len_name = CURRENT.length;
usz_t name_start = CURRENT.offset;
SKIP_TOKEN;
if(CURRENT.type != PAC_TOKEN_SIGN_CLOSE_TAG)
{
puts("Missing Tag closing sign!");
return -1;
}
SKIP_TOKEN;
if(CURRENT.type != PAC_TOKEN_SIGN_EQUALS)
return -1;
SKIP_TOKEN;
// Parse all variants
rule->name = malloc(len_name + 1);
memcpy(rule->name, &tlist->source[name_start], len_name);
rule->name[len_name] = 0;
usz_t variants_capacity = 4;
rule->variants = malloc(sizeof(pac_ast_variant_s) * variants_capacity);
while(!END_REACHED)
{
if(rule->num_variants >= variants_capacity)
{
variants_capacity *= 2;
rule->variants = realloc(rule->variants, sizeof(pac_ast_variant_s) * variants_capacity);
}
i32_t success = pac_grow_variant(tlist, &rule->variants[rule->num_variants]);
++rule->num_variants;
if(success < 0)
{
printf("Failed parsing variant %u of rule '%s'. ");
while(!END_REACHED)
{
if(CURRENT.type == PAC_TOKEN_SIGN_VERTICAL_BAR)
{
printf("Continuing with next variant.\n");
break;
}
if(CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON)
{
printf("Continuing with next rule.\n");
SKIP_TOKEN;
return 2;
}
SKIP_TOKEN;
}
}
if(CURRENT.type == PAC_TOKEN_SIGN_SEMICOLON)
{
SKIP_TOKEN;
return tlist->cursor - start_index;
}
if(CURRENT.type != PAC_TOKEN_SIGN_VERTICAL_BAR)
{
return -1;
}
SKIP_TOKEN;
}
return -1;
}
// pac_grow_ast: Parses a whole token list into a syntax tree.
//
// tokens: The token list; it is passed by value, so the caller's
//         cursor isn't moved.
//
// Returns the tree; its rule array is newly allocated and owned by the
// tree. If that allocation fails, 'rules' is NULL and 'num_rules' is 0.
//
// A rule which failed to parse is reported on the standard output and
// still takes up an entry of the rule array (its name may be NULL).
pac_ast_s pac_grow_ast(pac_tlist_s tokens)
{
    usz_t rules_capacity = 32;
    pac_ast_s ast;
    ast.num_rules = 0;
    ast.rules = malloc(sizeof(pac_ast_rule_s) * rules_capacity);
    if(ast.rules == NULL)
    {
        puts("Failed allocating the rule array!");
        return ast;
    }

    while(tokens.cursor < tokens.num_tokens)
    {
        if(ast.num_rules >= rules_capacity)
        {
            // The result goes into a temporary first; the old array
            // stays valid if the call fails.
            usz_t new_capacity = rules_capacity * 2;
            pac_ast_rule_s *grown_rules = realloc(
                ast.rules,
                sizeof(pac_ast_rule_s) * new_capacity
            );
            if(grown_rules == NULL)
            {
                puts("Failed growing the rule array!");
                break;
            }
            ast.rules = grown_rules;
            rules_capacity = new_capacity;
        }

        usz_t cursor_before = tokens.cursor;
        i32_t success = pac_grow_rule(&tokens, &ast.rules[ast.num_rules]);
        if(success < 0)
        {
            printf("Failed parsing a rule at index: %lu!\n", (unsigned long) tokens.cursor);

            // A rule can fail without consuming a single token. Step
            // over the offending token then; this loop would never
            // come to an end otherwise.
            if(tokens.cursor == cursor_before)
                ++tokens.cursor;
        }
        ++ast.num_rules;
    }
    return ast;
}
pac_grammar_s pac_convert_grammar(char *source)
{
usz_t len_source = strlen(source);
pac_tlist_s tokens = pac_tokenize_grammar(source, len_source);
pac_display_tlist(tokens);
pac_ast_s ast = pac_grow_ast(tokens);
pac_grammar_s grammar;
return grammar;
}

43
code/src/main.c Normal file
View File

@ -0,0 +1,43 @@
#include <parcel.h>
#include <ast.h>
#include <stdio.h>
#include <stdlib.h>
// load_file: Reads a whole file into a zero-terminated string.
//
// path: The path of the file to read.
//
// Returns a newly allocated buffer which the caller has to free(), or
// NULL if the file couldn't be opened, measured or read or if there
// isn't enough memory; a message is printed in each of these cases.
//
// The terminator is placed right after the bytes which were actually
// read, which may be fewer than the measured length of the file.
char * load_file(char *path)
{
    FILE *file = fopen(path, "r");
    if(file == NULL)
    {
        printf("Failed opening file at '%s'\n", path);
        return NULL;
    }

    // Measure the file by seeking to its end.
    long length = -1;
    if(fseek(file, 0, SEEK_END) == 0)
        length = ftell(file);
    if((length < 0) || (fseek(file, 0, SEEK_SET) != 0))
    {
        printf("Failed measuring file at '%s'\n", path);
        fclose(file);
        return NULL;
    }

    char *content = malloc((size_t) length + 1);
    if(content == NULL)
    {
        printf("Failed allocating memory for file at '%s'\n", path);
        fclose(file);
        return NULL;
    }

    size_t num_read = fread(content, 1, (size_t) length, file);
    if(ferror(file))
    {
        printf("Failed reading file at '%s'\n", path);
        free(content);
        fclose(file);
        return NULL;
    }
    content[num_read] = 0x00;

    fclose(file);
    return content;
}
// main: Loads the grammar file named by the only command line
// argument and converts it.
//
// Returns 0 on success, -1 if the number of arguments is wrong and
// -2 if the file couldn't be loaded.
int main(int argc, char **argv)
{
    if(argc != 2)
    {
        // There may be no program name at all if argc is 0.
        char *program_name = "parcel";
        if((argc > 0) && (argv[0] != NULL))
            program_name = argv[0];
        printf("Usage: %s <filename>\n", program_name);
        return -1;
    }

    char *source = load_file(argv[1]);
    if(source == NULL)
    {
        puts("Stopping due to previous error!");
        return -2;
    }

    pac_convert_grammar(source);

    // The buffer was allocated by load_file() and belongs to this function.
    free(source);
    return 0;
}

77
code/src/runes.c Normal file
View File

@ -0,0 +1,77 @@
#include <utility.h>
// pac_rune_is_sign_of_block_1: Tells whether a rune lies within the
// range from 0x21 to 0x2f (both included); in ASCII: '!' to '/'.
bool_t pac_rune_is_sign_of_block_1(rune_t rune)
{
    return ((rune >= 0x21) && (rune <= 0x2f)) ? TRUE : FALSE;
}
// pac_rune_is_sign_of_block_2: Tells whether a rune lies within the
// range from 0x3a to 0x40 (both included); in ASCII: ':' to '@'.
bool_t pac_rune_is_sign_of_block_2(rune_t rune)
{
    return ((rune >= 0x3a) && (rune <= 0x40)) ? TRUE : FALSE;
}
// pac_rune_is_sign_of_block_3: Tells whether a rune lies within the
// range from 0x5b to 0x60 (both included); in ASCII: '[' to '`'.
bool_t pac_rune_is_sign_of_block_3(rune_t rune)
{
    return ((rune >= 0x5b) && (rune <= 0x60)) ? TRUE : FALSE;
}
// pac_rune_is_sign_of_block_4: Tells whether a rune lies within the
// range from 0x7b to 0x7e (both included); in ASCII: '{' to '~'.
bool_t pac_rune_is_sign_of_block_4(rune_t rune)
{
    return ((rune >= 0x7b) && (rune <= 0x7e)) ? TRUE : FALSE;
}
// pac_rune_is_lower_letter: Tells whether a rune is one of the
// lowercase letters from 'a' to 'z'.
bool_t pac_rune_is_lower_letter(rune_t rune)
{
    return ((rune >= 'a') && (rune <= 'z')) ? TRUE : FALSE;
}
// pac_rune_is_upper_letter: Tells whether a rune is one of the
// uppercase letters from 'A' to 'Z'.
bool_t pac_rune_is_upper_letter(rune_t rune)
{
    return ((rune >= 'A') && (rune <= 'Z')) ? TRUE : FALSE;
}
// pac_rune_is_letter: Tells whether a rune is a letter, regardless
// of its case. The lowercase check is done first.
bool_t pac_rune_is_letter(rune_t rune)
{
    bool_t is_letter = FALSE;
    if(pac_rune_is_lower_letter(rune) || pac_rune_is_upper_letter(rune))
        is_letter = TRUE;
    return is_letter;
}
// pac_rune_is_digit: Tells whether a rune is one of the decimal
// digits from '0' to '9'.
bool_t pac_rune_is_digit(rune_t rune)
{
    return ((rune >= '0') && (rune <= '9')) ? TRUE : FALSE;
}
// pac_rune_is_blank: Tells whether a rune is a space or a horizontal
// tab. Line breaks don't count as blanks.
bool_t pac_rune_is_blank(rune_t rune)
{
    switch(rune)
    {
        case ' ':
        case '\t':
            return TRUE;
        default:
            return FALSE;
    }
}
// pac_rune_is_sign: Tells whether a rune belongs to one of the four
// blocks of signs. The blocks are checked in ascending order.
bool_t pac_rune_is_sign(rune_t rune)
{
    bool_t is_sign = FALSE;
    if(pac_rune_is_sign_of_block_1(rune)
    || pac_rune_is_sign_of_block_2(rune)
    || pac_rune_is_sign_of_block_3(rune)
    || pac_rune_is_sign_of_block_4(rune))
    {
        is_sign = TRUE;
    }
    return is_sign;
}

228
code/src/tokenizer.c Normal file
View File

@ -0,0 +1,228 @@
#include <tokenizer.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// pac_convert_word_to_token_type: Finds the token type of a word.
//
// word:   Pointer to the first byte of the word; no terminator needed.
// length: The number of bytes which belong to the word.
//
// Returns the keyword's token type if the word is exactly one of the
// keywords and PAC_TOKEN_WORD otherwise.
pac_token_e pac_convert_word_to_token_type(char *word, usz_t length)
{
    static const struct
    {
        const char *text;
        usz_t length;
        pac_token_e type;
    } keywords[] =
    {
        { "true",    4, PAC_TOKEN_KEYWORD_TRUE    },
        { "word",    4, PAC_TOKEN_KEYWORD_WORD    },
        { "false",   5, PAC_TOKEN_KEYWORD_FALSE   },
        { "alpha",   5, PAC_TOKEN_KEYWORD_ALPHA   },
        { "integer", 7, PAC_TOKEN_KEYWORD_INTEGER }
    };
    const usz_t num_keywords = sizeof(keywords) / sizeof(keywords[0]);

    for(usz_t index = 0; index < num_keywords; ++index)
    {
        // Only a keyword of the very same length can be a match.
        if(keywords[index].length != length)
            continue;
        if(memcmp(word, keywords[index].text, length) == 0)
            return keywords[index].type;
    }
    return PAC_TOKEN_WORD;
}
// pac_convert_sign_to_token_type: Finds the token type of a sign.
//
// Returns the sign's dedicated token type or PAC_TOKEN_STRAY if
// there is none for it.
pac_token_e pac_convert_sign_to_token_type(rune_t sign)
{
    static const struct
    {
        rune_t sign;
        pac_token_e type;
    } known_signs[] =
    {
        { '<', PAC_TOKEN_SIGN_OPEN_TAG     },
        { '>', PAC_TOKEN_SIGN_CLOSE_TAG    },
        { '=', PAC_TOKEN_SIGN_EQUALS       },
        { ':', PAC_TOKEN_SIGN_COLON        },
        { ',', PAC_TOKEN_SIGN_COMMA        },
        { '-', PAC_TOKEN_SIGN_HYPHEN       },
        { '_', PAC_TOKEN_SIGN_UNDERSCORE   },
        { '|', PAC_TOKEN_SIGN_VERTICAL_BAR },
        { ';', PAC_TOKEN_SIGN_SEMICOLON    }
    };
    const usz_t num_known_signs = sizeof(known_signs) / sizeof(known_signs[0]);

    for(usz_t index = 0; index < num_known_signs; ++index)
    {
        if(known_signs[index].sign == sign)
            return known_signs[index].type;
    }
    return PAC_TOKEN_STRAY;
}
pac_tlist_s pac_tokenize_grammar(char *source, usz_t len_source)
{
usz_t tokens_capacity = 1024;
pac_tlist_s list;
list.cursor = 0;
list.source = source;
list.num_tokens = 0;
list.tokens = calloc(sizeof(pac_token_s), tokens_capacity);
for(usz_t offset = 0; offset < len_source; ++offset)
{
// The subject of the current iteration of this loop;
// the rune which is being looked at
rune_t subject = source[offset];
if(list.num_tokens >= tokens_capacity)
{
tokens_capacity *= 2;
list.tokens = realloc(list.tokens, sizeof(pac_token_s) * tokens_capacity);
}
if(pac_rune_is_blank(subject))
continue;
if(subject == '"')
{
usz_t start_offset = offset;
while(offset < len_source)
{
if(subject == '\\')
++offset;
++offset;
subject = source[offset];
if(subject == '"')
break;
}
pac_token_s token;
token.type = PAC_TOKEN_LIT_STRING;
token.offset = start_offset + 1; // +1 for skipping the starting quotation mark
token.length = (offset - start_offset) - 1; // -1 for leaving the ending quotation mark out
list.tokens[list.num_tokens] = token;
++list.num_tokens;
continue;
}
if(pac_rune_is_letter(subject) || (subject == '_'))
{
usz_t start_offset = offset;
while(offset < len_source)
{
++offset;
subject = source[offset];
if(!pac_rune_is_letter(subject) && (subject != '_'))
{
break;
}
}
pac_token_s token;
token.offset = start_offset;
token.length = offset - start_offset;
token.type = pac_convert_word_to_token_type(&source[start_offset], token.length);
--offset; // The for() - header will skip to the character after the word.
list.tokens[list.num_tokens] = token;
++list.num_tokens;
continue;
}
if(subject == '#')
{
usz_t offset_copy = offset;
++offset_copy;
if(offset_copy < len_source)
{
rune_t second_sign = source[offset_copy];
if(second_sign == '#')
{
while(offset_copy < len_source)
{
if(source[offset_copy] == '\n')
{
break;
}
++offset_copy;
}
}
else if(second_sign == '[')
{
while(offset_copy < len_source)
{
if(source[offset_copy] == ']')
{
break;
}
// If there's an escaped character here, do one jump more
if(source[offset_copy] == '\\')
++offset_copy;
++offset_copy;
}
}
offset = offset_copy;
}
continue;
}
if(pac_rune_is_sign(subject))
{
pac_token_s token;
token.type = pac_convert_sign_to_token_type(subject);
token.offset = offset;
token.length = 1;
list.tokens[list.num_tokens] = token;
++list.num_tokens;
continue;
}
}
return list;
}
// pac_stringify_token_type: Returns a human-readable name for a
// token type, or "Invalid" if the value isn't a known token type.
//
// The returned string is constant; it must not be modified or freed.
char * pac_stringify_token_type(pac_token_e type)
{
    static char *const type_names[] =
    {
        [PAC_TOKEN_STRAY]             = " - ",
        [PAC_TOKEN_WORD]              = "Word",

        [PAC_TOKEN_KEYWORD_TRUE]      = "Keyword: \"true\"",
        [PAC_TOKEN_KEYWORD_FALSE]     = "Keyword: \"false\"",
        [PAC_TOKEN_KEYWORD_ALPHA]     = "Keyword: \"alpha\"",
        [PAC_TOKEN_KEYWORD_WORD]      = "Keyword: \"word\"",
        [PAC_TOKEN_KEYWORD_INTEGER]   = "Keyword: \"integer\"",

        [PAC_TOKEN_LIT_STRING]        = "String",
        [PAC_TOKEN_LIT_RUNE]          = "Rune",
        [PAC_TOKEN_LIT_INTEGER]       = "Integer",

        [PAC_TOKEN_SIGN_OPEN_TAG]     = "Sign: <",
        [PAC_TOKEN_SIGN_CLOSE_TAG]    = "Sign: >",
        [PAC_TOKEN_SIGN_EQUALS]       = "Sign: =",
        [PAC_TOKEN_SIGN_COLON]        = "Sign: :",
        [PAC_TOKEN_SIGN_COMMA]        = "Sign: ,",
        [PAC_TOKEN_SIGN_HYPHEN]       = "Sign: -",
        [PAC_TOKEN_SIGN_UNDERSCORE]   = "Sign: _",
        [PAC_TOKEN_SIGN_VERTICAL_BAR] = "Sign: |",
        [PAC_TOKEN_SIGN_SEMICOLON]    = "Sign: ;"
    };
    const usz_t num_type_names = sizeof(type_names) / sizeof(type_names[0]);

    // A negative value becomes a huge index by the conversion and
    // is caught by the range check, too.
    usz_t slot = (usz_t) type;
    if(slot >= num_type_names)
        return "Invalid";
    return type_names[slot];
}
// pac_spaces: The buffer which is shared by all calls of
// pac_create_spaces_for_indent().
char pac_spaces[256];

// pac_create_spaces_for_indent: Creates a string of 'count' spaces.
//
// The whole buffer is filled with spaces and is then terminated at
// the index 'count'. The returned pointer is the shared buffer, so
// the string is only valid until the next call.
char * pac_create_spaces_for_indent(u8_t count)
{
    for(usz_t index = 0; index < sizeof(pac_spaces); ++index)
    {
        pac_spaces[index] = ' ';
    }
    pac_spaces[count] = 0x00;
    return pac_spaces;
}
// pac_display_tlist: Prints all tokens of a list to the standard
// output; one line per token, made of the name of the token's type
// and of the token's text.
//
// The text is printed right from the source with a length limit, so
// no copy of it is made, no matter how long a token is.
void pac_display_tlist(pac_tlist_s list)
{
    printf("Displaying %lu Tokens:\n", (unsigned long) list.num_tokens);

    for(usz_t index = 0; index < list.num_tokens; ++index)
    {
        pac_token_s token = list.tokens[index];

        // The precision of '%.*s' is an int; a negative one would
        // remove the limit, so overlong tokens are clamped.
        int shown_length = INT_MAX;
        if(token.length < (usz_t) INT_MAX)
            shown_length = (int) token.length;

        char *token_type_string = pac_stringify_token_type(token.type);
        char *token_type_indent = pac_create_spaces_for_indent(24 - strlen(token_type_string));

        printf("[%s]:%s %.*s\n",
            token_type_string,
            token_type_indent,
            shown_length,
            &list.source[token.offset]
        );
    }
}

View File

@ -0,0 +1,2 @@
<root> = integer, "Something", integer | integer, word;