Added initial code; tokenizer/token-display, main function and other boilerplate like the build script

This commit is contained in:
Eric-Paul Ickhorn 2024-02-11 07:10:01 +01:00
commit 1ebf0085aa
11 changed files with 607 additions and 0 deletions

13
.gitignore vendored Normal file
View File

@ -0,0 +1,13 @@
# Complete folders that are unwanted in commits
*.build/
*.local/
*.vscode/
# Machine Code
*.a
*.dll
*.elf
*.exe
*.so

271
action.bash Executable file
View File

@ -0,0 +1,271 @@
#!/usr/bin/env bash
#
# Build helper for this repository.
# Usage: ./action.bash <action>    (run './action.bash help' for the list)

# Always operate from the folder that contains this script. BASH_SOURCE is
# used instead of "$(pwd)/$0" because the latter produces a non-existent
# path whenever the script is invoked through an absolute path.
cd "$(dirname -- "${BASH_SOURCE[0]}")" || exit 1
REPOSITORY_FOLDER=$(pwd)

# Name used as prefix for the static libraries of the modules.
PROJECT_NAME="mach"

# Compiler flags of the two build profiles.
DEBUG_CC_OPTIONS="-g3 -Wall -Wextra -Wpedantic"
RELEASE_CC_OPTIONS="-O3 -Wall"

# Root folder below which each module gets its own object-file folder.
MAIN_OBJECTS_FOLDER="$REPOSITORY_FOLDER/.build/objects"

# Per-module file (relative to the module folder) listing extra include paths.
CONFIG_FILE_INCLUDE_PATHS="build-config/include_paths.txt"

# Include paths and libraries every test is compiled/linked with.
DEFAULT_TEST_INCLUDE_PATHS="
-I .build/depends/libRR/Core/core/exports/
-I .build/depends/libRR/Core/platform/exports/
-I core/exports/
-I core/inc-c/"
DEFAULT_TEST_LINKAGE_PATHS="
$REPOSITORY_FOLDER/.build/librr-core.a
$REPOSITORY_FOLDER/.build/librr-platform.a"
# Clones the libRR "Core" dependency into .build/depends/libRR (relative to
# the current folder, which is the repository root when called from this
# script). An existing checkout is replaced.
# Globals:  REPOSITORY_FOLDER (read)
# Returns:  0 on success, non-zero if a folder could not be entered or the
#           clone failed.
function clone_dependencies {
    echo "================ Cloning Dependencies! ================"
    mkdir -p .build/depends/libRR || return 1
    # Never continue if the folder cannot be entered: the 'rm -rf' below
    # would otherwise delete a 'Core' folder in the wrong directory.
    cd .build/depends/libRR/ || return 1
    if [[ -d "Core" ]]
    then
        rm -rf -- Core
    fi
    local STATUS=0
    git clone --depth=1 https://git.nerdcult.net/libRR/Core/ || STATUS=1
    cd "$REPOSITORY_FOLDER" || return 1
    return "$STATUS"
}
# Builds the previously cloned libRR "Core" dependency in its release profile
# and copies the resulting static libraries into "$REPOSITORY_FOLDER/.build".
# Globals:  REPOSITORY_FOLDER (read)
# Returns:  0 on success; 1 if the dependency is missing, its build fails or
#           the libraries cannot be copied.
function build_dependencies {
    echo "================ Building Dependencies! ================"
    mkdir -p "$REPOSITORY_FOLDER/.build/output" || return 1
    if ! cd .build/depends/libRR/Core/
    then
        echo "Dependencies not found; run the 'clone-dependencies' action first." >&2
        return 1
    fi
    local STATUS=0
    if bash build.bash release
    then
        # Only copy after a successful build so that stale archives are
        # never mistaken for fresh ones.
        cp .build/librr-core.a .build/librr-platform.a "$REPOSITORY_FOLDER/.build" || STATUS=1
    else
        STATUS=1
    fi
    cd "$REPOSITORY_FOLDER" || return 1
    return "$STATUS"
}
# Assembles the '-I' compiler arguments for a module.
# The module's own 'inc-c' folder is always included; every
# whitespace-separated entry of the module's include-path list is appended
# as a path relative to the repository root.
# Arguments: $1 - module name (folder below the repository root)
# Globals:   REPOSITORY_FOLDER, CONFIG_FILE_INCLUDE_PATHS (read)
#            MODULE_NAME, INCLUDE_CONFIG_PATH, INCLUDE_STATEMENTS (written)
function get_include_path_configuration {
    MODULE_NAME=$1
    INCLUDE_CONFIG_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/$CONFIG_FILE_INCLUDE_PATHS"
    INCLUDE_STATEMENTS="-I $REPOSITORY_FOLDER/$MODULE_NAME/inc-c/"
    if [[ ! -f "$INCLUDE_CONFIG_PATH" ]]
    then
        return
    fi
    # Read all entries at once. The path is quoted so that a repository
    # folder containing whitespace still works, and 'read -a' splits on
    # whitespace without glob-expanding the entries.
    local -a INCLUDE_ENTRIES=()
    read -r -d '' -a INCLUDE_ENTRIES < "$INCLUDE_CONFIG_PATH"
    local LINE
    for LINE in "${INCLUDE_ENTRIES[@]}"
    do
        INCLUDE_STATEMENTS="$INCLUDE_STATEMENTS -I $REPOSITORY_FOLDER/$LINE"
    done
}
# Derives a flat, collision-resistant object-file name from a source path.
# The name is "<number of slashes>-<path with '/' replaced by '_'>.o"; the
# slash count keeps e.g. "a/b.c" and "a_b.c" apart.
# Arguments: $1 - source path relative to the module's source folder
# Globals:   INPUT_NAME, NUM_SUBPATHS, SANITIZED_INPUT_NAME, OBJECT_NAME
#            (all written)
function generate_object_name {
    INPUT_NAME=$1
    # Pure parameter expansion: no sub-processes, no word-splitting or
    # globbing of the name, and no platform-dependent padding from 'wc'.
    local ONLY_SLASHES=${INPUT_NAME//[^\/]}
    NUM_SUBPATHS=${#ONLY_SLASHES}
    SANITIZED_INPUT_NAME=${INPUT_NAME//\//_}
    OBJECT_NAME="$NUM_SUBPATHS-$SANITIZED_INPUT_NAME.o"
}
# Compiles every C source below "<module>/src-c" into an object file and
# bundles the objects into ".build/<project>-<module>.a".
# Arguments: $1 - module name (folder below the repository root)
# Globals:   REPOSITORY_FOLDER, MAIN_OBJECTS_FOLDER, PROJECT_NAME,
#            CC_OPTIONS (read); INCLUDE_STATEMENTS and OBJECT_NAME (read,
#            produced by the helper functions called here);
#            MODULE_NAME, MODULE_SOURCE_PATH, MODULE_OBJECTS_FOLDER,
#            RELATIVE_SOURCE_PATH (written)
# Returns:   0 on success, 1 if a folder is missing, a file failed to
#            compile or the archive could not be written.
function compile_module_c_sources {
    MODULE_NAME=$1
    if [[ -z "$MODULE_NAME" ]]
    then
        # An empty name would make the clean-up below wipe the objects of
        # every module.
        echo "compile_module_c_sources: missing module name" >&2
        return 1
    fi
    get_include_path_configuration "$MODULE_NAME"
    MODULE_SOURCE_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/src-c"
    MODULE_OBJECTS_FOLDER="$MAIN_OBJECTS_FOLDER/$MODULE_NAME"

    # Start from an empty objects folder; '-f' keeps the very first build
    # (where the folder does not exist yet) free of error messages.
    rm -rf -- "$MODULE_OBJECTS_FOLDER"
    mkdir -p -- "$MODULE_OBJECTS_FOLDER" || return 1

    if ! cd "$MODULE_SOURCE_PATH"
    then
        echo "Couldn't enter source folder '$MODULE_SOURCE_PATH'." >&2
        return 1
    fi

    local SOURCE_FOLDER_ITEM
    local -a OBJECT_FILES=()
    local STATUS=0
    # Let 'find' select the C sources; NUL-delimited output keeps unusual
    # file names intact.
    while IFS= read -r -d '' SOURCE_FOLDER_ITEM
    do
        # Cut away the dot-slash given by 'find' as abbreviation for the
        # working directory.
        RELATIVE_SOURCE_PATH=${SOURCE_FOLDER_ITEM#./}
        echo "==> File: $RELATIVE_SOURCE_PATH"

        # All objects are placed flat in the module's objects folder; the
        # generated name encodes the sub-folder of the source file.
        generate_object_name "$RELATIVE_SOURCE_PATH"
        # CC_OPTIONS and INCLUDE_STATEMENTS hold several arguments each and
        # are therefore expanded unquoted on purpose.
        if gcc -c $CC_OPTIONS -o \
            "$MODULE_OBJECTS_FOLDER/$OBJECT_NAME" \
            "$MODULE_SOURCE_PATH/$RELATIVE_SOURCE_PATH" \
            $INCLUDE_STATEMENTS
        then
            OBJECT_FILES+=("$MODULE_OBJECTS_FOLDER/$OBJECT_NAME")
        else
            STATUS=1
        fi
    done < <(find . -mindepth 1 -name '*.c' ! -type d -print0)

    # Only hand real object files to 'ar' (never folders).
    if (( ${#OBJECT_FILES[@]} > 0 ))
    then
        ar -rvs "$REPOSITORY_FOLDER/.build/$PROJECT_NAME-$MODULE_NAME.a" "${OBJECT_FILES[@]}" || STATUS=1
    fi
    cd "$REPOSITORY_FOLDER" || return 1
    return "$STATUS"
}
function get_test_linkage_path_configuration() {
TEST_PATH=$1
LINKAGE_PATHS=$DEFAULT_TEST_LINKAGE_PATHS
if [[ -f "$TEST_PATH/linkage_paths.txt" ]]
then
for LINKAGE_ITEM in $(cat "$TEST_PATH/linkage_paths.txt")
do
LINKAGE_PATHS="$LINKAGE_PATHS $REPOSITORY_FOLDER/$LINKAGE_ITEM"
done
fi
}
function get_test_include_path_configuration() {
TEST_PATH=$1
INCLUDE_CONFIG_PATH="$TEST_PATH/include_paths.txt"
INCLUDE_STATEMENTS="$DEFAULT_TEST_INCLUDE_PATHS -I $TEST_PATH/inc-c/"
if [[ ! -f $INCLUDE_CONFIG_PATH ]]
then
return
fi
for LINE in $(cat $INCLUDE_CONFIG_PATH)
do
INCLUDE_STATEMENTS="$INCLUDE_STATEMENTS -I $REPOSITORY_FOLDER/$LINE"
done
}
# Compiles all C files directly inside a test folder into one executable
# named "<test folder>/<test name>.elf".
# Arguments: $1 - folder of the test
# Globals:   CC_OPTIONS (read); LINKAGE_PATHS and INCLUDE_STATEMENTS (read,
#            produced by the two configuration helpers called here);
#            TEST_PATH, TEST_NAME (written)
function compile_single_test() {
    TEST_PATH=$1
    TEST_NAME=$(basename $TEST_PATH)
    printf 'Compiling Test: %s\n' "$TEST_NAME"

    # TODO: As a small improvement, the tests could be able to have multiple sub-folders for sources.
    get_test_linkage_path_configuration $TEST_PATH
    get_test_include_path_configuration $TEST_PATH

    # Assemble the whole compiler invocation first. The expansions stay
    # unquoted because each variable carries several arguments and the
    # source files are selected with a glob.
    local -a COMPILER_ARGUMENTS=(
        $CC_OPTIONS
        -o $TEST_PATH/$TEST_NAME.elf
        $TEST_PATH/*.c
        $LINKAGE_PATHS
        $INCLUDE_STATEMENTS
    )
    gcc "${COMPILER_ARGUMENTS[@]}"
}
# Compiles every test listed in "<module>/build-config/tests.txt".
# Each whitespace-separated entry of that file is a test folder relative to
# the module folder. A module without such a list is skipped with a notice.
# Arguments: $1 - module name (folder below the repository root)
# Globals:   REPOSITORY_FOLDER (read)
#            MODULE_NAME, TEST_PATH_LIST_PATH, RELATIVE_TEST_PATH,
#            TEST_PATH (written)
function compile_all_tests_of_module() {
    MODULE_NAME=$1
    printf '%s\n' \
        "================================================================" \
        "COMPILING ALL TESTS OF MODULE: '$MODULE_NAME'." \
        " "
    TEST_PATH_LIST_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/build-config/tests.txt"
    if [[ -f $TEST_PATH_LIST_PATH ]]
    then
        for RELATIVE_TEST_PATH in $(cat $TEST_PATH_LIST_PATH)
        do
            TEST_PATH=$REPOSITORY_FOLDER/$MODULE_NAME/$RELATIVE_TEST_PATH
            compile_single_test $TEST_PATH
        done
    else
        echo "Couldn't find list of tests for module '$MODULE_NAME'. Skipping."
    fi
}
# Compiles the tests of every module listed in 'build-config/modules.txt'
# (looked up relative to the current folder, which is the repository root
# when called from this script).
# Globals:  MODULE (written)
# Returns:  0 on success; 1 if the module list is missing or the tests of
#           any module failed to compile.
function compile_all_tests_of_all_modules() {
    local MODULES_FILE="build-config/modules.txt"
    if [[ ! -f "$MODULES_FILE" ]]
    then
        echo "Failed compiling tests: Couldn't find '$MODULES_FILE'" >&2
        return 1
    fi
    # Read the list up front so that the loop does not depend on the
    # working directory staying unchanged while modules are processed.
    local -a MODULES=()
    read -r -d '' -a MODULES < "$MODULES_FILE"
    local STATUS=0
    for MODULE in "${MODULES[@]}"
    do
        compile_all_tests_of_module "$MODULE" || STATUS=1
    done
    return "$STATUS"
}
# Compiles the sources of every module listed in 'build-config/modules.txt'
# (looked up relative to the current folder, which is the repository root
# when called from this script).
# Globals:  MODULE (written)
# Returns:  0 on success; 1 if the module list is missing or any module
#           failed to compile.
function compile_all_sources_of_all_modules() {
    local MODULES_FILE="build-config/modules.txt"
    if [[ ! -f "$MODULES_FILE" ]]
    then
        echo "Failed compiling sources: Couldn't find '$MODULES_FILE'" >&2
        return 1
    fi
    # Read the list up front so that the loop does not depend on the
    # working directory staying unchanged while modules are compiled.
    local -a MODULES=()
    read -r -d '' -a MODULES < "$MODULES_FILE"
    local STATUS=0
    for MODULE in "${MODULES[@]}"
    do
        compile_module_c_sources "$MODULE" || STATUS=1
    done
    return "$STATUS"
}
# Compiles all modules with the debug compiler flags.
# Globals:  DEBUG_CC_OPTIONS (read), CC_OPTIONS (written)
# Returns:  the status of the compilation step.
function build_in_debug_profile {
    printf '%s\n' "================ Building in Debug Profile! ================"
    CC_OPTIONS="${DEBUG_CC_OPTIONS}"
    compile_all_sources_of_all_modules
}
# Compiles all modules with the release compiler flags.
# Globals:  RELEASE_CC_OPTIONS (read), CC_OPTIONS (written)
# Returns:  the status of the compilation step.
function build_in_release_profile {
    printf '%s\n' "================ Building in Release Profile! ================"
    CC_OPTIONS="${RELEASE_CC_OPTIONS}"
    compile_all_sources_of_all_modules
}
# Links all static libraries found in '.build' (relative to the current
# folder) into the executable 'mach.elf'.
function link_modules {
    # Every archive is handed to the linker twice, in the same order.
    # NOTE(review): presumably so that archives can resolve symbols from
    # archives listed after them - confirm.
    local -a ARCHIVES=(.build/*.a)
    gcc -o mach.elf "${ARCHIVES[@]}" "${ARCHIVES[@]}"
}
# Entry point: runs the action named by the first command-line argument.
# Arguments: $1 - action name or one of its abbreviations (see 'help')
# Returns:   the status of the action; 1 for an unknown action.
function main {
    case "$1" in
        "d" | "dbg" | "debug")
            # Linking only makes sense after a successful build.
            build_in_debug_profile && link_modules
            ;;
        "r" | "release")
            build_in_release_profile && link_modules
            ;;
        "c" | "clone-dependencies")
            clone_dependencies
            ;;
        "b" | "build-dependencies")
            build_dependencies
            ;;
        "t" | "build-tests")
            compile_all_tests_of_all_modules
            ;;
        "h" | "help")
            echo "Known Actions:"
            echo "[ d | dbg | debug ]: Build in the debug profile; build with debug symbols."
            echo "[ r | release ]: Build for a release, with speed optimizations."
            echo "[ c | clone-dependencies]: Clone the dependencies using Git (network required)."
            echo "[ b | build-dependencies]: Build the dependencies (which must have been cloned first!)."
            echo "[ t | build-tests       ]: Build the tests of all modules."
            echo "[ h | help ]: Display this message."
            echo ""
            echo "Note: Before being able to build (debug-profile / release-profile), cloning and building the dependencies is required!"
            ;;
        *)
            echo "Unknown action, try '$0 help' or '$0 h'." >&2
            return 1
            ;;
    esac
}
main "$@"

1
build-config/modules.txt Normal file
View File

@ -0,0 +1 @@
builder

View File

@ -0,0 +1,2 @@
.build/depends/libRR/Core/core/exports
.build/depends/libRR/Core/platform/exports

18
builder/inc-c/mach.h Normal file
View File

@ -0,0 +1,18 @@
#ifndef MACH_H
#define MACH_H
#include <librr/types.h>
// Result object of reading a Mach script; handed to mach_read_script as the
// output parameter.
// NOTE(review): the field meanings below are inferred from the names only -
// confirm once code that fills the structure exists.
typedef struct MachScript MachScript;
struct MachScript
{
// Presumably the number of entries in compilation_unit_paths.
usz_t num_compilation_units;
// Presumably one path string per compilation unit; ownership of the array
// and of the strings is not defined by this header.
char **compilation_unit_paths;
};
// Reads the script file at `path` and runs it through the parser.
// Returns a negative value on failure (for example when the file cannot be
// opened); a non-negative value otherwise.
// `out_script` is the destination for the parsed script.
// NOTE(review): confirm whether the implementation already fills
// `out_script`; do not rely on its contents until verified.
i32_t mach_read_script(const char *path, MachScript *out_script);
#endif // MACH_H

47
builder/inc-c/parser.h Normal file
View File

@ -0,0 +1,47 @@
#ifndef MACH_PARSER_H
#define MACH_PARSER_H
#include <librr/types.h>
#include <librr/runes.h>
typedef struct MachToken MachToken;
typedef struct MachTokenStream MachTokenStream;
// Kinds of tokens stored in a MachTokenStream.
typedef enum
{
// A run that starts with a letter and continues with letters or underscores.
MACH_TOKEN_WORD,
// A run of digits.
MACH_TOKEN_INTEGER,
// A double-quoted string; the token spans the source text from the opening
// quote onwards.
MACH_TOKEN_STRING,
// A single ASCII special character.
MACH_TOKEN_SPECIAL_SIGN,
// Marker for the end of the token stream.
// NOTE(review): verify that the tokenizer actually appends this token
// before relying on it as a terminator.
MACH_TOKEN_STREAM_END,
} MachTokenType;
// Source text together with the tokens found in it.
// The caller fills `len_source` and `source` before calling mach_tokenize;
// mach_tokenize fills `num_tokens` and `tokens`.
struct MachTokenStream
{
// Length of `source`, used as the upper bound for offsets into it.
usz_t len_source;
// The text to tokenize; tokens refer to it through offsets, so it must
// stay valid for as long as the tokens are used.
char *source;
// Number of valid entries in `tokens`.
usz_t num_tokens;
// Token array allocated by mach_tokenize with the C allocator; the caller
// releases it with free().
MachToken *tokens;
};
// One token; its text is the `length` units of the stream's source that
// start at `offset`.
struct MachToken
{
// Start of the token within the stream's source.
u32_t offset;
// Extent of the token within the stream's source.
u32_t length;
// Kind of the token; see MachTokenType.
MachTokenType type;
// Additional data; which member is meaningful depends on `type`.
union {
// The special character of a MACH_TOKEN_SPECIAL_SIGN token.
rr_ascii_sign_e sign_type;
// Intended to hold the contents of a MACH_TOKEN_STRING token with escape
// sequences resolved.
// NOTE(review): may be NULL until string post-processing is implemented -
// check before dereferencing.
char *processed_string;
} data;
};
// Splits `stream->source` (of length `stream->len_source`) into tokens and
// stores them in `stream->tokens` / `stream->num_tokens`.
// Returns 0 on success and a negative value on failure (for example -1 for
// invalid UTF-8 and -2 for a string that contains a line break).
i32_t mach_tokenize(MachTokenStream *stream);
// Prints one line per token of `stream` to standard output, consisting of
// the token's index and its text.
void mach_display_token_stream(MachTokenStream *stream);
#endif // MACH_PARSER_H

View File

@ -0,0 +1,43 @@
#include <mach.h>
#include <parser.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Tokenizes the script text and prints the resulting tokens.
 *
 * string:     the script text; only read through the token stream.
 * len_string: length of the script text.
 * out_script: destination for the parsed script; not written yet because
 *             only the tokenizing stage exists.
 *
 * Returns 0 on success, or the tokenizer's negative status shifted down by
 * 1024 on failure.
 */
i32_t mach_parse_script(char *string, usz_t len_string, MachScript *out_script)
{
    (void) out_script;

    MachTokenStream token_stream;
    token_stream.len_source = len_string;
    token_stream.source = string;
    token_stream.num_tokens = 0;
    token_stream.tokens = NULL;

    i32_t tokenization_status = mach_tokenize(&token_stream);
    if(tokenization_status < 0)
    {
        // The tokenizer may have allocated the token array before failing;
        // free(NULL) is harmless if it did not.
        free(token_stream.tokens);
        return tokenization_status - 1024;
    }

    mach_display_token_stream(&token_stream);

    free(token_stream.tokens);
    return 0;
}
/*
 * Loads the script file at `path` into memory and parses it.
 *
 * Returns:
 *   the (non-negative) parser status on success,
 *   -1 if the file cannot be opened,
 *   -2 if its size cannot be determined,
 *   -3 if the buffer for its contents cannot be allocated,
 *   -4 if reading it fails,
 *   the parser's negative status shifted down by 1024 if parsing fails.
 */
i32_t mach_read_script(const char *path, MachScript *out_script)
{
    FILE *script_file = fopen(path, "r");
    if(script_file == NULL)
        return -1;

    // Determine the file size; every step can fail (e.g. on a pipe).
    long file_size = -1;
    if(fseek(script_file, 0, SEEK_END) == 0)
        file_size = ftell(script_file);
    if((file_size < 0) || (fseek(script_file, 0, SEEK_SET) != 0))
    {
        fclose(script_file);
        return -2;
    }

    char *script_string = malloc((usz_t) file_size + 1);
    if(script_string == NULL)
    {
        fclose(script_file);
        return -3;
    }

    // Use the number of bytes actually read as the length; it can be smaller
    // than the file size, e.g. when the platform translates line endings.
    usz_t len_script_string = fread(script_string, 1, (usz_t) file_size, script_file);
    int read_failed = ferror(script_file);
    fclose(script_file);
    if(read_failed)
    {
        free(script_string);
        return -4;
    }
    // The extra byte allocated above holds the terminator.
    script_string[len_script_string] = '\0';

    i32_t parse_status = mach_parse_script(script_string, len_script_string, out_script);
    free(script_string);
    if(parse_status < 0)
        return parse_status - 1024;
    return parse_status;
}

27
builder/src-c/main.c Normal file
View File

@ -0,0 +1,27 @@
#include <mach.h>
#include <stdio.h>
/*
 * Program entry point.
 *
 * Without arguments "./MachScript.mach" is read; with one argument that
 * argument is used as the path of the script. Any other number of arguments
 * prints a usage message.
 *
 * Returns 0 on success, -1 on wrong usage and 1 if the script could not be
 * read or tokenized.
 */
int main(int argc, char **argv)
{
    const char *mach_config_path = NULL;
    if(argc == 1)
    {
        mach_config_path = "./MachScript.mach";
    }
    if(argc == 2)
    {
        mach_config_path = argv[1];
    }
    if(mach_config_path == NULL)
    {
        // argv[0] is the program name; it may be missing when argc is 0.
        const char *program_name = (argc > 0) ? argv[0] : "mach";
        fprintf(stderr, "Usage: %s [config-path (defaults to ./MachScript.mach)]\n", program_name);
        return -1;
    }

    MachScript script;
    int read_status = (int) mach_read_script(mach_config_path, &script);
    if(read_status < 0)
    {
        fprintf(stderr, "Failed reading script '%s' (error %d).\n", mach_config_path, read_status);
        return 1;
    }
    return 0;
}

4
builder/src-c/object.c Normal file
View File

@ -0,0 +1,4 @@
#include <parser.h>

View File

@ -0,0 +1,20 @@
#include <parser.h>
#include <string.h>
#include <stdio.h>
/*
 * Prints every token of `stream` to standard output, one per line, as the
 * token's index (left-aligned, padded to four characters behind a '#')
 * followed by the token's text.
 */
void mach_display_token_stream(MachTokenStream *stream)
{
    for(usz_t token_index = 0; token_index < stream->num_tokens; ++token_index)
    {
        const MachToken *token = &stream->tokens[token_index];

        // Write the token text straight from the source buffer; copying it
        // into a stack array sized by the token length could overflow the
        // stack for very long tokens.
        printf("#%-4d ", (int) token_index);
        fwrite(&stream->source[token->offset], 1, token->length, stdout);
        putchar('\n');
    }
}

161
builder/src-c/tokenizer.c Normal file
View File

@ -0,0 +1,161 @@
#include <parser.h>
#include <stdio.h>
#include <stdlib.h>
i32_t mach_tokenize(MachTokenStream *stream)
{
usz_t tokens_capacity = 2048;
stream->num_tokens = 0;
stream->tokens = calloc(sizeof(MachToken), tokens_capacity);
usz_t offset = 0;
while(offset < stream->len_source)
{
// There must always be one more after the last one for the STREAM_END token.
if((stream->num_tokens + 1) >= tokens_capacity)
{
tokens_capacity *= 2;
stream->tokens = realloc(stream->tokens, sizeof(MachToken) * tokens_capacity);
}
usz_t token_start = offset;
usz_t len_token = 0;
rune_t rune = rr_extract_utf8(stream->source, offset, &len_token);
if(len_token == 0)
{
// TODO: A log-entry because of invalid UTF-8 should be written here.
return -1;
}
offset += len_token;
if(rr_rune_is_letter(rune))
{
while(offset < stream->len_source)
{
len_token = 0;
rune = rr_extract_utf8(stream->source, offset, &len_token);
if(!rr_rune_is_letter(rune) && (rune != '_'))
break;
offset += len_token;
}
MachToken token;
token.offset = token_start;
token.length = offset - token_start;
token.type = MACH_TOKEN_WORD;
token.data.sign_type = rr_rune_to_ascii_sign(rune);
stream->tokens[stream->num_tokens++] = token;
continue;
}
if(rr_rune_is_digit(rune))
{
while(offset < stream->len_source)
{
rune = rr_extract_utf8(stream->source, offset, &offset);
if(!rr_rune_is_digit(rune))
break;
}
MachToken token;
token.offset = token_start;
token.length = offset - token_start;
token.type = MACH_TOKEN_INTEGER;
token.data.sign_type = rr_rune_to_ascii_sign(rune);
stream->tokens[stream->num_tokens++] = token;
continue;
}
if(rune == '#')
{
usz_t old_offset = offset;
rune_t following_rune = rr_extract_utf8(stream->source, offset, &offset);
if(following_rune == '#')
{
while(offset < stream->len_source)
{
following_rune = rr_extract_utf8(stream->source, offset, &offset);
if(following_rune == '\n')
break;
}
continue;
}
else if(following_rune == '[')
{
// Count how many brackets are needed to end this comment
usz_t num_opening_brackets = 1;
while(offset < stream->len_source)
{
following_rune = rr_extract_utf8(stream->source, offset, &offset);
if(following_rune != '[')
break;
++num_opening_brackets;
}
// Find the end of the comment
while(offset < stream->len_source)
{
following_rune = rr_extract_utf8(stream->source, offset, &offset);
usz_t num_closing_brackets = 0;
while(following_rune == ']')
{
++num_closing_brackets;
if(num_closing_brackets == num_opening_brackets)
break;
following_rune = rr_extract_utf8(stream->source, offset, &offset);
}
}
continue;
}
offset = old_offset;
}
if(rune == '"')
{
bool_t faulty = FALSE;
while(offset < stream->len_source)
{
rune = rr_extract_utf8(stream->source, offset, &offset);
if(rune == '"')
break;
if(rune == '\n')
{
faulty = TRUE;
break;
}
// If this is a backslash, skip the next character
if(rune == '\\')
rr_extract_utf8(stream->source, offset, &offset);
}
if(faulty)
{
// TODO: A log-entry because of an invalid string should be written here
return -2;
}
MachToken token;
token.offset = token_start;
token.length = offset - token_start;
token.type = MACH_TOKEN_STRING;
token.data.processed_string = NULL; // !TODO!: Postprocess escape sequences
stream->tokens[stream->num_tokens++] = token;
continue;
}
if(rr_rune_is_ascii_special(rune))
{
MachToken token;
token.offset = token_start;
token.length = offset - token_start;
token.type = MACH_TOKEN_SPECIAL_SIGN;
token.data.sign_type = rr_rune_to_ascii_sign(rune);
stream->tokens[stream->num_tokens++] = token;
continue;
}
}
return 0;
}