From 1ebf0085aa0a18913e1c92e0836e7bffef52a452 Mon Sep 17 00:00:00 2001
From: Eric-Paul Ickhorn
Date: Sun, 11 Feb 2024 07:10:01 +0100
Subject: [PATCH] Added initial code; tokenizer/token-display, main function
 and other boilerplate like the build script

---
 .gitignore                             |  13 ++
 action.bash                            | 287 +++++++++++++++++++++++++
 build-config/modules.txt               |   1 +
 builder/build-config/include_paths.txt |   2 +
 builder/inc-c/mach.h                   |  20 ++
 builder/inc-c/parser.h                 |  49 +++++
 builder/src-c/config-reader.c          |  43 ++++
 builder/src-c/main.c                   |  27 +++
 builder/src-c/object.c                 |   4 +
 builder/src-c/token-displayer.c        |  20 ++
 builder/src-c/tokenizer.c              | 161 +++++++++++++++
 11 files changed, 627 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 action.bash
 create mode 100644 build-config/modules.txt
 create mode 100644 builder/build-config/include_paths.txt
 create mode 100644 builder/inc-c/mach.h
 create mode 100644 builder/inc-c/parser.h
 create mode 100644 builder/src-c/config-reader.c
 create mode 100644 builder/src-c/main.c
 create mode 100644 builder/src-c/object.c
 create mode 100644 builder/src-c/token-displayer.c
 create mode 100644 builder/src-c/tokenizer.c

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c23c1e6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+
+# Complete folders that are unwanted in commits
+*.build/
+*.local/
+*.vscode/
+
+# Machine Code
+*.a
+*.dll
+*.elf
+*.exe
+*.so
+
diff --git a/action.bash b/action.bash
new file mode 100755
index 0000000..474a94d
--- /dev/null
+++ b/action.bash
@@ -0,0 +1,287 @@
+#!/usr/bin/env bash
+
+# Work from the folder this script lives in, no matter how it was invoked.
+cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1
+REPOSITORY_FOLDER=$(pwd)
+
+
+PROJECT_NAME="mach"
+DEBUG_CC_OPTIONS="-g3 -Wall -Wextra -Wpedantic"
+RELEASE_CC_OPTIONS="-O3 -Wall"
+
+MAIN_OBJECTS_FOLDER="$REPOSITORY_FOLDER/.build/objects"
+CONFIG_FILE_INCLUDE_PATHS="build-config/include_paths.txt"
+
+DEFAULT_TEST_INCLUDE_PATHS="
+-I .build/depends/libRR/Core/core/exports/
+-I .build/depends/libRR/Core/platform/exports/
+-I core/exports/
+-I core/inc-c/"
+
+DEFAULT_TEST_LINKAGE_PATHS="
+$REPOSITORY_FOLDER/.build/librr-core.a
+$REPOSITORY_FOLDER/.build/librr-platform.a"
+
+
+# Clones a fresh copy of libRR/Core into .build/depends/libRR (needs network access).
+function clone_dependencies {
+    echo "================ Cloning Dependencies! ================"
+    mkdir -p .build/depends/libRR
+    cd .build/depends/libRR/ || return 1
+    if [[ -d "Core" ]]
+    then
+        rm -rf Core
+    fi
+    git clone --depth=1 https://git.nerdcult.net/libRR/Core/
+
+    cd "$REPOSITORY_FOLDER"
+}
+
+# Builds libRR/Core in its release profile and copies both archives into .build/.
+function build_dependencies {
+    echo "================ Building Dependencies! ================"
+
+    mkdir -p "$REPOSITORY_FOLDER/.build/output"
+
+    cd .build/depends/libRR/Core/ || return 1
+    bash build.bash release
+    cp .build/librr-core.a "$REPOSITORY_FOLDER/.build"
+    cp .build/librr-platform.a "$REPOSITORY_FOLDER/.build"
+
+    cd "$REPOSITORY_FOLDER"
+}
+
+# Sets INCLUDE_STATEMENTS to the '-I' options for compiling the module named by $1.
+function get_include_path_configuration {
+    MODULE_NAME=$1
+
+    INCLUDE_CONFIG_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/$CONFIG_FILE_INCLUDE_PATHS"
+    INCLUDE_STATEMENTS="-I $REPOSITORY_FOLDER/$MODULE_NAME/inc-c/"
+    if [[ ! -f $INCLUDE_CONFIG_PATH ]]
+    then
+        return
+    fi
+
+    for LINE in $(cat $INCLUDE_CONFIG_PATH)
+    do
+        INCLUDE_STATEMENTS="$INCLUDE_STATEMENTS -I $REPOSITORY_FOLDER/$LINE"
+    done
+}
+
+# Sets OBJECT_NAME to a flat object file name derived from the source path $1.
+function generate_object_name {
+    INPUT_NAME=$1
+
+    NUM_SUBPATHS=$(echo $INPUT_NAME | tr -cd "/" | wc -c)
+    SANITIZED_INPUT_NAME=$(echo $INPUT_NAME | tr "/" "_")
+
+    OBJECT_NAME="$NUM_SUBPATHS-$SANITIZED_INPUT_NAME.o"
+}
+
+# Compiles every C file below the 'src-c' folder of module $1 and archives the objects.
+function compile_module_c_sources {
+    MODULE_NAME=$1
+
+    get_include_path_configuration $MODULE_NAME
+
+    MODULE_SOURCE_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/src-c"
+    MODULE_OBJECTS_FOLDER="$MAIN_OBJECTS_FOLDER/$MODULE_NAME"
+    rm -rf "$MODULE_OBJECTS_FOLDER"
+    mkdir -p $MODULE_OBJECTS_FOLDER
+
+    # Loop through all files in the 'src-c'-folder and hand them over to GCC
+
+    cd "$MODULE_SOURCE_PATH" || return 1
+    MODULE_SOURCES=$(find . -mindepth 1)
+    for SOURCE_FOLDER_ITEM in $MODULE_SOURCES
+    do
+        # Cut away the dot-slash given by 'find' as abbreviation for the working directory
+        RELATIVE_SOURCE_PATH=$(echo $SOURCE_FOLDER_ITEM | cut -c "3-")
+
+        # If this folder item is a folder, it must be created as an
+        # output-folder for the object files to be placed in
+
+        if [[ -d $RELATIVE_SOURCE_PATH ]]
+        then
+            mkdir -p "$MODULE_OBJECTS_FOLDER/$RELATIVE_SOURCE_PATH"
+            continue
+        fi
+
+        # Check if this is a C source file by checking the last 2 characters (the ending),
+        # and if it isn't, continue with the next file.
+
+        LEN_SOURCE_FILE_NAME=${#RELATIVE_SOURCE_PATH}
+        let PENULTIMATE_OFFSET=$LEN_SOURCE_FILE_NAME-1
+        LAST_2_CHARACTERS=$(echo $RELATIVE_SOURCE_PATH | cut -c "$PENULTIMATE_OFFSET-")
+        if [[ $LAST_2_CHARACTERS != ".c" ]]; then continue; fi
+
+        # Status Message
+        echo "==> File: $RELATIVE_SOURCE_PATH"
+
+        # Finally, call GCC to compile the C-file and let it place the file in the
+        # objects folder or one of the possible subfolders which now could exist.
+
+        generate_object_name $RELATIVE_SOURCE_PATH
+
+        gcc -c $CC_OPTIONS -o \
+            "$MODULE_OBJECTS_FOLDER/$OBJECT_NAME" \
+            "$MODULE_SOURCE_PATH/$RELATIVE_SOURCE_PATH" \
+            $INCLUDE_STATEMENTS
+    done
+
+    ar -rvs $REPOSITORY_FOLDER/.build/$PROJECT_NAME-$MODULE_NAME.a $MODULE_OBJECTS_FOLDER/*
+    cd $REPOSITORY_FOLDER
+}
+
+# Sets LINKAGE_PATHS to the archives which the test in folder $1 is linked against.
+function get_test_linkage_path_configuration() {
+    TEST_PATH=$1
+
+    LINKAGE_PATHS=$DEFAULT_TEST_LINKAGE_PATHS
+    if [[ -f "$TEST_PATH/linkage_paths.txt" ]]
+    then
+        for LINKAGE_ITEM in $(cat "$TEST_PATH/linkage_paths.txt")
+        do
+            LINKAGE_PATHS="$LINKAGE_PATHS $REPOSITORY_FOLDER/$LINKAGE_ITEM"
+        done
+    fi
+}
+
+# Sets INCLUDE_STATEMENTS to the '-I' options for compiling the test in folder $1.
+function get_test_include_path_configuration() {
+    TEST_PATH=$1
+
+    INCLUDE_CONFIG_PATH="$TEST_PATH/include_paths.txt"
+    INCLUDE_STATEMENTS="$DEFAULT_TEST_INCLUDE_PATHS -I $TEST_PATH/inc-c/"
+    if [[ ! -f $INCLUDE_CONFIG_PATH ]]
+    then
+        return
+    fi
+
+    for LINE in $(cat $INCLUDE_CONFIG_PATH)
+    do
+        INCLUDE_STATEMENTS="$INCLUDE_STATEMENTS -I $REPOSITORY_FOLDER/$LINE"
+    done
+}
+
+
+# Compiles the C files lying directly in the test folder $1 into one '.elf' next to them.
+function compile_single_test() {
+    TEST_PATH=$1
+    TEST_NAME=$(basename $TEST_PATH)
+
+    echo "Compiling Test: $TEST_NAME"
+
+    # TODO: As a small improvement, the tests could be able to have multiple sub-folders for sources.
+
+    get_test_include_path_configuration $TEST_PATH
+    get_test_linkage_path_configuration $TEST_PATH
+    gcc $CC_OPTIONS -o $TEST_PATH/$TEST_NAME.elf $TEST_PATH/*.c $LINKAGE_PATHS $INCLUDE_STATEMENTS
+}
+
+# Compiles every test listed in the 'build-config/tests.txt' of module $1.
+function compile_all_tests_of_module() {
+    MODULE_NAME=$1
+
+    echo "================================================================"
+    echo "COMPILING ALL TESTS OF MODULE: '$MODULE_NAME'."
+    echo " "
+
+    TEST_PATH_LIST_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/build-config/tests.txt"
+    if [[ ! -f $TEST_PATH_LIST_PATH ]]
+    then
+        echo "Couldn't find list of tests for module '$MODULE_NAME'. Skipping."
+        return
+    fi
+
+    for RELATIVE_TEST_PATH in $(cat $TEST_PATH_LIST_PATH)
+    do
+        TEST_PATH=$REPOSITORY_FOLDER/$MODULE_NAME/$RELATIVE_TEST_PATH
+        compile_single_test $TEST_PATH
+    done
+}
+
+# Compiles the tests of every module listed in 'build-config/modules.txt'.
+function compile_all_tests_of_all_modules() {
+    if [[ ! -f "build-config/modules.txt" ]]
+    then
+        echo "Failed compiling tests: Couldn't find 'build-config/modules.txt'"
+        return
+    fi
+
+    for MODULE in $(cat "build-config/modules.txt")
+    do
+        compile_all_tests_of_module $MODULE
+    done
+}
+
+# Compiles the sources of every module listed in 'build-config/modules.txt'.
+function compile_all_sources_of_all_modules() {
+    if [[ ! -f "build-config/modules.txt" ]]
+    then
+        echo "Failed compiling sources: Couldn't find 'build-config/modules.txt'"
+        return
+    fi
+
+    for MODULE in $(cat "build-config/modules.txt")
+    do
+        compile_module_c_sources $MODULE
+    done
+}
+
+# Compiles all modules with DEBUG_CC_OPTIONS.
+function build_in_debug_profile {
+    echo "================ Building in Debug Profile! ================"
+    CC_OPTIONS=$DEBUG_CC_OPTIONS
+    compile_all_sources_of_all_modules
+}
+
+# Compiles all modules with RELEASE_CC_OPTIONS.
+function build_in_release_profile {
+    echo "================ Building in Release Profile! ================"
+    CC_OPTIONS=$RELEASE_CC_OPTIONS
+    compile_all_sources_of_all_modules
+}
+
+# Links all archives in .build/ into mach.elf. NOTE(review): they are listed twice, presumably to resolve references between archives - confirm.
+function link_modules {
+
+    gcc -o mach.elf .build/*.a .build/*.a
+}
+
+case $1 in
+    "d" | "dbg" | "debug")
+        build_in_debug_profile
+        link_modules
+        ;;
+
+    "r" | "release")
+        build_in_release_profile
+        link_modules
+        ;;
+
+    "c" | "clone-dependencies")
+        clone_dependencies
+        ;;
+
+    "b" | "build-dependencies")
+        build_dependencies
+        ;;
+    "t" | "build-tests")
+        compile_all_tests_of_all_modules
+        ;;
+    "h" | "help")
+        echo "Known Actions:"
+        echo "[ d | dbg | debug       ]: Build in the debug profile; build with debug symbols."
+        echo "[ r | release           ]: Build for a release, with speed optimizations."
+        echo "[ c | clone-dependencies]: Clone the dependencies using Git (network required)."
+        echo "[ b | build-dependencies]: Build the dependencies (which must have been cloned first!)."
+        echo "[ t | build-tests       ]: Build the tests of all modules."
+        echo "[ h | help              ]: Display this message."
+        echo ""
+        echo "Note: Before being able to build (debug-profile / release-profile), cloning and building the dependencies is required!"
+        ;;
+    *)
+        echo "Unknown action, try '$0 help' or '$0 h'."
+        ;;
+esac
diff --git a/build-config/modules.txt b/build-config/modules.txt
new file mode 100644
index 0000000..722e59f
--- /dev/null
+++ b/build-config/modules.txt
@@ -0,0 +1 @@
+builder
diff --git a/builder/build-config/include_paths.txt b/builder/build-config/include_paths.txt
new file mode 100644
index 0000000..e570f0e
--- /dev/null
+++ b/builder/build-config/include_paths.txt
@@ -0,0 +1,2 @@
+.build/depends/libRR/Core/core/exports
+.build/depends/libRR/Core/platform/exports
diff --git a/builder/inc-c/mach.h b/builder/inc-c/mach.h
new file mode 100644
index 0000000..0061fc6
--- /dev/null
+++ b/builder/inc-c/mach.h
@@ -0,0 +1,20 @@
+
+#ifndef MACH_H
+#define MACH_H
+
+#include /* NOTE(review): header name lost in extraction (it has to declare usz_t and i32_t); restore it from the repository */
+
+typedef struct MachScript MachScript;
+
+// Contents of a build script. NOTE(review): nothing in this patch fills it in yet.
+struct MachScript
+{
+    usz_t num_compilation_units;
+    char **compilation_unit_paths;
+};
+
+// Reads the script at 'path'. Returns 0 on success and a negative value on failure.
+i32_t mach_read_script(const char *path, MachScript *out_script);
+
+#endif // MACH_H
+
diff --git a/builder/inc-c/parser.h b/builder/inc-c/parser.h
new file mode 100644
index 0000000..04095c8
--- /dev/null
+++ b/builder/inc-c/parser.h
@@ -0,0 +1,49 @@
+
+#ifndef MACH_PARSER_H
+#define MACH_PARSER_H
+
+#include /* NOTE(review): header name lost in extraction; restore it from the repository */
+#include /* NOTE(review): header name lost in extraction; restore it from the repository */
+
+typedef struct MachToken MachToken;
+typedef struct MachTokenStream MachTokenStream;
+
+typedef enum
+{
+    MACH_TOKEN_WORD,
+    MACH_TOKEN_INTEGER,
+    MACH_TOKEN_STRING,
+    MACH_TOKEN_SPECIAL_SIGN,
+
+    MACH_TOKEN_STREAM_END,
+
+} MachTokenType;
+
+struct MachTokenStream
+{
+    usz_t len_source;
+    char *source;
+
+    usz_t num_tokens;
+    MachToken *tokens;
+};
+
+struct MachToken
+{
+    u32_t offset; // Byte offset of the token in the source of its stream
+    u32_t length; // Length of the token in bytes
+    MachTokenType type;
+
+    union {
+        rr_ascii_sign_e sign_type;
+        char *processed_string;
+    } data;
+};
+
+// Splits the 'source' of the stream into 'tokens'. Returns a negative value on failure.
+i32_t mach_tokenize(MachTokenStream *stream);
+// Prints every token of the stream to the standard output.
+void mach_display_token_stream(MachTokenStream *stream);
+
+#endif // MACH_PARSER_H
+
diff --git a/builder/src-c/config-reader.c b/builder/src-c/config-reader.c
new file mode 100644
index 0000000..a60bf48
--- /dev/null
+++ b/builder/src-c/config-reader.c
@@ -0,0 +1,43 @@
+#include <mach.h> /* NOTE(review): include target lost in extraction; inferred from the names this file uses - confirm */
+#include <parser.h> /* NOTE(review): the include targets of this file were lost in extraction; inferred from the names it uses - confirm */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Tokenizes 'string' and prints the tokens; 'out_script' is not filled in yet.
+i32_t mach_parse_script(char *string, usz_t len_string, MachScript *out_script)
+{
+    (void) out_script;
+    MachTokenStream token_stream = { .len_source = len_string, .source = string };
+    i32_t tokenization_status = mach_tokenize(&token_stream);
+    if(tokenization_status >= 0)
+        mach_display_token_stream(&token_stream);
+    free(token_stream.tokens); // Also on failure; the tokenizer never frees it.
+    return (tokenization_status < 0) ? (tokenization_status - 1024) : 0;
+}
+
+// Returns -1 if 'path' can't be opened, -2 if it can't be measured or buffered,
+// and otherwise the status of mach_parse_script (failures offset by -1024).
+i32_t mach_read_script(const char *path, MachScript *out_script)
+{
+    FILE *script_file = fopen(path, "r");
+    if(script_file == NULL)
+        return -1;
+    long file_size = (fseek(script_file, 0, SEEK_END) == 0) ? ftell(script_file) : -1;
+    char *script_string = NULL;
+    if((file_size >= 0) && (fseek(script_file, 0, SEEK_SET) == 0))
+        script_string = malloc((usz_t) file_size + 1);
+    if(script_string == NULL)
+    {
+        fclose(script_file);
+        return -2;
+    }
+    // Text mode may deliver fewer bytes than the file size; trust fread's count.
+    usz_t len_script_string = fread(script_string, 1, (usz_t) file_size, script_file);
+    script_string[len_script_string] = 0;
+    fclose(script_file);
+    i32_t parse_status = mach_parse_script(script_string, len_script_string, out_script);
+    free(script_string);
+    return (parse_status < 0) ? (parse_status - 1024) : parse_status;
+}
+
diff --git a/builder/src-c/main.c b/builder/src-c/main.c
new file mode 100644
index 0000000..08ac081
--- /dev/null
+++ b/builder/src-c/main.c
@@ -0,0 +1,27 @@
+#include <mach.h> /* NOTE(review): the include targets of this file were lost in extraction; inferred from the names it uses - confirm */
+#include <stdio.h>
+
+// Reads the script named by the only argument (default: ./MachScript.mach).
+int main(int argc, char **argv)
+{
+    const char *mach_config_path = NULL;
+    if(argc == 1)
+        mach_config_path = "./MachScript.mach";
+    if(argc == 2)
+        mach_config_path = argv[1];
+    if(mach_config_path == NULL)
+    {
+        // argv[1] is the first argument, not the program name (argv[0]).
+        printf("Usage: %s [script-path]\n", (argc > 0) ? argv[0] : "mach");
+        return -1;
+    }
+    MachScript script = {0};
+    i32_t read_status = mach_read_script(mach_config_path, &script);
+    if(read_status < 0)
+    {
+        fprintf(stderr, "Failed reading '%s' (status %d)\n", mach_config_path, (int) read_status);
+        return 1;
+    }
+    return 0;
+}
+
diff --git a/builder/src-c/object.c b/builder/src-c/object.c
new file mode 100644
index 0000000..0dd7871
--- /dev/null
+++ b/builder/src-c/object.c
@@ -0,0 +1,4 @@
+#include /* NOTE(review): header name lost in extraction; restore it from the repository */
+
+
+
diff --git a/builder/src-c/token-displayer.c b/builder/src-c/token-displayer.c
new file mode 100644
index 0000000..588be81
--- /dev/null
+++ b/builder/src-c/token-displayer.c
@@ -0,0 +1,20 @@
+#include <parser.h> /* NOTE(review): the include targets of this file were lost in extraction; inferred from the names it uses - confirm */
+
+#include <stdio.h>
+#include <string.h>
+
+// Prints one line per token: its index, followed by its text from the source.
+void mach_display_token_stream(MachTokenStream *stream)
+{
+    if((stream == NULL) || (stream->tokens == NULL))
+        return;
+    for(usz_t token_index = 0; token_index < stream->num_tokens; ++token_index)
+    {
+        const MachToken *token = &stream->tokens[token_index];
+        // '%.*s' prints the bytes in place. A stack copy sized by the token
+        // could overflow the stack on a very long token.
+        printf("#%-4d %.*s\n", (int) token_index,
+            (int) token->length, &stream->source[token->offset]);
+    }
+}
+
diff --git a/builder/src-c/tokenizer.c b/builder/src-c/tokenizer.c
new file mode 100644
index 0000000..f724eaa
--- /dev/null
+++ b/builder/src-c/tokenizer.c
@@ -0,0 +1,197 @@
+#include <parser.h> /* NOTE(review): include target lost in extraction; inferred from the names this file uses - confirm */
+
+#include <stdlib.h>
+#include /* NOTE(review): header name lost in extraction; restore it from the repository */
+
+// Decodes the rune at 'offset' without consuming it. 'out_length' receives its
+// size in bytes, or 0 at the end of the source and on invalid UTF-8.
+// NOTE(review): assumes rr_extract_utf8 stores the byte length of the rune in
+// its third argument, which is how the first call in mach_tokenize treats it.
+static rune_t mach_peek_rune(const MachTokenStream *stream, usz_t offset, usz_t *out_length)
+{
+    *out_length = 0;
+    if(offset >= stream->len_source)
+        return 0;
+    return rr_extract_utf8(stream->source, offset, out_length);
+}
+
+// Returns the offset behind the run of letters and underscores at 'offset'.
+static usz_t mach_skip_word(const MachTokenStream *stream, usz_t offset)
+{
+    usz_t len_rune = 0;
+    rune_t rune = mach_peek_rune(stream, offset, &len_rune);
+    while((len_rune > 0) && (rr_rune_is_letter(rune) || (rune == '_')))
+    {
+        offset += len_rune;
+        rune = mach_peek_rune(stream, offset, &len_rune);
+    }
+    return offset;
+}
+
+// Returns the offset behind the run of digits at 'offset'.
+static usz_t mach_skip_digits(const MachTokenStream *stream, usz_t offset)
+{
+    usz_t len_rune = 0;
+    rune_t rune = mach_peek_rune(stream, offset, &len_rune);
+    while((len_rune > 0) && rr_rune_is_digit(rune))
+    {
+        offset += len_rune;
+        rune = mach_peek_rune(stream, offset, &len_rune);
+    }
+    return offset;
+}
+
+// 'offset' is right behind a '#'. Returns the offset behind the comment which
+// starts there, or 'offset' itself if the '#' doesn't introduce a comment.
+// "##" comments run to the end of the line; "#[" comments opened with N brackets
+// run to the first N consecutive ']' (or to the end of the source).
+static usz_t mach_skip_comment(const MachTokenStream *stream, usz_t offset)
+{
+    usz_t len_rune = 0;
+    rune_t rune = mach_peek_rune(stream, offset, &len_rune);
+    if((len_rune > 0) && (rune == '#'))
+    {
+        while(len_rune > 0)
+        {
+            offset += len_rune;
+            if(rune == '\n')
+                break;
+            rune = mach_peek_rune(stream, offset, &len_rune);
+        }
+        return offset;
+    }
+
+    usz_t num_opening_brackets = 0;
+    while((len_rune > 0) && (rune == '['))
+    {
+        ++num_opening_brackets;
+        offset += len_rune;
+        rune = mach_peek_rune(stream, offset, &len_rune);
+    }
+    // Without any opening bracket, the loop below doesn't run and 'offset' stays untouched.
+    usz_t num_closing_brackets = 0;
+    while((len_rune > 0) && (num_closing_brackets < num_opening_brackets))
+    {
+        num_closing_brackets = (rune == ']') ? (num_closing_brackets + 1) : 0;
+        offset += len_rune;
+        rune = mach_peek_rune(stream, offset, &len_rune);
+    }
+    return offset;
+}
+
+// 'offset' is right behind an opening '"'. Returns the offset behind the closing
+// quote, or 0 if a line break or the end of the source comes first.
+static usz_t mach_skip_string(const MachTokenStream *stream, usz_t offset)
+{
+    usz_t len_rune = 0;
+    rune_t rune = mach_peek_rune(stream, offset, &len_rune);
+    while((len_rune > 0) && (rune != '\n'))
+    {
+        offset += len_rune;
+        if(rune == '"')
+            return offset;
+        if(rune == '\\')
+        {
+            // A backslash escapes the following rune, so that one is skipped unseen.
+            mach_peek_rune(stream, offset, &len_rune);
+            offset += len_rune;
+        }
+        rune = mach_peek_rune(stream, offset, &len_rune);
+    }
+    return 0;
+}
+
+// Appends a token covering the bytes from 'start' up to (excluding) 'end'.
+static MachToken * mach_push_token(MachTokenStream *stream, MachTokenType type, usz_t start, usz_t end)
+{
+    MachToken *token = &stream->tokens[stream->num_tokens++];
+    token->offset = (u32_t) start;
+    token->length = (u32_t) (end - start);
+    token->type = type;
+    token->data.processed_string = NULL; // !TODO!: Postprocess escape sequences
+    return token;
+}
+
+// Splits stream->source into tokens. The caller owns stream->tokens afterwards and
+// has to free it, also on failure. Returns 0 on success, -1 on invalid UTF-8,
+// -2 on an unterminated string and -3 if memory runs out.
+i32_t mach_tokenize(MachTokenStream *stream)
+{
+    usz_t tokens_capacity = 2048;
+    stream->num_tokens = 0;
+    stream->tokens = calloc(tokens_capacity, sizeof(MachToken));
+    if(stream->tokens == NULL)
+        return -3;
+
+    usz_t offset = 0;
+    while(offset < stream->len_source)
+    {
+        // There must always be one more after the last one for the STREAM_END token.
+        if((stream->num_tokens + 1) >= tokens_capacity)
+        {
+            MachToken *grown = realloc(stream->tokens, sizeof(MachToken) * tokens_capacity * 2);
+            if(grown == NULL)
+                return -3;
+            stream->tokens = grown;
+            tokens_capacity *= 2;
+        }
+        usz_t token_start = offset;
+        usz_t len_rune = 0;
+        rune_t rune = mach_peek_rune(stream, offset, &len_rune);
+        if(len_rune == 0)
+        {
+            // TODO: A log-entry because of invalid UTF-8 should be written here.
+            return -1;
+        }
+        offset += len_rune;
+
+        if(rr_rune_is_letter(rune))
+        {
+            offset = mach_skip_word(stream, offset);
+            mach_push_token(stream, MACH_TOKEN_WORD, token_start, offset);
+            continue;
+        }
+
+        if(rr_rune_is_digit(rune))
+        {
+            offset = mach_skip_digits(stream, offset);
+            mach_push_token(stream, MACH_TOKEN_INTEGER, token_start, offset);
+            continue;
+        }
+
+        if(rune == '#')
+        {
+            usz_t comment_end = mach_skip_comment(stream, offset);
+            if(comment_end != offset)
+            {
+                offset = comment_end;
+                continue;
+            }
+            // A lone '#' is no comment; it is handled as a special sign below.
+        }
+
+        if(rune == '"')
+        {
+            usz_t string_end = mach_skip_string(stream, offset);
+            if(string_end == 0)
+            {
+                // TODO: A log-entry because of an invalid string should be written here
+                return -2;
+            }
+            offset = string_end;
+            mach_push_token(stream, MACH_TOKEN_STRING, token_start, offset);
+            continue;
+        }
+
+        if(rr_rune_is_ascii_special(rune))
+        {
+            MachToken *token = mach_push_token(stream, MACH_TOKEN_SPECIAL_SIGN, token_start, offset);
+            token->data.sign_type = rr_rune_to_ascii_sign(rune);
+        }
+    }
+    // The slot reserved in the loop ends the stream; it isn't counted in 'num_tokens'.
+    mach_push_token(stream, MACH_TOKEN_STREAM_END, offset, offset);
+    --stream->num_tokens;
+    return 0;
+}
+