From 1ebf0085aa0a18913e1c92e0836e7bffef52a452 Mon Sep 17 00:00:00 2001
From: Eric-Paul Ickhorn <ericp.ickhorn@gmail.com>
Date: Sun, 11 Feb 2024 07:10:01 +0100
Subject: [PATCH] Added initial code; tokenizer/token-display, main function
 and other boilerplate like the build script

---
 .gitignore                             |  13 ++
 action.bash                            | 271 +++++++++++++++++++++++++
 build-config/modules.txt               |   1 +
 builder/build-config/include_paths.txt |   2 +
 builder/inc-c/mach.h                   |  18 ++
 builder/inc-c/parser.h                 |  47 +++++
 builder/src-c/config-reader.c          |  43 ++++
 builder/src-c/main.c                   |  27 +++
 builder/src-c/object.c                 |   4 +
 builder/src-c/token-displayer.c        |  20 ++
 builder/src-c/tokenizer.c              | 161 +++++++++++++++
 11 files changed, 607 insertions(+)
 create mode 100644 .gitignore
 create mode 100755 action.bash
 create mode 100644 build-config/modules.txt
 create mode 100644 builder/build-config/include_paths.txt
 create mode 100644 builder/inc-c/mach.h
 create mode 100644 builder/inc-c/parser.h
 create mode 100644 builder/src-c/config-reader.c
 create mode 100644 builder/src-c/main.c
 create mode 100644 builder/src-c/object.c
 create mode 100644 builder/src-c/token-displayer.c
 create mode 100644 builder/src-c/tokenizer.c

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c23c1e6
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,13 @@
+
+# Complete folders that are unwanted in commits
+*.build/
+*.local/
+*.vscode/
+
+# Machine Code
+*.a
+*.dll
+*.elf
+*.exe
+*.so
+
diff --git a/action.bash b/action.bash
new file mode 100755
index 0000000..474a94d
--- /dev/null
+++ b/action.bash
@@ -0,0 +1,271 @@
+#!/usr/bin/env bash
+
+# Always operate from the directory this script lives in, independent of the
+# caller's working directory and of whether the script was started through a
+# relative or an absolute path. Abort if that directory can't be entered,
+# because every path below is relative to it.
+cd "$(dirname "${BASH_SOURCE[0]}")" || exit 1
+REPOSITORY_FOLDER=$(pwd)
+
+
+# Prefix of the static libraries that are created from the modules.
+PROJECT_NAME="mach"
+
+# Compiler options of the two build profiles.
+DEBUG_CC_OPTIONS="-g3 -Wall -Wextra -Wpedantic"
+RELEASE_CC_OPTIONS="-O3 -Wall"
+
+# Folder below which each module gets its own folder for object files.
+MAIN_OBJECTS_FOLDER="$REPOSITORY_FOLDER/.build/objects"
+
+# Module-relative path of the optional list of additional include paths.
+CONFIG_FILE_INCLUDE_PATHS="build-config/include_paths.txt"
+
+# Include flags which every test is compiled with.
+DEFAULT_TEST_INCLUDE_PATHS="
+-I .build/depends/libRR/Core/core/exports/
+-I .build/depends/libRR/Core/platform/exports/
+-I core/exports/
+-I core/inc-c/"
+
+# Static libraries which every test is linked against.
+DEFAULT_TEST_LINKAGE_PATHS="
+$REPOSITORY_FOLDER/.build/librr-core.a
+$REPOSITORY_FOLDER/.build/librr-platform.a"
+
+
+function clone_dependencies {
+    echo "================ Cloning Dependencies! ================"
+    mkdir -p .build/depends/libRR
+    cd .build/depends/libRR/
+    if [[ -d "Core" ]]
+    then
+        rm -rf Core
+    fi
+    git clone --depth=1 https://git.nerdcult.net/libRR/Core/
+
+    cd "$REPOSITORY_FOLDER"
+}
+
+function build_dependencies {
+    echo "================ Building Dependencies! ================"
+    
+    mkdir -p "$REPOSITORY_FOLDER/.build/output"
+
+    cd .build/depends/libRR/Core/
+    bash build.bash release
+    cp .build/librr-core.a "$REPOSITORY_FOLDER/.build"
+    cp .build/librr-platform.a "$REPOSITORY_FOLDER/.build"
+
+    cd "$REPOSITORY_FOLDER"
+}
+
+function get_include_path_configuration {
+    MODULE_NAME=$1
+
+    INCLUDE_CONFIG_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/$CONFIG_FILE_INCLUDE_PATHS"
+    INCLUDE_STATEMENTS="-I $REPOSITORY_FOLDER/$MODULE_NAME/inc-c/"
+    if [[ ! -f $INCLUDE_CONFIG_PATH ]]
+    then
+        return
+    fi
+
+    for LINE in $(cat $INCLUDE_CONFIG_PATH)
+    do
+        INCLUDE_STATEMENTS="$INCLUDE_STATEMENTS -I $REPOSITORY_FOLDER/$LINE"
+    done
+}
+
+function generate_object_name {
+    INPUT_NAME=$1
+
+    NUM_SUBPATHS=$(echo $INPUT_NAME | tr -cd "/" | wc -c)
+    SANITIZED_INPUT_NAME=$(echo $INPUT_NAME | tr "/" "_")
+
+    OBJECT_NAME="$NUM_SUBPATHS-$SANITIZED_INPUT_NAME.o"
+}
+
+function compile_module_c_sources {
+    MODULE_NAME=$1
+
+    get_include_path_configuration $MODULE_NAME
+
+    MODULE_SOURCE_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/src-c"
+    MODULE_OBJECTS_FOLDER="$MAIN_OBJECTS_FOLDER/$MODULE_NAME"
+    rm -r $MODULE_OBJECTS_FOLDER
+    mkdir -p $MODULE_OBJECTS_FOLDER
+
+    # Loop through all files in the 'src-c'-folder and hand them over to GCC
+
+    cd $MODULE_SOURCE_PATH
+    MODULE_SOURCES=$(find . -mindepth 1)
+    for SOURCE_FOLDER_ITEM in $MODULE_SOURCES
+    do
+        # Cut away the dot-slash given by 'find' as abbrevation for the working directory
+        RELATIVE_SOURCE_PATH=$(echo $SOURCE_FOLDER_ITEM | cut -c "3-")
+
+        # If this folder item is a folder, it must be created as an 
+        # output-folder for the object files to be placed in
+        
+        if [[ -d $RELATIVE_SOURCE_PATH ]]
+        then
+            mkdir -p "$MODULE_OBJECTS_FOLDER/$RELATIVE_SOURCE_PATH"
+            continue
+        fi
+        
+        # Check if this is a C source file by checking the last 2 characters (the ending),
+        # and if it isn't, continue with the next file.
+
+        LEN_SOURCE_FILE_NAME=${#RELATIVE_SOURCE_PATH}
+        let PENULTIMATE_OFFSET=$LEN_SOURCE_FILE_NAME-1
+        LAST_2_CHARACTERS=$(echo $RELATIVE_SOURCE_PATH | cut -c "$PENULTIMATE_OFFSET-")
+        if [[ $LAST_2_CHARACTERS != ".c" ]]; then continue; fi
+        
+        # Status Message
+        echo "==> File:          $RELATIVE_SOURCE_PATH"
+
+        # Finally, call GCC to compile the C-file and let it place the file in the
+        # objects folder or one of the possible subfolders which now could exist.
+
+        generate_object_name $RELATIVE_SOURCE_PATH
+        
+        gcc -c $CC_OPTIONS -o \
+            "$MODULE_OBJECTS_FOLDER/$OBJECT_NAME" \
+            "$MODULE_SOURCE_PATH/$RELATIVE_SOURCE_PATH" \
+            $INCLUDE_STATEMENTS
+    done
+
+    ar -rvs $REPOSITORY_FOLDER/.build/$PROJECT_NAME-$MODULE_NAME.a $MODULE_OBJECTS_FOLDER/*
+    cd $REPOSITORY_FOLDER
+}
+
+function get_test_linkage_path_configuration() {
+    TEST_PATH=$1
+
+    LINKAGE_PATHS=$DEFAULT_TEST_LINKAGE_PATHS
+    if [[ -f "$TEST_PATH/linkage_paths.txt" ]]
+    then
+        for LINKAGE_ITEM in $(cat "$TEST_PATH/linkage_paths.txt")
+        do
+            LINKAGE_PATHS="$LINKAGE_PATHS $REPOSITORY_FOLDER/$LINKAGE_ITEM"
+        done
+    fi
+}
+
+# Assembles the '-I' flags of a test in the global variable
+# INCLUDE_STATEMENTS: the default include flags, the test's own 'inc-c'
+# folder, and one flag for each whitespace-separated entry of the test's
+# optional 'include_paths.txt' (relative to the repository folder).
+#   $1: Path of the test's folder
+get_test_include_path_configuration() {
+    TEST_PATH=$1
+
+    INCLUDE_CONFIG_PATH="$TEST_PATH/include_paths.txt"
+    INCLUDE_STATEMENTS="$DEFAULT_TEST_INCLUDE_PATHS -I $TEST_PATH/inc-c/"
+
+    if [[ -f $INCLUDE_CONFIG_PATH ]]
+    then
+        for LINE in $(cat $INCLUDE_CONFIG_PATH)
+        do
+            INCLUDE_STATEMENTS+=" -I $REPOSITORY_FOLDER/$LINE"
+        done
+    fi
+}
+
+
+function compile_single_test() {
+    TEST_PATH=$1
+    TEST_NAME=$(basename $TEST_PATH)
+
+    echo "Compiling Test: $TEST_NAME"
+
+    # TODO: As a small improvement, the tests could be able to have multiple sub-folders for sources.
+
+    get_test_include_path_configuration $TEST_PATH
+    get_test_linkage_path_configuration $TEST_PATH
+    gcc $CC_OPTIONS -o $TEST_PATH/$TEST_NAME.elf $TEST_PATH/*.c $LINKAGE_PATHS $INCLUDE_STATEMENTS
+}
+
+function compile_all_tests_of_module() {
+    MODULE_NAME=$1
+
+    echo "================================================================"
+    echo "COMPILING ALL TESTS OF MODULE: '$MODULE_NAME'."
+    echo " "
+
+    TEST_PATH_LIST_PATH="$REPOSITORY_FOLDER/$MODULE_NAME/build-config/tests.txt"
+    if [[ ! -f $TEST_PATH_LIST_PATH ]]
+    then
+        echo "Couldn't find list of tests for module '$MODULE_NAME'. Skipping."
+        return
+    fi
+
+    for RELATIVE_TEST_PATH in $(cat $TEST_PATH_LIST_PATH)
+    do
+        TEST_PATH=$REPOSITORY_FOLDER/$MODULE_NAME/$RELATIVE_TEST_PATH
+        compile_single_test $TEST_PATH
+    done
+}
+
+# Compiles the tests of every module that is listed in
+# 'build-config/modules.txt' (one module name per whitespace-separated entry).
+# Returns non-zero if the module list doesn't exist.
+function compile_all_tests_of_all_modules() {
+    local MODULE_LIST_PATH="$REPOSITORY_FOLDER/build-config/modules.txt"
+    if [[ ! -f $MODULE_LIST_PATH ]]
+    then
+        echo "Failed compiling tests: Couldn't find 'build-config/modules.txt'"
+        return 1
+    fi
+
+    for MODULE in $(cat "$MODULE_LIST_PATH")
+    do
+        compile_all_tests_of_module "$MODULE"
+    done
+}
+
+# Compiles the sources of every module that is listed in
+# 'build-config/modules.txt' (one module name per whitespace-separated entry).
+# All modules are attempted; the function returns non-zero if the module list
+# doesn't exist or if compiling any of the modules reported a failure.
+function compile_all_sources_of_all_modules() {
+    local MODULE_LIST_PATH="$REPOSITORY_FOLDER/build-config/modules.txt"
+    if [[ ! -f $MODULE_LIST_PATH ]]
+    then
+        echo "Failed compiling sources: Couldn't find 'build-config/modules.txt'"
+        return 1
+    fi
+
+    local BUILD_STATUS=0
+    for MODULE in $(cat "$MODULE_LIST_PATH")
+    do
+        compile_module_c_sources "$MODULE" || BUILD_STATUS=1
+    done
+    return $BUILD_STATUS
+}
+
+function build_in_debug_profile {
+    echo "================ Building in Debug Profile! ================"
+    CC_OPTIONS=$DEBUG_CC_OPTIONS
+    compile_all_sources_of_all_modules
+}
+
+function build_in_release_profile {
+    echo "================ Building in Release Profile! ================"
+    CC_OPTIONS=$RELEASE_CC_OPTIONS
+    compile_all_sources_of_all_modules
+}
+
+function link_modules {
+    
+    gcc -o mach.elf .build/*.a .build/*.a
+}
+
+case $1 in
+    "d" | "dbg" | "debug")
+        build_in_debug_profile 
+        link_modules
+        ;;
+
+    "r" | "release")
+        build_in_release_profile 
+        link_modules
+        ;;
+    
+    "c" | "clone-dependencies")
+        clone_dependencies 
+        ;;
+    
+    "b" | "build-dependencies")
+        build_dependencies
+        ;;
+    "t" | "build-tests")
+        compile_all_tests_of_all_modules
+        ;;
+    "h" | "help")
+        echo "Known Actions:"
+        echo "[ d | dbg | debug ]:        Build in the debug profile; build with debug symbols."
+        echo "[ r | release ]:            Build for a release, with speed optimizations."
+        echo "[ c | clone-dependencies]:  Clone the dependencies using Git (network required)."
+        echo "[ b | build-dependencies]:  Build the dependencies (which must have been cloned first!)."
+        echo "[ h | help ]:               Display this message."
+        echo ""
+        echo "Note: Before being able to build (debug-profile / release-profile), cloning and building the dependencies is required!"
+        ;;
+    *)
+        echo "Unknown action, try '$0 help' or '$0 h'."
+        ;;
+esac
diff --git a/build-config/modules.txt b/build-config/modules.txt
new file mode 100644
index 0000000..722e59f
--- /dev/null
+++ b/build-config/modules.txt
@@ -0,0 +1 @@
+builder
diff --git a/builder/build-config/include_paths.txt b/builder/build-config/include_paths.txt
new file mode 100644
index 0000000..e570f0e
--- /dev/null
+++ b/builder/build-config/include_paths.txt
@@ -0,0 +1,2 @@
+.build/depends/libRR/Core/core/exports
+.build/depends/libRR/Core/platform/exports
diff --git a/builder/inc-c/mach.h b/builder/inc-c/mach.h
new file mode 100644
index 0000000..0061fc6
--- /dev/null
+++ b/builder/inc-c/mach.h
@@ -0,0 +1,18 @@
+
+#ifndef MACH_H
+#define MACH_H
+
+#include <librr/types.h>
+
+// Result structure which is handed to 'mach_read_script'.
+typedef struct MachScript MachScript;
+
+struct MachScript
+{
+    // NOTE(review): Presumably the number of entries in
+    // 'compilation_unit_paths'; nothing fills these fields yet - confirm
+    // once the parser is written.
+    usz_t num_compilation_units;
+    char **compilation_unit_paths;
+};
+
+// Reads the file at 'path' completely and hands its content to the script
+// parser. Returns a negative value on failure: -1 if the file couldn't be
+// opened, and a failure status of the parser lowered by another 1024.
+// At the moment, the script is only tokenized and the tokens are printed;
+// 'out_script' is not written to.
+i32_t mach_read_script(const char *path, MachScript *out_script);
+
+#endif // MACH_H
+
diff --git a/builder/inc-c/parser.h b/builder/inc-c/parser.h
new file mode 100644
index 0000000..04095c8
--- /dev/null
+++ b/builder/inc-c/parser.h
@@ -0,0 +1,47 @@
+
+#ifndef MACH_PARSER_H
+#define MACH_PARSER_H
+
+#include <librr/types.h>
+#include <librr/runes.h>
+
+// Forward declarations; the token stream refers to the token structure
+// before that structure is defined.
+typedef struct MachToken MachToken;
+typedef struct MachTokenStream MachTokenStream;
+
+// The kinds of tokens which the tokenizer distinguishes.
+typedef enum
+{
+    // A letter, followed by any number of letters and underscores
+    MACH_TOKEN_WORD,
+    // A sequence of digits
+    MACH_TOKEN_INTEGER,
+    // A text enclosed in double quotes, including the quotes
+    MACH_TOKEN_STRING,
+    // A single rune which 'rr_rune_is_ascii_special' accepts
+    MACH_TOKEN_SPECIAL_SIGN,
+    
+    // Marker for the end of a token stream
+    MACH_TOKEN_STREAM_END,
+    
+} MachTokenType;
+
+// Input and output of 'mach_tokenize': The caller fills in the source text,
+// the tokenizer fills in the tokens.
+struct MachTokenStream
+{
+    // Length of 'source' in bytes and the UTF-8 encoded text itself
+    usz_t len_source;
+    char *source;
+    
+    // Number of tokens in 'tokens' and the token array, which is allocated
+    // by the tokenizer
+    usz_t num_tokens;
+    MachToken *tokens;
+};
+
+// A single token. Its text isn't copied; it is referenced as a range within
+// the source of the token stream which the token belongs to.
+struct MachToken
+{
+    // Byte offset of the token's first byte in the source and the token's
+    // length in bytes
+    u32_t offset;
+    u32_t length;
+    MachTokenType type;
+    
+    // Additional information about the token
+    union {
+        // Sign as returned by 'rr_rune_to_ascii_sign'
+        rr_ascii_sign_e             sign_type;
+        // Reserved for the content of string tokens with the escape
+        // sequences resolved; the tokenizer sets it to NULL for now.
+        char                       *processed_string;
+    } data;
+};
+
+// Splits 'stream->source' ('stream->len_source' bytes) into tokens and stores
+// them in 'stream->tokens' / 'stream->num_tokens'. The token array is
+// allocated by this function. Returns 0 on success and a negative value on
+// failure (-1: invalid UTF-8, -2: invalid string literal).
+i32_t mach_tokenize(MachTokenStream *stream);
+
+// Prints one line per token of the stream to the standard output: The index
+// of the token, followed by the token's text.
+void mach_display_token_stream(MachTokenStream *stream);
+
+#endif // MACH_PARSER_H
+
diff --git a/builder/src-c/config-reader.c b/builder/src-c/config-reader.c
new file mode 100644
index 0000000..a60bf48
--- /dev/null
+++ b/builder/src-c/config-reader.c
@@ -0,0 +1,43 @@
+#include <mach.h>
+#include <parser.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Processes the text of a script. For now, this tokenizes the text and prints
+// the tokens to the standard output; 'out_script' isn't written to yet.
+//
+// string:      Text of the script (UTF-8)
+// len_string:  Length of the text in bytes
+// out_script:  Reserved for the parsed script; currently unused
+//
+// Returns 0 on success. If the tokenizer fails, its (negative) status lowered
+// by 1024 is returned.
+i32_t mach_parse_script(char *string, usz_t len_string, MachScript *out_script)
+{
+    // There is no parser yet which could fill the script structure.
+    (void) out_script;
+    
+    MachTokenStream token_stream;
+    token_stream.len_source = len_string;
+    token_stream.source = string;
+    token_stream.num_tokens = 0;
+    token_stream.tokens = NULL;
+    
+    i32_t tokenization_status = mach_tokenize(&token_stream);
+    if(tokenization_status >= 0)
+        mach_display_token_stream(&token_stream);
+    
+    // The token array must be released on the failure path as well;
+    // 'free' accepts NULL in case there is nothing to release.
+    free(token_stream.tokens);
+    
+    if(tokenization_status < 0)
+        return tokenization_status - 1024;
+    return 0;
+}
+
+// Reads the whole file at 'path' into memory and hands it to
+// 'mach_parse_script'.
+//
+// path:        Path of the script file
+// out_script:  Passed on to the parser
+//
+// Returns the parser's status if it succeeded, and otherwise:
+//   -1  The file couldn't be opened.
+//   -2  The file's size couldn't be determined or reading it failed.
+//   -3  There wasn't enough memory for the file's content.
+//   A failure status of the parser, lowered by another 1024.
+i32_t mach_read_script(const char *path, MachScript *out_script)
+{
+    // Binary mode; the size measured below must match the bytes being read.
+    FILE *script_file = fopen(path, "rb");
+    if(script_file == NULL)
+        return -1;
+    
+    i32_t status = -2;
+    char *script_string = NULL;
+    long file_size = 0;
+    usz_t len_script_string = 0;
+    
+    if(fseek(script_file, 0, SEEK_END) != 0)
+        goto cleanup;
+    file_size = ftell(script_file);
+    if(file_size < 0)
+        goto cleanup;
+    if(fseek(script_file, 0, SEEK_SET) != 0)
+        goto cleanup;
+    
+    script_string = malloc((size_t) file_size + 1);
+    if(script_string == NULL)
+    {
+        status = -3;
+        goto cleanup;
+    }
+    
+    // Continue with the number of bytes which were actually read and
+    // terminate the text, so that no uninitialized byte is ever looked at.
+    len_script_string = fread(script_string, 1, (size_t) file_size, script_file);
+    if(ferror(script_file))
+        goto cleanup;
+    script_string[len_script_string] = '\0';
+    
+    status = mach_parse_script(script_string, len_script_string, out_script);
+    if(status < 0)
+        status -= 1024;
+    
+cleanup:
+    free(script_string);
+    fclose(script_file);
+    return status;
+}
+
diff --git a/builder/src-c/main.c b/builder/src-c/main.c
new file mode 100644
index 0000000..08ac081
--- /dev/null
+++ b/builder/src-c/main.c
@@ -0,0 +1,27 @@
+#include <mach.h>
+#include <stdio.h>
+
+// Entry point: Reads the script whose path is given as the only argument, or
+// './MachScript.mach' if no argument is given. Returns 0 on success and 1 if
+// too many arguments were given or the script couldn't be read.
+int main(int argc, char **argv)
+{
+    const char *mach_config_path = NULL;
+    if(argc <= 1)
+    {
+        mach_config_path = "./MachScript.mach";
+    }
+    else if(argc == 2)
+    {
+        mach_config_path = argv[1];
+    }
+    
+    if(mach_config_path == NULL)
+    {
+        // The program's own name is the first entry of the argument vector.
+        fprintf(stderr, "Usage: %s [config-path (defaults to ./MachScript.mach)]\n", argv[0]);
+        return 1;
+    }
+    
+    MachScript script;
+    i32_t read_status = mach_read_script(mach_config_path, &script);
+    if(read_status < 0)
+    {
+        fprintf(stderr, "Failed reading script '%s' (status %d).\n", mach_config_path, (int) read_status);
+        return 1;
+    }
+    
+    return 0;
+}
+
diff --git a/builder/src-c/object.c b/builder/src-c/object.c
new file mode 100644
index 0000000..0dd7871
--- /dev/null
+++ b/builder/src-c/object.c
@@ -0,0 +1,4 @@
+#include <parser.h>
+
+
+
diff --git a/builder/src-c/token-displayer.c b/builder/src-c/token-displayer.c
new file mode 100644
index 0000000..588be81
--- /dev/null
+++ b/builder/src-c/token-displayer.c
@@ -0,0 +1,20 @@
+#include <parser.h>
+
+#include <string.h>
+#include <stdio.h>
+
+// Prints all tokens of the stream to the standard output, one per line, as
+// the token's index (left-aligned, at least 4 characters wide, behind a '#')
+// followed by the token's text.
+void mach_display_token_stream(MachTokenStream *stream)
+{
+    for(usz_t token_index = 0; token_index < stream->num_tokens; ++token_index)
+    {
+        const MachToken *token = &stream->tokens[token_index];
+        
+        // The text is written straight out of the source. Copying it into
+        // an array on the stack first would let a single very long token
+        // overflow the stack.
+        printf("#%-4d ", (int) token_index);
+        fwrite(&stream->source[token->offset], 1, token->length, stdout);
+        putchar('\n');
+    }
+}
+
diff --git a/builder/src-c/tokenizer.c b/builder/src-c/tokenizer.c
new file mode 100644
index 0000000..f724eaa
--- /dev/null
+++ b/builder/src-c/tokenizer.c
@@ -0,0 +1,161 @@
+#include <parser.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Decodes the rune which starts at the given byte offset, without consuming
+// it. The rune's length in bytes is stored in '*out_length'; a length of zero
+// means that there is no usable rune at that position: The offset is at the
+// end of the source, the decoder reported no length (which the tokenizer
+// treats as invalid UTF-8), or the rune would reach beyond the source's end.
+static rune_t mach_peek_rune(const MachTokenStream *stream, usz_t offset, usz_t *out_length)
+{
+    *out_length = 0;
+    if(offset >= stream->len_source)
+        return 0;
+    
+    // The length is zeroed before the call and only evaluated after it, which
+    // works no matter whether the decoder assigns the length or adds it.
+    rune_t rune = rr_extract_utf8(stream->source, offset, out_length);
+    if((*out_length) > (stream->len_source - offset))
+    {
+        *out_length = 0;
+        return 0;
+    }
+    return rune;
+}
+
+// Appends a token which covers the bytes from 'start' up to (excluding) 'end'
+// and returns it, so that the caller can fill in the token's data. The caller
+// must have made sure that there is room for the token.
+static MachToken * mach_append_token(MachTokenStream *stream, MachTokenType type, usz_t start, usz_t end)
+{
+    MachToken *token = &stream->tokens[stream->num_tokens++];
+    token->offset = (u32_t) start;
+    token->length = (u32_t) (end - start);
+    token->type = type;
+    token->data.processed_string = NULL;
+    return token;
+}
+
+// Skips the remainder of a line comment, including the newline at its end,
+// and returns the offset behind it. Stops early at a rune which can't be
+// decoded, leaving it to the caller to report that rune.
+static usz_t mach_skip_line_comment(const MachTokenStream *stream, usz_t offset)
+{
+    while(offset < stream->len_source)
+    {
+        usz_t len_rune = 0;
+        rune_t rune = mach_peek_rune(stream, offset, &len_rune);
+        if(len_rune == 0)
+            break;
+        offset += len_rune;
+        if(rune == '\n')
+            break;
+    }
+    return offset;
+}
+
+// Skips a block comment and returns the offset behind it. 'offset' must be
+// the offset of the first opening bracket behind the '#'. The comment is
+// opened by one or more brackets and is closed by the same number of
+// directly consecutive closing brackets; a comment which is never closed
+// reaches up to the end of the source. Stops early at a rune which can't be
+// decoded, leaving it to the caller to report that rune.
+static usz_t mach_skip_block_comment(const MachTokenStream *stream, usz_t offset)
+{
+    usz_t len_rune = 0;
+    
+    // Count how many brackets are needed to end this comment
+    
+    usz_t num_opening_brackets = 0;
+    while((mach_peek_rune(stream, offset, &len_rune) == '[') && (len_rune != 0))
+    {
+        ++num_opening_brackets;
+        offset += len_rune;
+    }
+    
+    // Find the end of the comment
+    
+    usz_t num_closing_brackets = 0;
+    while(offset < stream->len_source)
+    {
+        rune_t rune = mach_peek_rune(stream, offset, &len_rune);
+        if(len_rune == 0)
+            break;
+        offset += len_rune;
+        
+        if(rune != ']')
+        {
+            num_closing_brackets = 0;
+            continue;
+        }
+        ++num_closing_brackets;
+        if(num_closing_brackets == num_opening_brackets)
+            break;
+    }
+    return offset;
+}
+
+// Splits the source of the stream into tokens.
+//
+// The caller provides 'stream->source' and 'stream->len_source'. On success,
+// 'stream->tokens' is a newly allocated array holding 'stream->num_tokens'
+// tokens, followed by one token of type MACH_TOKEN_STREAM_END which is not
+// included in 'num_tokens'; the caller releases the array with 'free'.
+// On failure, nothing is left allocated and 'stream->tokens' is NULL.
+//
+// Recognized are words (a letter, followed by letters and underscores),
+// integers (digits), strings in double quotes (a backslash makes the rune
+// behind it part of the string) and single special signs. Line comments
+// (started by '##') and block comments (started by '#[') are skipped, just as
+// every rune which belongs to none of the above.
+//
+// Returns 0 on success and otherwise:
+//   -1  The source contains invalid UTF-8.
+//   -2  A string contains a newline or isn't closed.
+//   -3  There wasn't enough memory for the tokens.
+i32_t mach_tokenize(MachTokenStream *stream)
+{
+    usz_t tokens_capacity = 2048;
+    stream->num_tokens = 0;
+    stream->tokens = calloc(tokens_capacity, sizeof(MachToken));
+    if(stream->tokens == NULL)
+        return -3;
+    
+    i32_t status = 0;
+    usz_t offset = 0;
+    while(offset < stream->len_source)
+    {
+        // There must always be one more after the last one for the STREAM_END token.
+        if((stream->num_tokens + 1) >= tokens_capacity)
+        {
+            usz_t grown_capacity = tokens_capacity * 2;
+            MachToken *grown_tokens = realloc(stream->tokens, sizeof(MachToken) * grown_capacity);
+            if(grown_tokens == NULL)
+            {
+                status = -3;
+                break;
+            }
+            stream->tokens = grown_tokens;
+            tokens_capacity = grown_capacity;
+        }
+        
+        usz_t token_start = offset;
+        usz_t len_rune = 0;
+        rune_t rune = mach_peek_rune(stream, offset, &len_rune);
+        if(len_rune == 0)
+        {
+            // TODO: A log-entry because of invalid UTF-8 should be written here.
+            status = -1;
+            break;
+        }
+        offset += len_rune;
+        
+        if(rr_rune_is_letter(rune))
+        {
+            // The rune which ends the word is only looked at, not consumed.
+            for(;;)
+            {
+                usz_t len_next = 0;
+                rune_t next = mach_peek_rune(stream, offset, &len_next);
+                if(len_next == 0)
+                    break;
+                if(!rr_rune_is_letter(next) && (next != '_'))
+                    break;
+                offset += len_next;
+            }
+            mach_append_token(stream, MACH_TOKEN_WORD, token_start, offset);
+            continue;
+        }
+        
+        if(rr_rune_is_digit(rune))
+        {
+            // The rune which ends the integer is only looked at, not consumed.
+            for(;;)
+            {
+                usz_t len_next = 0;
+                rune_t next = mach_peek_rune(stream, offset, &len_next);
+                if((len_next == 0) || !rr_rune_is_digit(next))
+                    break;
+                offset += len_next;
+            }
+            mach_append_token(stream, MACH_TOKEN_INTEGER, token_start, offset);
+            continue;
+        }
+        
+        if(rune == '#')
+        {
+            usz_t len_following = 0;
+            rune_t following_rune = mach_peek_rune(stream, offset, &len_following);
+            if((len_following != 0) && (following_rune == '#'))
+            {
+                offset = mach_skip_line_comment(stream, offset + len_following);
+                continue;
+            }
+            if((len_following != 0) && (following_rune == '['))
+            {
+                offset = mach_skip_block_comment(stream, offset);
+                continue;
+            }
+            // A lone '#' is handled like every other special sign below.
+        }
+        
+        if(rune == '"')
+        {
+            // A string is faulty until its closing quote has been found.
+            i32_t string_status = -2;
+            while(offset < stream->len_source)
+            {
+                rune = mach_peek_rune(stream, offset, &len_rune);
+                if(len_rune == 0)
+                {
+                    string_status = -1;
+                    break;
+                }
+                offset += len_rune;
+                
+                if(rune == '"')
+                {
+                    string_status = 0;
+                    break;
+                }
+                if(rune == '\n')
+                    break;
+                
+                // If this is a backslash, skip the next character
+                if(rune == '\\')
+                {
+                    mach_peek_rune(stream, offset, &len_rune);
+                    offset += len_rune;
+                }
+            }
+            if(string_status < 0)
+            {
+                // TODO: A log-entry because of an invalid string should be written here
+                status = string_status;
+                break;
+            }
+            // !TODO!: Postprocess escape sequences
+            mach_append_token(stream, MACH_TOKEN_STRING, token_start, offset);
+            continue;
+        }
+        
+        if(rr_rune_is_ascii_special(rune))
+        {
+            MachToken *token = mach_append_token(stream, MACH_TOKEN_SPECIAL_SIGN, token_start, offset);
+            token->data.sign_type = rr_rune_to_ascii_sign(rune);
+            continue;
+        }
+        
+        // Everything else (like whitespace) separates tokens and is skipped.
+    }
+    
+    if(status < 0)
+    {
+        free(stream->tokens);
+        stream->tokens = NULL;
+        stream->num_tokens = 0;
+        return status;
+    }
+    
+    // Fill the slot which is kept free behind the last token.
+    MachToken *end_token = mach_append_token(stream, MACH_TOKEN_STREAM_END, offset, offset);
+    (void) end_token;
+    --stream->num_tokens;
+    
+    return 0;
+}
+