feature (JSON parser): implemented AST

This commit is contained in:
antifallobst 2023-05-26 14:53:34 +02:00
parent a5233b7fe1
commit 2ffbaa5322
5 changed files with 580 additions and 206 deletions

97
inc/drivers/json.h Normal file
View File

@ -0,0 +1,97 @@
// This file is part of noxos and licensed under the MIT open source license

#ifndef NOXOS_JSON_H
#define NOXOS_JSON_H

#include "utils/string.h"
#include "utils/stdtypes.h"

/*
 * JSON node schematic
 *
 *                       +-----------+
 *        +----------|   | Root Node |------------+
 *        |              +-----------+            |
 *        |                    ^                  |
 *        |                    |                  |
 *  [childs_start]    /----[parent]------\   [childs_end]
 *        |          /         |          \       |
 *        v         /          |           \      v
 *    +------+            +------+            +------+
 *    | Node |--[next]->  | Node |--[next]->  | Node |
 *    |      |<-[prev]--  |      |<-[prev]--  |      |  ---> . . .
 *    +------+            +------+            +------+
 *
 *        |
 *        v
 *      . . .
 *
 * */

// Advance *token_id by one and bail out of the calling parser function
// (which must return bool) when the token stream ends prematurely.
// Wrapped in do { ... } while (0) so the expansion is a single statement
// even after an unbraced `if`; the trailing semicolon is part of the
// macro because the existing call sites invoke it without one.
#define JSON_INCREMENT_TOKEN_ID                                           \
    do {                                                                  \
        *token_id += 1;                                                   \
        if (*token_id >= json->num_tokens) {                              \
            log(LOG_ERROR, "failed to parse json -> unexpected EOF");     \
            json_node_dump(json->root_node, 0);                           \
            return false;                                                 \
        }                                                                 \
    } while (0);

// Lexical classification produced by json_tokenize().
typedef enum {
    JSON_TOKEN_NUMERIC,   // number literal (decimal, or 0x / 0b / 0o prefixed)
    JSON_TOKEN_TEXT,      // bare word (e.g. true / false / null)
    JSON_TOKEN_STRING,    // double-quoted string literal (quotes stripped)
    JSON_TOKEN_SPECIAL    // single structural character ({ } [ ] : , ...)
} json_token_type_E;

// AST node classification.
typedef enum {
    JSON_NODE_OBJECT,
    JSON_NODE_ARRAY,
    JSON_NODE_STRING,
    JSON_NODE_NUMBER,
    JSON_NODE_BOOL,
    JSON_NODE_NULL
} json_node_type_E;

// One lexical token; `string` points into json_T::string_buffer.
typedef struct {
    json_token_type_E type;
    uint64_t          value;    // decoded value for NUMERIC tokens, else 0
    string_t          string;   // NUL-terminated token text
    uint32_t          line;     // 1-based source position, for diagnostics
    uint32_t          column;
} json_token_T;

typedef struct json_node_T json_node_T;

// AST node, linked into a doubly linked sibling list under its parent.
struct json_node_T {
    json_node_type_E type;
    string_t         string;        // token text ("" for containers)
    uint64_t         value;         // numeric / boolean payload (0 for containers)
    json_node_T*     parent;
    json_node_T*     childs_start;  // first child, NULL if none
    json_node_T*     childs_end;    // last child, NULL if none
    json_node_T*     prev;          // previous sibling
    json_node_T*     next;          // next sibling
};

// A parsed JSON document; owns `tokens`, `string_buffer` and the node tree.
typedef struct {
    json_token_T* tokens;
    uint64_t      num_tokens;
    void*         string_buffer;    // backing store for all token strings
    json_node_T*  root_node;
} json_T;

json_T*      json_from_string       (string_t str);
void         json_destruct          (json_T* json);

json_node_T* json_node_alloc        (json_node_T* parent, json_node_type_E type, json_token_T* token);
void         json_node_destruct     (json_node_T* node);
void         json_node_dump         (json_node_T* node, uint32_t indent);
string_t     json_node_type_to_string(json_node_type_E type);

void         json_tokenize          (json_T* json, string_t str);
bool         json_parse             (json_T* json);
bool         json_parse_assignment  (json_T* json, uint32_t* token_id, json_node_T* node);
bool         json_parse_object      (json_T* json, uint32_t* token_id, json_node_T* node);
bool         json_parse_array       (json_T* json, uint32_t* token_id, json_node_T* node);

#endif //NOXOS_JSON_H

View File

@ -1,32 +0,0 @@
// This file is part of noxos and licensed under the MIT open source license

#ifndef NOXOS_JSON_H
#define NOXOS_JSON_H

#include <utils/string.h>
#include <utils/stdtypes.h>

// Lexical classification produced by json_tokenize().
typedef enum {
    JSON_TOKEN_NUMERIC,   // number literal (decimal, or 0x / 0b / 0o prefixed)
    JSON_TOKEN_TEXT,      // bare word (e.g. true / false / null)
    JSON_TOKEN_STRING,    // double-quoted string literal (quotes stripped)
    JSON_TOKEN_SPECIAL    // single structural character ({ } [ ] : , ...)
} json_token_type_E;

// One lexical token; `string` points into json_T::string_buffer.
typedef struct {
    json_token_type_E type;
    uint64_t          value;    // decoded value for NUMERIC tokens, else 0
    string_t          string;   // NUL-terminated token text
} json_token_T;

// A tokenized JSON document; owns `tokens` and `string_buffer`.
typedef struct {
    json_token_T* tokens;
    uint64_t      num_tokens;
    void*         string_buffer;
} json_T;

json_T* json_from_string(string_t str);
void    json_destruct   (json_T* json);
void    json_tokenize   (json_T* json, string_t str);

#endif //NOXOS_JSON_H

View File

@ -2,7 +2,7 @@
#include <boot/config.h>
#include <drivers/fs/vfs.h>
-#include <drivers/json/json.h>
+#include "drivers/json.h"
#include <utils/logger.h>
#include <utils/memory.h>
@ -25,6 +25,7 @@ void sysconfig_init() {
json_T* json = json_from_string(buffer);
+json_node_dump(json->root_node, 0);
json_destruct(json);

481
src/drivers/json.c Normal file
View File

@ -0,0 +1,481 @@
// This file is part of noxos and licensed under the MIT open source license
#include "drivers/json.h"
#include "utils/memory.h"
#include "utils/logger.h"
// Build a JSON AST from a source string.
// On success the caller owns the returned json_T and must release it with
// json_destruct(); on a parse failure all intermediate state is freed and
// NULL is returned.
json_T* json_from_string(string_t str) {
    json_T* json = memory_allocate(sizeof(json_T));

    DEBUG("\n%s", str);

    json_tokenize(json, str);

    if (!json_parse(json)) {
        json_destruct(json);
        return NULL;
    }

    return json;
}
// Release a json_T and everything it owns: the node tree first, then the
// backing string buffer, the token array, and finally the struct itself.
void json_destruct(json_T* json) {
    json_node_destruct(json->root_node);

    memory_free(json->string_buffer);
    memory_free(json->tokens);
    memory_free(json);
}
// Allocate a new AST node of the given type and append it to `parent`'s
// child list (pass parent == NULL for the root node).
// Container nodes (object/array) get an empty payload; leaf nodes copy
// their payload from `token`.
json_node_T* json_node_alloc(json_node_T* parent, json_node_type_E type, json_token_T* token) {
    json_node_T* node = memory_allocate(sizeof(json_node_T));

    node->type = type;

    if (type == JSON_NODE_OBJECT || type == JSON_NODE_ARRAY) {
        node->string = "";
        node->value  = 0;
    } else {
        node->string = token->string;
        node->value  = token->value;
    }

    node->parent       = parent;
    node->childs_start = NULL;
    node->childs_end   = NULL;
    node->prev         = NULL;
    node->next         = NULL;

    // Root node: nothing to link into.
    if (parent == NULL) {
        return node;
    }

    // Append at the tail of the parent's doubly linked child list.
    if (parent->childs_start == NULL) {
        parent->childs_start = node;
    }
    if (parent->childs_end != NULL) {
        parent->childs_end->next = node;
        node->prev               = parent->childs_end;
    }
    parent->childs_end = node;

    return node;
}
// Recursively free a node and its whole subtree.
// `next` is captured before each child is freed, because the sibling link
// dies with the child.
void json_node_destruct(json_node_T* node) {
    for (json_node_T* child = node->childs_start; child != NULL;) {
        json_node_T* following = child->next;
        json_node_destruct(child);
        child = following;
    }

    memory_free(node);
}
// Debug-print a node and, recursively, its children, indenting each level
// by two extra columns.
void json_node_dump(json_node_T* node, uint32_t indent) {
    // Indentation prefix (VLA sized by the current recursion depth).
    char prefix[indent + 1];
    memory_set(prefix, ' ', indent);
    prefix[indent] = '\0';

    DEBUG("%s[ %s - %s ]", prefix, json_node_type_to_string(node->type), node->string);

    for (json_node_T* child = node->childs_start; child != NULL; child = child->next) {
        json_node_dump(child, indent + 2);
    }
}
// Map a node type to its human-readable label (for json_node_dump output).
// Unknown values yield "Invalid".
string_t json_node_type_to_string(json_node_type_E type) {
    static string_t labels[] = {
        [JSON_NODE_OBJECT] = "Object",
        [JSON_NODE_ARRAY]  = "Array",
        [JSON_NODE_STRING] = "String",
        [JSON_NODE_NUMBER] = "Number",
        [JSON_NODE_BOOL]   = "Boolean",
        [JSON_NODE_NULL]   = "Null",
    };

    if ((unsigned)type < sizeof(labels) / sizeof(labels[0])) {
        return labels[type];
    }
    return "Invalid";
}
// Count the tokens in `str` so json_tokenize() can size its allocations.
// Must walk the input with exactly the same rules as json_tokenize().
uint64_t json_get_num_tokens(string_t str) {
    uint32_t num = 0;

    while (*str != '\0') {
        if (*str == '"') {
            // String literal: scan up to the closing quote.
            str++;
            while (*str != '\0' && *str != '"') {
                str++;
            }
            num++;
            // FIX: only skip the closing quote when it exists; the previous
            // unconditional `str++` stepped past the NUL terminator on an
            // unterminated string and kept scanning out-of-bounds memory.
            if (*str == '"') {
                str++;
            }
            continue;
        }
        if (string_is_char_special(*str)) {
            // Structural characters are single-character tokens.
            num++;
            str++;
            continue;
        }
        if (string_is_char_number(*str)) {
            // Skip an optional radix prefix (0x / 0b / 0o) before the digits.
            if (str[0] == '0' && (str[1] == 'x' || str[1] == 'b' || str[1] == 'o')) {
                str += 2;
            }
            while (*str != '\0' && string_is_char_number(*str)) {
                str++;
            }
            num++;
            continue;
        }
        if (string_is_char_alpha(*str)) {
            // Bare word (true / false / null / identifiers).
            while (*str != '\0' && string_is_char_alpha(*str)) {
                str++;
            }
            num++;
            continue;
        }
        // Whitespace and anything unrecognized is skipped.
        str++;
    }

    return num;
}
// Split `str` into json->tokens. Allocates the token array and a backing
// string buffer that owns every token's NUL-terminated text.
// Buffer size = strlen(str) + one NUL per token, an upper bound for every
// token kind (e.g. a 1-char special token consumes 2 buffer bytes).
void json_tokenize(json_T* json, string_t str) {
    json->num_tokens    = json_get_num_tokens(str);
    json->tokens        = memory_allocate(json->num_tokens * sizeof(json_token_T));
    json->string_buffer = memory_allocate(string_length(str) + json->num_tokens);

    // 1-based source position, recorded into every token for diagnostics.
    uint32_t line   = 1;
    uint32_t column = 1;

    string_t      string = json->string_buffer;   // write cursor into the buffer
    json_token_T* token  = &json->tokens[0];

    while (*str != '\0') {
        if (*str == '"') {
            // String literal: copy everything between the quotes.
            // NOTE(review): a newline inside a string does not update `line`.
            token->type   = JSON_TOKEN_STRING;
            token->value  = 0;
            token->line   = line;
            token->column = column;
            str++;
            uint32_t length = 0;
            while (str[length] != '\0' && str[length] != '"') {
                length++;
            }
            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            string = &string[length + 1];
            token  = &token[1];
            // FIX: only step over the closing quote when it exists; the old
            // `str += length + 1` walked past the NUL terminator on an
            // unterminated string and kept tokenizing out-of-bounds memory.
            str += length;
            if (*str == '"') {
                str++;
            }
            column += length + 2;   // opening quote + contents + closing quote
            continue;
        }
        if (string_is_char_special(*str)) {
            // Structural character: a 1-char token (plus its NUL).
            token->type   = JSON_TOKEN_SPECIAL;
            token->value  = 0;
            token->line   = line;
            token->column = column;
            token->string = string;
            *(char*)string = *str;
            ((char*)string)[1] = '\0';
            string = &string[2];
            token  = &token[1];
            str    += 1;
            column += 1;
            continue;
        }
        if (string_is_char_number(*str)) {
            // Number literal; the value is decoded on the fly.
            token->type   = JSON_TOKEN_NUMERIC;
            token->value  = 0;
            token->line   = line;
            token->column = column;
            uint8_t  base   = 10;
            uint32_t length = 0;
            if (str[0] == '0' && str[1] == 'x') {
                base   = 16;
                length = 2;
            } else if (str[0] == '0' && str[1] == 'b') {
                base   = 2;
                length = 2;
            } else if (str[0] == '0' && str[1] == 'o') {
                base   = 8;
                length = 2;
            }
            // NOTE(review): digits are decoded as `c - '0'`, so hex digits
            // a-f are not handled here — verify string_is_char_number().
            while (str[length] != '\0' && string_is_char_number(str[length])) {
                token->value *= base;
                token->value += str[length] - '0';
                length++;
            }
            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            string = &string[length + 1];
            token  = &token[1];
            str    += length;
            column += length;
            continue;
        }
        if (string_is_char_alpha(*str)) {
            // Bare word (true / false / null / identifiers).
            token->type   = JSON_TOKEN_TEXT;
            token->value  = 0;
            token->line   = line;
            token->column = column;
            uint32_t length = 0;
            while (str[length] != '\0' && string_is_char_alpha(str[length])) {
                length++;
            }
            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            string = &string[length + 1];
            token  = &token[1];
            str    += length;
            column += length;
            continue;
        }
        // Whitespace / unrecognized byte: just track the position.
        if (*str == '\n') {
            line++;
            column = 1;
        } else {
            column++;
        }
        str++;
    }
}
// Parse the value part of a `"key": value` pair and attach it as a child
// of `node` (the node created for the key). On entry *token_id addresses
// the ':' token; on success it addresses the final token of the value.
// Returns false after logging a diagnostic on malformed input or EOF
// (JSON_INCREMENT_TOKEN_ID returns false from here on EOF).
bool json_parse_assignment(json_T* json, uint32_t* token_id, json_node_T* node) {
    // The token between key and value must be the ':' separator.
    if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != ':') {
        log(LOG_ERROR, "failed to parse json at position %d:%d -> expected : got \"%s\"",
            json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
        return false;
    }

    JSON_INCREMENT_TOKEN_ID

    switch (json->tokens[*token_id].type) {
        case JSON_TOKEN_STRING: {
            // "key": "string"
            json_node_alloc(node, JSON_NODE_STRING, &json->tokens[*token_id]);
            break;
        }
        case JSON_TOKEN_NUMERIC: {
            // "key": 123 — the numeric value was already decoded by the tokenizer.
            json_node_alloc(node, JSON_NODE_NUMBER, &json->tokens[*token_id]);
            break;
        }
        case JSON_TOKEN_TEXT: {
            // Bare words: only the literals true / false / null are valid values.
            if (string_compare("true", json->tokens[*token_id].string)) {
                json_node_T* bool_node = json_node_alloc(node, JSON_NODE_BOOL, &json->tokens[*token_id]);
                bool_node->value = true;
                break;
            }
            if (string_compare("false", json->tokens[*token_id].string)) {
                json_node_T* bool_node = json_node_alloc(node, JSON_NODE_BOOL, &json->tokens[*token_id]);
                bool_node->value = false;
                break;
            }
            if (string_compare("null", json->tokens[*token_id].string)) {
                json_node_alloc(node, JSON_NODE_NULL, &json->tokens[*token_id]);
                break;
            }
            log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
                json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
            return false;
        }
        case JSON_TOKEN_SPECIAL: {
            // Nested containers: '{' opens an object, '[' opens an array.
            switch (json->tokens[*token_id].string[0]) {
                case '{': {
                    json_node_T* object_node = json_node_alloc(node, JSON_NODE_OBJECT, &json->tokens[*token_id]);
                    JSON_INCREMENT_TOKEN_ID
                    bool status = json_parse_object(json, token_id, object_node);
                    if (!status) return false;
                    break;
                }
                case '[': {
                    json_node_T* array_node = json_node_alloc(node, JSON_NODE_ARRAY, &json->tokens[*token_id]);
                    JSON_INCREMENT_TOKEN_ID
                    bool status = json_parse_array(json, token_id, array_node);
                    if (!status) return false;
                    break;
                }
                default: {
                    log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
                        json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                    return false;
                }
            }
            break;
        }
    }
    return true;
}
// Parse the members of an object until its closing '}'.
// On entry *token_id addresses the first token after '{'; on successful
// return it addresses the '}' token (the caller advances past it).
// `node` is the object node that receives the member key nodes.
bool json_parse_object(json_T* json, uint32_t* token_id, json_node_T* node) {
    while (true) {
        switch (json->tokens[*token_id].type) {
            case JSON_TOKEN_STRING: {
                // A member key: allocate its node, then parse the ": value" part.
                json_node_T* string_node = json_node_alloc(node, JSON_NODE_STRING, &json->tokens[*token_id]);
                JSON_INCREMENT_TOKEN_ID
                bool status = json_parse_assignment(json, token_id, string_node);
                if (!status) return false;
                break;
            }
            case JSON_TOKEN_SPECIAL: {
                switch (json->tokens[*token_id].string[0]) {
                    case '}': {
                        // Object closed (covers the empty-object case).
                        return true;
                    }
                }
                // NOTE(review): any other special character is accepted
                // silently here and only rejected by the ','/'}' check below.
                break;
            }
            default: {
                log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
                    json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                return false;
            }
        }
        JSON_INCREMENT_TOKEN_ID
        // After a member: expect ',' (more members) or '}' (object done).
        if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != ',') {
            if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != '}') {
                log(LOG_ERROR, "failed to parse json at position %d:%d -> expected } got \"%s\"",
                    json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                return false;
            }
            return true;
        } else {
            // Skip the ',' and continue with the next member.
            JSON_INCREMENT_TOKEN_ID
        }
    }
}
// Parse the elements of an array until its closing ']'.
// On entry *token_id addresses the first token after '['.
// NOTE(review): unlike json_parse_object, this function advances *past*
// the ']' before returning, while its callers (json_parse_assignment /
// json_parse_object) also advance afterwards — verify e.g. {"a":[1]}
// parses, as the extra advance can trigger a spurious EOF on the last member.
bool json_parse_array(json_T* json, uint32_t* token_id, json_node_T* node) {
    while (true) {
        switch (json->tokens[*token_id].type) {
            case JSON_TOKEN_STRING: {
                json_node_alloc(node, JSON_NODE_STRING, &json->tokens[*token_id]);
                break;
            }
            case JSON_TOKEN_NUMERIC: {
                json_node_alloc(node, JSON_NODE_NUMBER, &json->tokens[*token_id]);
                break;
            }
            case JSON_TOKEN_TEXT: {
                // Bare words: true / false / null become leaf nodes;
                // NOTE(review): any other word is silently ignored here.
                if (string_compare("true", json->tokens[*token_id].string)) {
                    json_node_T* bool_node = json_node_alloc(node, JSON_NODE_BOOL, &json->tokens[*token_id]);
                    bool_node->value = true;
                    break;
                }
                if (string_compare("false", json->tokens[*token_id].string)) {
                    json_node_T* bool_node = json_node_alloc(node, JSON_NODE_BOOL, &json->tokens[*token_id]);
                    bool_node->value = false;
                    break;
                }
                if (string_compare("null", json->tokens[*token_id].string)) {
                    json_node_alloc(node, JSON_NODE_NULL, &json->tokens[*token_id]);
                    break;
                }
                break;
            }
            case JSON_TOKEN_SPECIAL: {
                // Nested containers: '{' opens an object, '[' a sub-array.
                switch (json->tokens[*token_id].string[0]) {
                    case '{': {
                        json_node_T* object_node = json_node_alloc(node, JSON_NODE_OBJECT, &json->tokens[*token_id]);
                        JSON_INCREMENT_TOKEN_ID
                        bool status = json_parse_object(json, token_id, object_node);
                        if (!status) return false;
                        break;
                    }
                    case '[': {
                        json_node_T* array_node = json_node_alloc(node, JSON_NODE_ARRAY, &json->tokens[*token_id]);
                        JSON_INCREMENT_TOKEN_ID
                        bool status = json_parse_array(json, token_id, array_node);
                        if (!status) return false;
                        break;
                    }
                    default: {
                        log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
                            json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                        return false;
                    }
                }
                break;
            }
        }
        JSON_INCREMENT_TOKEN_ID
        // After an element: expect ',' (more elements) or ']' (array done).
        if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != ',') {
            if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != ']') {
                log(LOG_ERROR, "failed to parse json at position %d:%d -> expected ] got \"%s\"",
                    json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                return false;
            }
            JSON_INCREMENT_TOKEN_ID
            return true;
        }
        JSON_INCREMENT_TOKEN_ID
    }
}
// Parser entry point: builds the AST from the token stream produced by
// json_tokenize(). The document must be a single top-level object.
// Returns false on error, leaving any partially built tree attached to
// json->root_node so json_destruct() can free it.
bool json_parse(json_T* json) {
    // The root node is always an object; json_node_alloc ignores the token
    // argument for container types.
    json->root_node = json_node_alloc(NULL, JSON_NODE_OBJECT, &json->tokens[0]);

    // Guard an empty token stream before touching tokens[0] below.
    if (json->num_tokens == 0) {
        log(LOG_ERROR, "failed to parse json -> unexpected EOF");
        return false;
    }

    uint32_t token_id = 0;

    if (json->tokens[0].type != JSON_TOKEN_SPECIAL || json->tokens[0].string[0] != '{') {
        log(LOG_ERROR, "failed to parse json at position %d:%d -> expected { got \"%s\"", json->tokens[0].line, json->tokens[0].column, json->tokens[0].string);
        return false;
    }

    // Step past the opening brace; nothing after it is an EOF error.
    // (FIX: removed a stray line-continuation backslash left on this `if`.)
    token_id++;
    if (token_id >= json->num_tokens) {
        log(LOG_ERROR, "failed to parse json -> unexpected EOF");
        return false;
    }

    return json_parse_object(json, &token_id, json->root_node);
}

View File

@ -1,173 +0,0 @@
// This file is part of noxos and licensed under the MIT open source license
#include <drivers/json/json.h>
#include <utils/memory.h>
#include <utils/logger.h>
// Tokenize `str` and return a freshly allocated json_T.
// NOTE(review): this version only produces the token stream — no parsing
// or AST construction happens here. Caller frees with json_destruct().
json_T* json_from_string(string_t str) {
    json_T* json = memory_allocate(sizeof(json_T));

    DEBUG("%s", str);

    json_tokenize(json, str);

    return json;
}
// Release a json_T and the buffers it owns: the token-string backing
// buffer, the token array, and the struct itself.
void json_destruct(json_T* json) {
    memory_free(json->string_buffer);
    memory_free(json->tokens);
    memory_free(json);
}
// Count the tokens in `str` so json_tokenize() can size its allocations.
// Must walk the input with exactly the same rules as json_tokenize().
uint64_t json_get_num_tokens(string_t str) {
    uint32_t num = 0;

    while (*str != '\0') {
        if (*str == '"') {
            // String literal: scan up to the closing quote.
            str++;
            while (*str != '\0' && *str != '"') {
                str++;
            }
            num++;
            // FIX: only skip the closing quote when it exists; the previous
            // unconditional `str++` stepped past the NUL terminator on an
            // unterminated string and kept scanning out-of-bounds memory.
            if (*str == '"') {
                str++;
            }
            continue;
        }
        if (string_is_char_special(*str)) {
            // Structural characters are single-character tokens.
            num++;
            str++;
            continue;
        }
        if (string_is_char_number(*str)) {
            // Skip an optional radix prefix (0x / 0b / 0o) before the digits.
            if (str[0] == '0' && (str[1] == 'x' || str[1] == 'b' || str[1] == 'o')) {
                str += 2;
            }
            while (*str != '\0' && string_is_char_number(*str)) {
                str++;
            }
            num++;
            continue;
        }
        if (string_is_char_alpha(*str)) {
            // Bare word (true / false / null / identifiers).
            while (*str != '\0' && string_is_char_alpha(*str)) {
                str++;
            }
            num++;
            continue;
        }
        // Whitespace and anything unrecognized is skipped.
        str++;
    }

    return num;
}
// Split `str` into json->tokens, copying each token's text into a shared
// NUL-separated string buffer owned by json->string_buffer.
// Buffer size = strlen(str) + one NUL per token, an upper bound for every
// token kind.
void json_tokenize(json_T* json, string_t str) {
    json->num_tokens    = json_get_num_tokens(str);
    json->tokens        = memory_allocate(json->num_tokens * sizeof(json_token_T));
    json->string_buffer = memory_allocate(string_length(str) + json->num_tokens);

    string_t      string = json->string_buffer;   // write cursor into the buffer
    json_token_T* token  = &json->tokens[0];
    uint32_t      i      = 0;                     // token index, debug output only

    while (*str != '\0') {
        if (*str == '"') {
            // String literal: copy everything between the quotes.
            token->type  = JSON_TOKEN_STRING;
            token->value = 0;
            str++;
            uint32_t length = 0;
            while (str[length] != '\0' && str[length] != '"') {
                length++;
            }
            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            DEBUG("token: %d STRING %s", i++, token->string);
            string = &string[length + 1];
            token  = &token[1];
            // FIX: only step over the closing quote when it exists; the old
            // `str += length + 1` walked past the NUL terminator on an
            // unterminated string and kept tokenizing out-of-bounds memory.
            str += length;
            if (*str == '"') {
                str++;
            }
            continue;
        }
        if (string_is_char_special(*str)) {
            // Structural character: a 1-char token (plus its NUL).
            token->type   = JSON_TOKEN_SPECIAL;
            token->value  = 0;
            token->string = string;
            *(char*)string = *str;
            ((char*)string)[1] = '\0';
            DEBUG("token: %d SPECIAL %s", i++, token->string);
            string = &string[2];
            token  = &token[1];
            str += 1;
            continue;
        }
        if (string_is_char_number(*str)) {
            // Number literal; the value is decoded on the fly.
            token->type  = JSON_TOKEN_NUMERIC;
            token->value = 0;
            uint8_t  base   = 10;
            uint32_t length = 0;
            if (str[0] == '0' && str[1] == 'x') {
                base   = 16;
                length = 2;
            } else if (str[0] == '0' && str[1] == 'b') {
                base   = 2;
                length = 2;
            } else if (str[0] == '0' && str[1] == 'o') {
                base   = 8;
                length = 2;
            }
            while (str[length] != '\0' && string_is_char_number(str[length])) {
                token->value *= base;
                token->value += str[length] - '0';
                length++;
            }
            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            DEBUG("token: %d NUM %s %d", i++, token->string, token->value);
            string = &string[length + 1];
            token  = &token[1];
            str += length;
            continue;
        }
        if (string_is_char_alpha(*str)) {
            // Bare word (true / false / null / identifiers).
            token->type  = JSON_TOKEN_TEXT;
            token->value = 0;
            uint32_t length = 0;
            while (str[length] != '\0' && string_is_char_alpha(str[length])) {
                length++;
            }
            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            DEBUG("token: %d TEXT %s", i++, token->string);
            // FIX: advance past the NUL terminator too; the old
            // `string = &string[length]` made the next token's first byte
            // overwrite this token's NUL, fusing the two token strings.
            string = &string[length + 1];
            token  = &token[1];
            str += length;
            continue;
        }
        // Whitespace / unrecognized byte: skip.
        str++;
    }
}