From 2ffbaa5322510d9f8d768b722615153dc8a34fcc Mon Sep 17 00:00:00 2001
From: antifallobst
Date: Fri, 26 May 2023 14:53:34 +0200
Subject: [PATCH] feature (JSON parser): implemented AST

---
 inc/drivers/json.h      | 100 ++++++++
 inc/drivers/json/json.h |  32 ---
 src/boot/config.c       |   3 +-
 src/drivers/json.c      | 515 ++++++++++++++++++++++++++++++++++++++++
 src/drivers/json/json.c | 173 ---------------
 5 files changed, 617 insertions(+), 206 deletions(-)
 create mode 100644 inc/drivers/json.h
 delete mode 100644 inc/drivers/json/json.h
 create mode 100644 src/drivers/json.c
 delete mode 100644 src/drivers/json/json.c

diff --git a/inc/drivers/json.h b/inc/drivers/json.h
new file mode 100644
index 0000000..af1d385
--- /dev/null
+++ b/inc/drivers/json.h
@@ -0,0 +1,100 @@
+// This file is part of noxos and licensed under the MIT open source license
+
+#ifndef NOXOS_JSON_H
+#define NOXOS_JSON_H
+
+#include "utils/string.h"
+#include "utils/stdtypes.h"
+
+
+/*
+ * JSON node schematic
+ *
+ *                   +-----------+
+ *        +----------| Root Node |------------+
+ *        |          +-----------+            |
+ *        |                ^                  |
+ *        |                |                  |
+ * [childs_start] /----[parent]------\  [childs_end]
+ *        |      /         |          \       |
+ *        v     /          |           \      v
+ *     +------+          +------+          +------+
+ *     | Node |--[next]->| Node |--[next]->| Node |
+ *     |      |<-[prev]--|      |<-[prev]--|      | ---> . . .
+ *     +------+          +------+          +------+
+ *
+ *                          |
+ *                          v
+ *                        . . .
+ *
+ * */
+
+// Advances `*token_id` to the next token. Expects `json` and `token_id` to be in scope and
+// makes the enclosing function return false (after logging and dumping the node tree) if no
+// token is left. The expansion is more than one statement, so only use it inside of braces.
+#define JSON_INCREMENT_TOKEN_ID                                       \
+    *token_id += 1;                                                   \
+    if (*token_id >= json->num_tokens) {                              \
+        log(LOG_ERROR, "failed to parse json -> unexpected EOF");     \
+        json_node_dump(json->root_node, 0);                           \
+        return false;                                                 \
+    }
+
+typedef enum {
+    JSON_TOKEN_NUMERIC,
+    JSON_TOKEN_TEXT,
+    JSON_TOKEN_STRING,
+    JSON_TOKEN_SPECIAL
+} json_token_type_E;
+
+typedef enum {
+    JSON_NODE_OBJECT,
+    JSON_NODE_ARRAY,
+    JSON_NODE_STRING,
+    JSON_NODE_NUMBER,
+    JSON_NODE_BOOL,
+    JSON_NODE_NULL
+} json_node_type_E;
+
+typedef struct {
+    json_token_type_E type;
+    uint64_t          value;
+    string_t          string;
+
+    uint32_t          line;
+    uint32_t          column;
+} json_token_T;
+
+typedef struct json_node_T json_node_T;
+struct json_node_T {
+    json_node_type_E type;
+    string_t         string;
+    uint64_t         value;
+
+    json_node_T*     parent;
+    json_node_T*     childs_start;
+    json_node_T*     childs_end;
+    json_node_T*     prev;
+    json_node_T*     next;
+};
+
+typedef struct {
+    json_token_T* tokens;
+    uint64_t      num_tokens;
+    void*         string_buffer;
+    json_node_T*  root_node;
+} json_T;
+
+json_T*      json_from_string         (string_t str);
+void         json_destruct            (json_T* json);
+json_node_T* json_node_alloc          (json_node_T* parent, json_node_type_E type, json_token_T* token);
+void         json_node_destruct       (json_node_T* node);
+void         json_node_dump           (json_node_T* node, uint32_t indent);
+string_t     json_node_type_to_string (json_node_type_E type);
+void         json_tokenize            (json_T* json, string_t str);
+bool         json_parse               (json_T* json);
+bool         json_parse_assignment    (json_T* json, uint32_t* token_id, json_node_T* node);
+bool         json_parse_object        (json_T* json, uint32_t* token_id, json_node_T* node);
+bool         json_parse_array         (json_T* json, uint32_t* token_id, json_node_T* node);
+
+#endif //NOXOS_JSON_H
diff --git a/inc/drivers/json/json.h b/inc/drivers/json/json.h
deleted file mode 100644
index f357cd2..0000000
--- a/inc/drivers/json/json.h
+++ /dev/null
@@ -1,32 +0,0 @@
-// This file is part of noxos and licensed under the MIT open source license
-
-#ifndef NOXOS_JSON_H
-#define NOXOS_JSON_H
-
-#include
-#include
-
-typedef enum {
-    JSON_TOKEN_NUMERIC,
-    JSON_TOKEN_TEXT,
-    JSON_TOKEN_STRING,
-    JSON_TOKEN_SPECIAL
-} json_token_type_E;
-
-typedef struct {
-    json_token_type_E type;
-    uint64_t          value;
-    string_t          string;
-} json_token_T;
-
-typedef struct {
-    json_token_T* tokens;
-    uint64_t      num_tokens;
-    void*         string_buffer;
-} json_T;
-
-json_T* json_from_string(string_t str);
-void    json_destruct   (json_T* json);
-void    json_tokenize   (json_T* json, string_t str);
-
-#endif //NOXOS_JSON_H
diff --git a/src/boot/config.c b/src/boot/config.c
index 8641346..6bb3f72 100644
--- a/src/boot/config.c
+++ b/src/boot/config.c
@@ -2,7 +2,7 @@
 
 #include
 #include
-#include
+#include "drivers/json.h"
 #include
 #include
 
@@ -25,6 +25,7 @@ void sysconfig_init() {
 
     json_T* json = json_from_string(buffer);
 
+    if (json != NULL) json_node_dump(json->root_node, 0);
 
     json_destruct(json);
 
diff --git a/src/drivers/json.c b/src/drivers/json.c
new file mode 100644
index 0000000..a3f9131
--- /dev/null
+++ b/src/drivers/json.c
@@ -0,0 +1,515 @@
+// This file is part of cmlc and licensed under the MIT open source license
+
+#include "drivers/json.h"
+#include "utils/memory.h"
+#include "utils/logger.h"
+
+// Returns the base announced by a 0x / 0b / 0o prefix at `str` (10 if there is none)
+// and stores the length of that prefix in `prefix_length`.
+static uint8_t json_number_base(string_t str, uint32_t* prefix_length) {
+    *prefix_length = 0;
+    if (str[0] != '0') return 10;
+
+    switch (str[1]) {
+        case 'x': *prefix_length = 2; return 16;
+        case 'b': *prefix_length = 2; return 2;
+        case 'o': *prefix_length = 2; return 8;
+        default:  return 10;
+    }
+}
+
+// Tells whether `c` continues a number of `base`: everything string_is_char_number accepts,
+// plus the letters a-f / A-F for base 16.
+static bool json_is_digit(char c, uint8_t base) {
+    if (string_is_char_number(c)) return true;
+    if (base != 16)               return false;
+    return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+}
+
+// Returns the numeric value of the digit `c` (the letters of base 16 included).
+static uint64_t json_digit_value(char c) {
+    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
+    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
+    return c - '0';
+}
+
+// Tokenizes and parses `str`.
+// Returns NULL if an allocation or the parsing fails; a non-NULL result has to be
+// released with json_destruct.
+json_T* json_from_string(string_t str) {
+    json_T* json = memory_allocate(sizeof(json_T));
+    if (json == NULL) return NULL;
+
+    // start from a defined state, so json_destruct is safe on every error path
+    json->tokens        = NULL;
+    json->num_tokens    = 0;
+    json->string_buffer = NULL;
+    json->root_node     = NULL;
+
+    DEBUG("\n%s", str);
+
+    json_tokenize(json, str);
+
+    bool status = json_parse(json);
+    if (!status) {
+        json_destruct(json);
+        return NULL;
+    }
+
+    return json;
+}
+
+// Releases `json` together with its node tree, token array and string buffer.
+// Passing NULL is allowed and does nothing.
+void json_destruct(json_T* json) {
+    if (json == NULL) return;
+
+    json_node_destruct(json->root_node);
+    if (json->string_buffer != NULL) memory_free(json->string_buffer);
+    if (json->tokens != NULL)        memory_free(json->tokens);
+    memory_free(json);
+}
+
+// Allocates a node of `type` and appends it to the children of `parent` (if there is one).
+// Object and array nodes ignore `token`, all other types take its string and value.
+// Returns NULL if the allocation fails.
+json_node_T* json_node_alloc(json_node_T* parent, json_node_type_E type, json_token_T* token) {
+    json_node_T* node = memory_allocate(sizeof(json_node_T));
+    if (node == NULL) return NULL;
+
+    node->type = type;
+    switch (type) {
+        case JSON_NODE_OBJECT:
+        case JSON_NODE_ARRAY: {
+            node->string = "";
+            node->value  = 0;
+            break;
+        }
+
+        default: {
+            node->string = token->string;
+            node->value  = token->value;
+            break;
+        }
+    }
+
+    node->parent       = parent;
+    node->childs_start = NULL;
+    node->childs_end   = NULL;
+    node->prev         = NULL;
+    node->next         = NULL;
+
+    if (parent == NULL) return node;
+
+    if (parent->childs_start == NULL) {
+        parent->childs_start = node;
+    }
+
+    if (parent->childs_end != NULL) {
+        parent->childs_end->next = node;
+        node->prev               = parent->childs_end;
+    }
+    parent->childs_end = node;
+
+    return node;
+}
+
+// Frees `node` and, recursively, all of its children. Passing NULL does nothing.
+void json_node_destruct(json_node_T* node) {
+    if (node == NULL) return;
+
+    json_node_T* child = node->childs_start;
+    while (child != NULL) {
+        json_node_T* next = child->next;
+        json_node_destruct(child);
+        child = next;
+    }
+
+    memory_free(node);
+}
+
+// Logs `node` and its subtree via DEBUG, one node per line; every nesting level is
+// indented two spaces further than `indent`. Passing NULL does nothing.
+void json_node_dump(json_node_T* node, uint32_t indent) {
+    if (node == NULL) return;
+
+    char buffer[indent+1];
+    memory_set(buffer, ' ', indent);
+    buffer[indent] = '\0';
+    DEBUG("%s[ %s - %s ]", buffer, json_node_type_to_string(node->type), node->string);
+
+    json_node_T* child = node->childs_start;
+    while (child != NULL) {
+        json_node_dump(child, indent + 2);
+        child = child->next;
+    }
+}
+
+// Returns a printable name for `type` ("Invalid" for values outside of the enum).
+string_t json_node_type_to_string(json_node_type_E type) {
+    switch (type) {
+        case JSON_NODE_OBJECT: return "Object";
+        case JSON_NODE_ARRAY:  return "Array";
+        case JSON_NODE_STRING: return "String";
+        case JSON_NODE_NUMBER: return "Number";
+        case JSON_NODE_BOOL:   return "Boolean";
+        case JSON_NODE_NULL:   return "Null";
+        default:               return "Invalid";
+    }
+}
+
+// Counts the tokens json_tokenize produces for `str`.
+// Both functions have to split `str` by the same rules, as this count sizes the token array.
+uint64_t json_get_num_tokens(string_t str) {
+    uint64_t num = 0;
+
+    while (*str != '\0') {
+        if (*str == '"') {
+            str++;
+            while (*str != '\0' && *str != '"') {
+                str++;
+            }
+            // step over the closing quote only - an unterminated string ends on the terminator
+            if (*str == '"') str++;
+            num++;
+            continue;
+        }
+
+        if (string_is_char_special(*str)) {
+            num++;
+            str++;
+            continue;
+        }
+
+        if (string_is_char_number(*str)) {
+            uint32_t prefix_length = 0;
+            uint8_t  base          = json_number_base(str, &prefix_length);
+
+            str += prefix_length;
+            while (*str != '\0' && json_is_digit(*str, base)) {
+                str++;
+            }
+            num++;
+            continue;
+        }
+
+        if (string_is_char_alpha(*str)) {
+            while (*str != '\0' && string_is_char_alpha(*str)) {
+                str++;
+            }
+            num++;
+            continue;
+        }
+
+        str++;
+    }
+
+    return num;
+}
+
+// Splits `str` into tokens and stores them in `json->tokens` / `json->num_tokens`.
+// The token strings are NUL-terminated copies that live in `json->string_buffer`.
+// If `str` holds no tokens or an allocation fails, `json->num_tokens` is 0 afterwards.
+void json_tokenize(json_T* json, string_t str) {
+    json->num_tokens    = json_get_num_tokens(str);
+    json->tokens        = NULL;
+    json->string_buffer = NULL;
+    if (json->num_tokens == 0) return;
+
+    // every token stores its characters plus one terminator
+    json->tokens        = memory_allocate(json->num_tokens * sizeof(json_token_T));
+    json->string_buffer = memory_allocate(string_length(str) + json->num_tokens);
+    if (json->tokens == NULL || json->string_buffer == NULL) {
+        log(LOG_ERROR, "failed to tokenize json -> out of memory");
+        json->num_tokens = 0;
+        return;
+    }
+
+    uint32_t line   = 1;
+    uint32_t column = 1;
+
+    char*         buffer = json->string_buffer;
+    json_token_T* token  = &json->tokens[0];
+    while (*str != '\0') {
+        if (*str == '"') {
+            token->type   = JSON_TOKEN_STRING;
+            token->value  = 0;
+            token->line   = line;
+            token->column = column;
+
+            str++;
+
+            uint32_t length = 0;
+            while (str[length] != '\0' && str[length] != '"') {
+                length++;
+            }
+            // step over the closing quote only - an unterminated string ends on the terminator
+            uint32_t consumed = (str[length] == '"') ? length + 1 : length;
+
+            token->string = buffer;
+            memory_copy(str, buffer, length);
+            buffer[length] = '\0';
+
+            buffer  = &buffer[length + 1];
+            token   = &token[1];
+            str    += consumed;
+            column += consumed + 1;
+            continue;
+        }
+
+        if (string_is_char_special(*str)) {
+            token->type   = JSON_TOKEN_SPECIAL;
+            token->value  = 0;
+            token->line   = line;
+            token->column = column;
+
+            token->string = buffer;
+            buffer[0]     = *str;
+            buffer[1]     = '\0';
+
+            buffer  = &buffer[2];
+            token   = &token[1];
+            str    += 1;
+            column += 1;
+            continue;
+        }
+
+        if (string_is_char_number(*str)) {
+            token->type   = JSON_TOKEN_NUMERIC;
+            token->value  = 0;
+            token->line   = line;
+            token->column = column;
+
+            uint32_t length = 0;
+            uint8_t  base   = json_number_base(str, &length);
+
+            while (str[length] != '\0' && json_is_digit(str[length], base)) {
+                token->value *= base;
+                token->value += json_digit_value(str[length]);
+                length++;
+            }
+
+            token->string = buffer;
+            memory_copy(str, buffer, length);
+            buffer[length] = '\0';
+
+            buffer  = &buffer[length + 1];
+            token   = &token[1];
+            str    += length;
+            column += length;
+            continue;
+        }
+
+        if (string_is_char_alpha(*str)) {
+            token->type   = JSON_TOKEN_TEXT;
+            token->value  = 0;
+            token->line   = line;
+            token->column = column;
+
+            uint32_t length = 0;
+            while (str[length] != '\0' && string_is_char_alpha(str[length])) {
+                length++;
+            }
+
+            token->string = buffer;
+            memory_copy(str, buffer, length);
+            buffer[length] = '\0';
+
+            buffer  = &buffer[length + 1];
+            token   = &token[1];
+            str    += length;
+            column += length;
+            continue;
+        }
+
+        if (*str == '\n') {
+            line++;
+            column = 1;
+        } else {
+            column++;
+        }
+
+        str++;
+    }
+}
+
+// Parses the value at `*token_id` and appends it to the children of `node`.
+// On success `*token_id` is on the last token of the value (for objects and arrays
+// that is the closing bracket).
+static bool json_parse_value(json_T* json, uint32_t* token_id, json_node_T* node) {
+    json_token_T* token = &json->tokens[*token_id];
+    json_node_T*  value = NULL;
+
+    switch (token->type) {
+        case JSON_TOKEN_STRING: {
+            value = json_node_alloc(node, JSON_NODE_STRING, token);
+            break;
+        }
+
+        case JSON_TOKEN_NUMERIC: {
+            value = json_node_alloc(node, JSON_NODE_NUMBER, token);
+            break;
+        }
+
+        case JSON_TOKEN_TEXT: {
+            if (string_compare("true", token->string)) {
+                value = json_node_alloc(node, JSON_NODE_BOOL, token);
+                if (value != NULL) value->value = true;
+                break;
+            }
+            if (string_compare("false", token->string)) {
+                value = json_node_alloc(node, JSON_NODE_BOOL, token);
+                if (value != NULL) value->value = false;
+                break;
+            }
+            if (string_compare("null", token->string)) {
+                value = json_node_alloc(node, JSON_NODE_NULL, token);
+                break;
+            }
+
+            log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
+                token->line, token->column, token->string);
+            return false;
+        }
+
+        case JSON_TOKEN_SPECIAL: {
+            switch (token->string[0]) {
+                case '{': {
+                    value = json_node_alloc(node, JSON_NODE_OBJECT, token);
+                    if (value == NULL) break;
+
+                    JSON_INCREMENT_TOKEN_ID
+                    return json_parse_object(json, token_id, value);
+                }
+
+                case '[': {
+                    value = json_node_alloc(node, JSON_NODE_ARRAY, token);
+                    if (value == NULL) break;
+
+                    JSON_INCREMENT_TOKEN_ID
+                    return json_parse_array(json, token_id, value);
+                }
+
+                default: {
+                    log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
+                        token->line, token->column, token->string);
+                    return false;
+                }
+            }
+            break;
+        }
+    }
+
+    if (value == NULL) {
+        log(LOG_ERROR, "failed to parse json -> out of memory");
+        return false;
+    }
+
+    return true;
+}
+
+// Parses the ": value" part of an object member; the value becomes a child of `node`,
+// the node of the member's key.
+// On entry `*token_id` is on the ':' token, on success on the last token of the value.
+bool json_parse_assignment(json_T* json, uint32_t* token_id, json_node_T* node) {
+    json_token_T* token = &json->tokens[*token_id];
+    if (token->type != JSON_TOKEN_SPECIAL || token->string[0] != ':') {
+        log(LOG_ERROR, "failed to parse json at position %d:%d -> expected : got \"%s\"",
+            token->line, token->column, token->string);
+        return false;
+    }
+
+    JSON_INCREMENT_TOKEN_ID
+    return json_parse_value(json, token_id, node);
+}
+
+// Parses the members of an object into children of `node`.
+// On entry `*token_id` is on the first token after '{', on success on the closing '}'.
+bool json_parse_object(json_T* json, uint32_t* token_id, json_node_T* node) {
+    while (true) {
+        json_token_T* token = &json->tokens[*token_id];
+
+        // reached for an empty object and after a trailing comma
+        if (token->type == JSON_TOKEN_SPECIAL && token->string[0] == '}') return true;
+
+        if (token->type != JSON_TOKEN_STRING) {
+            log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
+                token->line, token->column, token->string);
+            return false;
+        }
+
+        json_node_T* key_node = json_node_alloc(node, JSON_NODE_STRING, token);
+        if (key_node == NULL) {
+            log(LOG_ERROR, "failed to parse json -> out of memory");
+            return false;
+        }
+
+        JSON_INCREMENT_TOKEN_ID
+        if (!json_parse_assignment(json, token_id, key_node)) return false;
+
+        JSON_INCREMENT_TOKEN_ID
+        token = &json->tokens[*token_id];
+        if (token->type != JSON_TOKEN_SPECIAL || (token->string[0] != ',' && token->string[0] != '}')) {
+            log(LOG_ERROR, "failed to parse json at position %d:%d -> expected } got \"%s\"",
+                token->line, token->column, token->string);
+            return false;
+        }
+        if (token->string[0] == '}') return true;
+
+        JSON_INCREMENT_TOKEN_ID
+    }
+}
+
+// Parses the elements of an array into children of `node`.
+// On entry `*token_id` is on the first token after '[', on success on the closing ']' -
+// the convention of json_parse_object, so callers step over the bracket themselves.
+bool json_parse_array(json_T* json, uint32_t* token_id, json_node_T* node) {
+    json_token_T* token = &json->tokens[*token_id];
+
+    // empty array
+    if (token->type == JSON_TOKEN_SPECIAL && token->string[0] == ']') return true;
+
+    while (true) {
+        if (!json_parse_value(json, token_id, node)) return false;
+
+        JSON_INCREMENT_TOKEN_ID
+        token = &json->tokens[*token_id];
+        if (token->type != JSON_TOKEN_SPECIAL || (token->string[0] != ',' && token->string[0] != ']')) {
+            log(LOG_ERROR, "failed to parse json at position %d:%d -> expected ] got \"%s\"",
+                token->line, token->column, token->string);
+            return false;
+        }
+        if (token->string[0] == ']') return true;
+
+        JSON_INCREMENT_TOKEN_ID
+    }
+}
+
+// Builds the node tree of `json` from its tokens; the document has to be an object.
+// Returns false, after logging the reason, if the tokens do not form a valid document.
+bool json_parse(json_T* json) {
+    // object nodes do not read their token, so the root is created without one
+    json->root_node = json_node_alloc(NULL, JSON_NODE_OBJECT, NULL);
+    if (json->root_node == NULL) {
+        log(LOG_ERROR, "failed to parse json -> out of memory");
+        return false;
+    }
+
+    if (json->num_tokens == 0) {
+        log(LOG_ERROR, "failed to parse json -> unexpected EOF");
+        return false;
+    }
+
+    uint32_t token_id = 0;
+    if (json->tokens[0].type != JSON_TOKEN_SPECIAL || json->tokens[0].string[0] != '{') {
+        log(LOG_ERROR, "failed to parse json at position %d:%d -> expected { got \"%s\"", json->tokens[0].line, json->tokens[0].column, json->tokens[0].string);
+        return false;
+    }
+    token_id++;
+    if (token_id >= json->num_tokens) {
+        log(LOG_ERROR, "failed to parse json -> unexpected EOF");
+        return false;
+    }
+
+    return json_parse_object(json, &token_id, json->root_node);
+}
diff --git a/src/drivers/json/json.c b/src/drivers/json/json.c
deleted file mode 100644
index f0e91e7..0000000
--- a/src/drivers/json/json.c
+++ /dev/null
@@ -1,173 +0,0 @@
-// This file is part of cmlc and licensed under the MIT open source license
-
-#include
-#include
-#include
-
-json_T* json_from_string(string_t str) {
-    json_T* json = memory_allocate(sizeof(json_T));
-
-    DEBUG("%s", str);
-
-    json_tokenize(json, str);
-
-    return json;
-}
-
-void json_destruct(json_T* json) {
-    memory_free(json->string_buffer);
-    memory_free(json->tokens);
-    memory_free(json);
-}
-
-uint64_t json_get_num_tokens(string_t str) {
-    uint32_t num = 0;
-
-    while (*str != '\0') {
-        if (*str == '"') {
-            str++;
-            while (*str != '\0' && *str != '"') {
-                str++;
-            }
-            num++;
-            str++;
-            continue;
-        }
-
-        if (string_is_char_special(*str)) {
-            num++;
-            str++;
-            continue;
-        }
-
-        if (string_is_char_number(*str)) {
-            if (str[0] == '0' && (str[1] == 'x' || str[1] == 'b' || str[1] == 'o')) {
-                str += 2;
-            }
-            while (*str != '\0' && string_is_char_number(*str)) {
-                str++;
-            }
-            num++;
-            continue;
-        }
-
-        if (string_is_char_alpha(*str)) {
-            while (*str != '\0' && string_is_char_alpha(*str)) {
-                str++;
-            }
-            num++;
-            continue;
-        }
-
-        str++;
-    }
-
-    return num;
-}
-
-void json_tokenize(json_T* json, string_t str) {
-    json->num_tokens = json_get_num_tokens(str);
-    json->tokens = memory_allocate(json->num_tokens * sizeof(json_token_T));
-    json->string_buffer = memory_allocate(string_length(str) + json->num_tokens);
-
-    string_t string = json->string_buffer;
-    json_token_T* token = &json->tokens[0];
-    uint32_t i = 0;
-    while (*str != '\0') {
-        if (*str == '"') {
-            token->type = JSON_TOKEN_STRING;
-            token->value = 0;
-
-            str++;
-
-            uint32_t length = 0;
-            while (str[length] != '\0' && str[length] != '"') {
-                length++;
-            }
-
-            token->string = string;
-            memory_copy(str, string, length);
-            ((char*)string)[length] = '\0';
-            DEBUG("token: %d STRING %s", i++, token->string);
-
-            string = &string[length + 1];
-            token = &token[1];
-            str += length + 1;
-            continue;
-        }
-
-        if (string_is_char_special(*str)) {
-            token->type = JSON_TOKEN_SPECIAL;
-            token->value = 0;
-
-            token->string = string;
-            *(char*)string = *str;
-            ((char*)string)[1] = '\0';
-
-            DEBUG("token: %d SPECIAL %s", i++, token->string);
-
-            string = &string[2];
-            token = &token[1];
-            str += 1;
-            continue;
-        }
-
-        if (string_is_char_number(*str)) {
-            token->type = JSON_TOKEN_NUMERIC;
-            token->value = 0;
-
-            uint8_t base = 10;
-            uint32_t length = 0;
-
-            if (str[0] == '0' && str[1] == 'x') {
-                base = 16;
-                length = 2;
-            } else if (str[0] == '0' && str[1] == 'b') {
-                base = 2;
-                length = 2;
-            } else if (str[0] == '0' && str[1] == 'o') {
-                base = 8;
-                length = 2;
-            }
-
-            while (str[length] != '\0' && string_is_char_number(str[length])) {
-                token->value *= base;
-                token->value += str[length] - '0';
-                length++;
-            }
-
-            token->string = string;
-            memory_copy(str, string, length);
-            ((char*)string)[length] = '\0';
-            DEBUG("token: %d NUM %s %d", i++, token->string, token->value);
-
-            string = &string[length + 1];
-            token = &token[1];
-            str += length;
-            continue;
-        }
-
-        if (string_is_char_alpha(*str)) {
-            token->type = JSON_TOKEN_TEXT;
-            token->value = 0;
-
-            uint32_t length = 0;
-            while (str[length] != '\0' && string_is_char_alpha(str[length])) {
-                length++;
-            }
-
-            token->string = string;
-            memory_copy(str, string, length);
-            ((char*)string)[length] = '\0';
-            DEBUG("token: %d TEXT %s", i++, token->string);
-
-            string = &string[length];
-            token = &token[1];
-            str += length;
-            continue;
-        }
-
-        str++;
-    }
-
-}
\ No newline at end of file