feature (JSON parser): implemented AST

This commit is contained in:
antifallobst 2023-05-26 14:53:34 +02:00
parent a5233b7fe1
commit 2ffbaa5322
5 changed files with 580 additions and 206 deletions

97
inc/drivers/json.h Normal file
View File

@ -0,0 +1,97 @@
// This file is part of noxos and licensed under the MIT open source license
#ifndef NOXOS_JSON_H
#define NOXOS_JSON_H
#include "utils/string.h"
#include "utils/stdtypes.h"
/*
* JSON node schematic
*
* +-----------+
* +----------| Root Node |------------+
* | +-----------+ |
* | ^ |
* | | |
* [childs_start] /----[parent]------\ [childs_end]
* | / | \ |
* v / | \ v
* +------+ +------+ +------+
* | Node |--[next]->| Node |--[next]->| Node |
* | |<-[prev]--| |<-[prev]--| | ---> . . .
* +------+ +------+ +------+
*
* |
* v
* . . .
*
* */
/*
 * Advances *token_id by one and bails out of the enclosing function when the
 * token stream is exhausted: it logs an "unexpected EOF" error, dumps the tree
 * built so far (json->root_node) and executes `return false`.
 *
 * Requirements at the expansion site:
 *  - `json` (json_T*) and `token_id` (uint32_t*) must be in scope,
 *  - the enclosing function must return bool,
 *  - the macro expands to a bare statement sequence (it is NOT wrapped in
 *    do { } while (0)) and is used without a trailing semicolon, so it may
 *    only appear where a statement list is valid, i.e. inside braces.
 */
#define JSON_INCREMENT_TOKEN_ID \
*token_id += 1; \
if (*token_id >= json->num_tokens) { \
log(LOG_ERROR, "failed to parse json -> unexpected EOF"); \
json_node_dump(json->root_node, 0); \
return false; \
}
/* Lexical category assigned to every token by json_tokenize(). */
typedef enum {
    JSON_TOKEN_NUMERIC = 0, /* run of digits, optionally prefixed with 0x / 0b / 0o */
    JSON_TOKEN_TEXT    = 1, /* run of alphabetic characters (e.g. true, false, null) */
    JSON_TOKEN_STRING  = 2, /* double-quoted string, stored without the quotes */
    JSON_TOKEN_SPECIAL = 3  /* one character accepted by string_is_char_special() */
} json_token_type_E;
/* Kind of value a node of the parsed tree represents. */
typedef enum {
    JSON_NODE_OBJECT = 0, /* container created for '{' */
    JSON_NODE_ARRAY  = 1, /* container created for '[' */
    JSON_NODE_STRING = 2, /* string value, also used for object keys */
    JSON_NODE_NUMBER = 3, /* numeric token */
    JSON_NODE_BOOL   = 4, /* the bare words true / false */
    JSON_NODE_NULL   = 5  /* the bare word null */
} json_node_type_E;
// A single lexical token produced by json_tokenize().
typedef struct {
// Lexical category of the token.
json_token_type_E type;
// Accumulated numeric value for JSON_TOKEN_NUMERIC tokens; the tokenizer sets it to 0 for every other type.
uint64_t value;
// NUL-terminated copy of the token text; points into json_T.string_buffer.
string_t string;
// Source position recorded by the tokenizer (both counters start at 1); used in parser error messages.
uint32_t line;
uint32_t column;
} json_token_T;
// Node of the parsed tree. Children of a node form a doubly linked list.
typedef struct json_node_T json_node_T;
struct json_node_T {
// Kind of value this node represents.
json_node_type_E type;
// Token text for scalar nodes; "" for object and array nodes.
string_t string;
// Token value for scalar nodes (true/false for bool nodes); 0 for object and array nodes.
uint64_t value;
// Owning node, NULL for the root.
json_node_T* parent;
// First and last child, both NULL while the node has no children.
json_node_T* childs_start;
json_node_T* childs_end;
// Siblings inside the parent's child list, NULL at the respective end.
json_node_T* prev;
json_node_T* next;
};
// A parsed document together with the buffers that back it.
typedef struct {
// Token array allocated by json_tokenize(), num_tokens entries long.
json_token_T* tokens;
uint64_t num_tokens;
// Single allocation that holds the NUL-terminated text of every token.
void* string_buffer;
// Root of the node tree, created by json_parse() as an object node.
json_node_T* root_node;
} json_T;
// Tokenizes and parses `str`. Returns the new document, or NULL (after destroying it) when parsing fails.
json_T* json_from_string (string_t str);
// Frees the node tree, the string buffer, the token array and the json_T itself.
void json_destruct (json_T* json);
// Allocates a node of `type` and appends it to `parent`'s child list (`parent` may be NULL).
// Object and array nodes get "" / 0; all other types copy string and value from `token`.
json_node_T* json_node_alloc (json_node_T* parent, json_node_type_E type, json_token_T* token);
// Recursively frees `node` and all of its children.
void json_node_destruct (json_node_T* node);
// Logs `node` and its subtree via DEBUG; each nesting level is indented by two more spaces.
void json_node_dump (json_node_T* node, uint32_t indent);
// Returns a human readable name for `type` ("Invalid" for unknown values).
string_t json_node_type_to_string (json_node_type_E type);
// Splits `str` into tokens and fills json->tokens, json->num_tokens and json->string_buffer.
void json_tokenize (json_T* json, string_t str);
// Builds the tree under json->root_node from the tokens; the first token must be '{'. Returns false on a parse error.
bool json_parse (json_T* json);
// Parses ": value" starting at *token_id and attaches the value as a child of `node`.
bool json_parse_assignment (json_T* json, uint32_t* token_id, json_node_T* node);
// Parses object members starting at *token_id (the token after '{') and attaches them to `node`.
bool json_parse_object (json_T* json, uint32_t* token_id, json_node_T* node);
// Parses array elements starting at *token_id (the token after '[') and attaches them to `node`.
bool json_parse_array (json_T* json, uint32_t* token_id, json_node_T* node);
#endif //NOXOS_JSON_H

View File

@ -1,32 +0,0 @@
// This file is part of noxos and licensed under the MIT open source license
#ifndef NOXOS_JSON_H
#define NOXOS_JSON_H
#include <utils/string.h>
#include <utils/stdtypes.h>
/* Lexical category assigned to every token by json_tokenize(). */
typedef enum {
    JSON_TOKEN_NUMERIC = 0, /* run of digits, optionally prefixed with 0x / 0b / 0o */
    JSON_TOKEN_TEXT    = 1, /* run of alphabetic characters */
    JSON_TOKEN_STRING  = 2, /* double-quoted string, stored without the quotes */
    JSON_TOKEN_SPECIAL = 3  /* one character accepted by string_is_char_special() */
} json_token_type_E;
// A single lexical token produced by json_tokenize().
typedef struct {
// Lexical category of the token.
json_token_type_E type;
// Accumulated numeric value for JSON_TOKEN_NUMERIC tokens; the tokenizer sets it to 0 for every other type.
uint64_t value;
// NUL-terminated copy of the token text; points into json_T.string_buffer.
string_t string;
} json_token_T;
// A tokenized document together with the buffers that back it.
typedef struct {
// Token array allocated by json_tokenize(), num_tokens entries long.
json_token_T* tokens;
uint64_t num_tokens;
// Single allocation that holds the text of every token.
void* string_buffer;
} json_T;
// Allocates a json_T and tokenizes `str` into it.
json_T* json_from_string(string_t str);
// Frees the string buffer, the token array and the json_T itself.
void json_destruct (json_T* json);
// Splits `str` into tokens and fills json->tokens, json->num_tokens and json->string_buffer.
void json_tokenize (json_T* json, string_t str);
#endif //NOXOS_JSON_H

View File

@ -2,7 +2,7 @@
#include <boot/config.h>
#include <drivers/fs/vfs.h>
#include <drivers/json/json.h>
#include "drivers/json.h"
#include <utils/logger.h>
#include <utils/memory.h>
@ -25,6 +25,7 @@ void sysconfig_init() {
json_T* json = json_from_string(buffer);
json_node_dump(json->root_node, 0);
json_destruct(json);

481
src/drivers/json.c Normal file
View File

@ -0,0 +1,481 @@
// This file is part of noxos and licensed under the MIT open source license
#include "drivers/json.h"
#include "utils/memory.h"
#include "utils/logger.h"
/*
 * Builds a JSON document from the NUL-terminated text `str`.
 *
 * The text is logged, tokenized and then parsed into a node tree.
 * Returns the new document, or NULL when parsing fails (the partially built
 * document is destroyed before returning).
 */
json_T* json_from_string(string_t str) {
    json_T* document = memory_allocate(sizeof *document);

    DEBUG("\n%s", str);

    json_tokenize(document, str);
    if (json_parse(document)) {
        return document;
    }

    json_destruct(document);
    return NULL;
}
/*
 * Releases a document created by json_from_string(): the node tree, the
 * string buffer, the token array and the json_T itself.
 *
 * Passing NULL is a no-op. json_from_string() returns NULL when parsing
 * fails, so callers that destroy its result unconditionally must not crash.
 */
void json_destruct(json_T* json) {
    if (json == NULL) {
        return;
    }

    json_node_destruct(json->root_node);
    memory_free(json->string_buffer);
    memory_free(json->tokens);
    memory_free(json);
}
/*
 * Allocates a tree node of the given type and appends it to the end of
 * `parent`'s child list.
 *
 * parent: node to attach to, or NULL to create a detached (root) node.
 * type:   node type; object and array nodes get an empty string and value 0,
 *         every other type copies string and value from `token`.
 * token:  source token; only read for non-container types.
 *
 * Returns the new node.
 */
json_node_T* json_node_alloc(json_node_T* parent, json_node_type_E type, json_token_T* token) {
    json_node_T* fresh = memory_allocate(sizeof(json_node_T));

    fresh->type = type;
    if (type == JSON_NODE_OBJECT || type == JSON_NODE_ARRAY) {
        fresh->string = "";
        fresh->value = 0;
    } else {
        fresh->string = token->string;
        fresh->value = token->value;
    }

    fresh->parent = parent;
    fresh->childs_start = NULL;
    fresh->childs_end = NULL;
    fresh->prev = NULL;
    fresh->next = NULL;

    if (parent != NULL) {
        json_node_T* last = parent->childs_end;

        // first child also becomes the head of the list
        if (parent->childs_start == NULL) {
            parent->childs_start = fresh;
        }
        if (last != NULL) {
            last->next = fresh;
            fresh->prev = last;
        }
        parent->childs_end = fresh;
    }

    return fresh;
}
/*
 * Frees `node` and, recursively, every node below it.
 * The sibling pointer is read before a child is freed, because the child's
 * memory is gone once the recursive call returns.
 */
void json_node_destruct(json_node_T* node) {
    for (json_node_T* cursor = node->childs_start; cursor != NULL; ) {
        json_node_T* following = cursor->next;
        json_node_destruct(cursor);
        cursor = following;
    }

    memory_free(node);
}
/*
 * Logs `node` and its whole subtree via DEBUG, one line per node.
 *
 * indent: number of spaces printed in front of this node's line; children
 *         are printed with two additional spaces.
 */
void json_node_dump(json_node_T* node, uint32_t indent) {
    // padding string made of `indent` spaces
    char padding[indent+1];
    memory_set(padding, ' ', indent);
    padding[indent] = '\0';

    DEBUG("%s[ %s - %s ]", padding, json_node_type_to_string(node->type), node->string);

    for (json_node_T* it = node->childs_start; it != NULL; it = it->next) {
        json_node_dump(it, indent + 2);
    }
}
/*
 * Maps a node type to a human readable name.
 * Values outside of json_node_type_E yield "Invalid".
 */
string_t json_node_type_to_string(json_node_type_E type) {
    if (type == JSON_NODE_OBJECT) return "Object";
    if (type == JSON_NODE_ARRAY)  return "Array";
    if (type == JSON_NODE_STRING) return "String";
    if (type == JSON_NODE_NUMBER) return "Number";
    if (type == JSON_NODE_BOOL)   return "Boolean";
    if (type == JSON_NODE_NULL)   return "Null";

    return "Invalid";
}
/*
 * Counts the tokens json_tokenize() will produce for `str`, so that the
 * token array can be allocated up front. The scanning rules here must stay
 * in sync with json_tokenize().
 *
 * Characters that start none of the token classes (e.g. whitespace) are
 * skipped.
 *
 * Returns the number of tokens in `str`.
 */
uint64_t json_get_num_tokens(string_t str) {
    uint64_t num = 0;

    while (*str != '\0') {
        if (*str == '"') {
            str++;
            while (*str != '\0' && *str != '"') {
                str++;
            }
            // Only step over the closing quote if there is one: for an
            // unterminated string we are already on the NUL terminator and
            // must not walk past the end of the input.
            if (*str == '"') {
                str++;
            }
            num++;
            continue;
        }

        if (string_is_char_special(*str)) {
            num++;
            str++;
            continue;
        }

        if (string_is_char_number(*str)) {
            // skip an optional base prefix
            if (str[0] == '0' && (str[1] == 'x' || str[1] == 'b' || str[1] == 'o')) {
                str += 2;
            }
            while (*str != '\0' && string_is_char_number(*str)) {
                str++;
            }
            num++;
            continue;
        }

        if (string_is_char_alpha(*str)) {
            while (*str != '\0' && string_is_char_alpha(*str)) {
                str++;
            }
            num++;
            continue;
        }

        str++;
    }

    return num;
}
/*
 * Splits `str` into tokens and stores them in `json`.
 *
 * Fills json->num_tokens, json->tokens and json->string_buffer. Every token
 * gets a NUL-terminated copy of its text inside the string buffer and the
 * line/column (both starting at 1) at which it begins.
 *
 * Token classes:
 *  - STRING:  text between double quotes (quotes are not stored),
 *  - SPECIAL: a single character accepted by string_is_char_special(),
 *  - NUMERIC: digits with an optional 0x / 0b / 0o prefix, value accumulated
 *             in the selected base,
 *  - TEXT:    a run of alphabetic characters.
 * Everything else is skipped; '\n' advances the line counter.
 */
void json_tokenize(json_T* json, string_t str) {
    json->num_tokens = json_get_num_tokens(str);
    json->tokens = memory_allocate(json->num_tokens * sizeof(json_token_T));
    // each token needs its text plus one terminator, and the token texts
    // never add up to more than the input length
    json->string_buffer = memory_allocate(string_length(str) + json->num_tokens);

    uint32_t line = 1;
    uint32_t column = 1;
    string_t string = json->string_buffer;
    json_token_T* token = &json->tokens[0];

    while (*str != '\0') {
        if (*str == '"') {
            token->type = JSON_TOKEN_STRING;
            token->value = 0;
            token->line = line;
            token->column = column;

            str++;
            uint32_t length = 0;
            while (str[length] != '\0' && str[length] != '"') {
                length++;
            }

            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';

            string = &string[length + 1];
            token = &token[1];
            str += length;
            column += length + 1;
            // Consume the closing quote only if it exists. For an
            // unterminated string `str` already points at the NUL terminator
            // and must not be moved past the end of the input.
            if (*str == '"') {
                str++;
                column++;
            }
            continue;
        }

        if (string_is_char_special(*str)) {
            token->type = JSON_TOKEN_SPECIAL;
            token->value = 0;
            token->line = line;
            token->column = column;

            token->string = string;
            *(char*)string = *str;
            ((char*)string)[1] = '\0';

            string = &string[2];
            token = &token[1];
            str += 1;
            column += 1;
            continue;
        }

        if (string_is_char_number(*str)) {
            token->type = JSON_TOKEN_NUMERIC;
            token->value = 0;
            token->line = line;
            token->column = column;

            uint8_t base = 10;
            uint32_t length = 0;
            if (str[0] == '0' && str[1] == 'x') {
                base = 16;
                length = 2;
            } else if (str[0] == '0' && str[1] == 'b') {
                base = 2;
                length = 2;
            } else if (str[0] == '0' && str[1] == 'o') {
                base = 8;
                length = 2;
            }

            // NOTE(review): only characters accepted by string_is_char_number()
            // are consumed; presumably hex digits a-f end the token - confirm.
            while (str[length] != '\0' && string_is_char_number(str[length])) {
                token->value *= base;
                token->value += str[length] - '0';
                length++;
            }

            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';

            string = &string[length + 1];
            token = &token[1];
            str += length;
            column += length;
            continue;
        }

        if (string_is_char_alpha(*str)) {
            token->type = JSON_TOKEN_TEXT;
            token->value = 0;
            token->line = line;
            token->column = column;

            uint32_t length = 0;
            while (str[length] != '\0' && string_is_char_alpha(str[length])) {
                length++;
            }

            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';

            string = &string[length + 1];
            token = &token[1];
            str += length;
            column += length;
            continue;
        }

        // not part of any token: only keep the position counters up to date
        if (*str == '\n') {
            line++;
            column = 1;
        } else {
            column++;
        }
        str++;
    }
}
/*
 * Parses the ": value" part of an object member.
 *
 * On entry *token_id must reference the ':' token. The value that follows is
 * attached as a child of `node` (the key node). On success *token_id is left
 * on the last token that belongs to the value as reported by the nested
 * parser (for scalars: the scalar token itself).
 *
 * Returns false after logging an error when the ':' is missing, the value
 * token is not acceptable, the token stream ends early or a nested
 * object/array fails to parse.
 */
bool json_parse_assignment(json_T* json, uint32_t* token_id, json_node_T* node) {
    json_token_T* current = &json->tokens[*token_id];

    if (current->type != JSON_TOKEN_SPECIAL || current->string[0] != ':') {
        log(LOG_ERROR, "failed to parse json at position %d:%d -> expected : got \"%s\"",
            current->line, current->column, current->string);
        return false;
    }

    JSON_INCREMENT_TOKEN_ID
    current = &json->tokens[*token_id];

    if (current->type == JSON_TOKEN_STRING) {
        json_node_alloc(node, JSON_NODE_STRING, current);
        return true;
    }

    if (current->type == JSON_TOKEN_NUMERIC) {
        json_node_alloc(node, JSON_NODE_NUMBER, current);
        return true;
    }

    if (current->type == JSON_TOKEN_TEXT) {
        // bare words: only true, false and null are valid values
        if (string_compare("true", current->string)) {
            json_node_T* bool_node = json_node_alloc(node, JSON_NODE_BOOL, current);
            bool_node->value = true;
            return true;
        }
        if (string_compare("false", current->string)) {
            json_node_T* bool_node = json_node_alloc(node, JSON_NODE_BOOL, current);
            bool_node->value = false;
            return true;
        }
        if (string_compare("null", current->string)) {
            json_node_alloc(node, JSON_NODE_NULL, current);
            return true;
        }

        log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
            current->line, current->column, current->string);
        return false;
    }

    if (current->type == JSON_TOKEN_SPECIAL) {
        char opener = current->string[0];

        if (opener == '{') {
            json_node_T* object_node = json_node_alloc(node, JSON_NODE_OBJECT, current);
            JSON_INCREMENT_TOKEN_ID
            return json_parse_object(json, token_id, object_node);
        }

        if (opener == '[') {
            json_node_T* array_node = json_node_alloc(node, JSON_NODE_ARRAY, current);
            JSON_INCREMENT_TOKEN_ID
            return json_parse_array(json, token_id, array_node);
        }

        log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
            current->line, current->column, current->string);
        return false;
    }

    return true;
}
/*
 * Parses the members of an object and attaches them to `node`.
 *
 * On entry *token_id must reference the first token after '{'. Every member
 * becomes a STRING node (the key) whose child is the value. On success
 * *token_id is left ON the closing '}'; callers advance past it themselves.
 *
 * An empty object and a trailing ',' before '}' are accepted.
 *
 * Returns false after logging an error on any unexpected token or when the
 * token stream ends early.
 */
bool json_parse_object(json_T* json, uint32_t* token_id, json_node_T* node) {
    while (true) {
        switch (json->tokens[*token_id].type) {
            case JSON_TOKEN_STRING: {
                json_node_T* string_node = json_node_alloc(node, JSON_NODE_STRING, &json->tokens[*token_id]);
                JSON_INCREMENT_TOKEN_ID
                bool status = json_parse_assignment(json, token_id, string_node);
                if (!status) return false;
                break;
            }
            case JSON_TOKEN_SPECIAL: {
                switch (json->tokens[*token_id].string[0]) {
                    case '}': {
                        return true;
                    }
                    default: {
                        // any other special character in key position is a
                        // syntax error and must not be skipped silently
                        log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
                            json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                        return false;
                    }
                }
                break;
            }
            default: {
                log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
                    json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                return false;
            }
        }

        // the member must be followed by ',' (more members) or '}' (end)
        JSON_INCREMENT_TOKEN_ID
        if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != ',') {
            if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != '}') {
                log(LOG_ERROR, "failed to parse json at position %d:%d -> expected } got \"%s\"",
                    json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                return false;
            }
            return true;
        } else {
            JSON_INCREMENT_TOKEN_ID
        }
    }
}
/*
 * Parses the elements of an array and attaches them to `node`.
 *
 * On entry *token_id must reference the first token after '['. On success
 * *token_id is left ON the closing ']' - the same convention
 * json_parse_object() uses for '}' - because every caller advances past the
 * closing token itself. Advancing here as well would make the caller skip a
 * token (or report a bogus EOF when the array is the last value).
 *
 * An empty array and a trailing ',' before ']' are accepted.
 *
 * Returns false after logging an error on any unexpected token (including
 * bare words other than true / false / null) or when the token stream ends
 * early.
 */
bool json_parse_array(json_T* json, uint32_t* token_id, json_node_T* node) {
    while (true) {
        switch (json->tokens[*token_id].type) {
            case JSON_TOKEN_STRING: {
                json_node_alloc(node, JSON_NODE_STRING, &json->tokens[*token_id]);
                break;
            }
            case JSON_TOKEN_NUMERIC: {
                json_node_alloc(node, JSON_NODE_NUMBER, &json->tokens[*token_id]);
                break;
            }
            case JSON_TOKEN_TEXT: {
                if (string_compare("true", json->tokens[*token_id].string)) {
                    json_node_T* bool_node = json_node_alloc(node, JSON_NODE_BOOL, &json->tokens[*token_id]);
                    bool_node->value = true;
                    break;
                }
                if (string_compare("false", json->tokens[*token_id].string)) {
                    json_node_T* bool_node = json_node_alloc(node, JSON_NODE_BOOL, &json->tokens[*token_id]);
                    bool_node->value = false;
                    break;
                }
                if (string_compare("null", json->tokens[*token_id].string)) {
                    json_node_alloc(node, JSON_NODE_NULL, &json->tokens[*token_id]);
                    break;
                }
                // unknown bare word: report it like json_parse_assignment()
                // does instead of dropping the element silently
                log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
                    json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                return false;
            }
            case JSON_TOKEN_SPECIAL: {
                switch (json->tokens[*token_id].string[0]) {
                    case ']': {
                        // empty array, or trailing ',' before the end
                        return true;
                    }
                    case '{': {
                        json_node_T* object_node = json_node_alloc(node, JSON_NODE_OBJECT, &json->tokens[*token_id]);
                        JSON_INCREMENT_TOKEN_ID
                        bool status = json_parse_object(json, token_id, object_node);
                        if (!status) return false;
                        break;
                    }
                    case '[': {
                        json_node_T* array_node = json_node_alloc(node, JSON_NODE_ARRAY, &json->tokens[*token_id]);
                        JSON_INCREMENT_TOKEN_ID
                        bool status = json_parse_array(json, token_id, array_node);
                        if (!status) return false;
                        break;
                    }
                    default: {
                        log(LOG_ERROR, "failed to parse json at position %d:%d -> unexpected \"%s\"",
                            json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                        return false;
                    }
                }
                break;
            }
        }

        // the element must be followed by ',' (more elements) or ']' (end)
        JSON_INCREMENT_TOKEN_ID
        if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != ',') {
            if (json->tokens[*token_id].type != JSON_TOKEN_SPECIAL || json->tokens[*token_id].string[0] != ']') {
                log(LOG_ERROR, "failed to parse json at position %d:%d -> expected ] got \"%s\"",
                    json->tokens[*token_id].line, json->tokens[*token_id].column, json->tokens[*token_id].string);
                return false;
            }
            // stay on ']' - the caller steps over it
            return true;
        }
        JSON_INCREMENT_TOKEN_ID
    }
}
/*
 * Builds the node tree for an already tokenized document.
 *
 * json->root_node is always created (as an object node) before any check, so
 * that the document can be destroyed uniformly after a failure. The first
 * token has to be '{'; the members that follow are parsed into the root node.
 *
 * Returns false after logging an error when there are no tokens at all, the
 * document does not start with '{', or its content fails to parse.
 */
bool json_parse(json_T* json) {
    // the token is not read for object nodes, so this is safe even when the
    // token array is empty
    json->root_node = json_node_alloc(NULL, JSON_NODE_OBJECT, json->tokens);

    // empty or whitespace-only input: there is no tokens[0] to look at
    if (json->num_tokens == 0) {
        log(LOG_ERROR, "failed to parse json -> unexpected EOF");
        return false;
    }

    uint32_t token_id = 0;
    if (json->tokens[0].type != JSON_TOKEN_SPECIAL || json->tokens[0].string[0] != '{') {
        log(LOG_ERROR, "failed to parse json at position %d:%d -> expected { got \"%s\"", json->tokens[0].line, json->tokens[0].column, json->tokens[0].string);
        return false;
    }

    token_id++;
    if (token_id >= json->num_tokens) {
        log(LOG_ERROR, "failed to parse json -> unexpected EOF");
        return false;
    }

    return json_parse_object(json, &token_id, json->root_node);
}

View File

@ -1,173 +0,0 @@
// This file is part of noxos and licensed under the MIT open source license
#include <drivers/json/json.h>
#include <utils/memory.h>
#include <utils/logger.h>
/*
 * Allocates a document, logs the input text and tokenizes it.
 * Returns the new document.
 */
json_T* json_from_string(string_t str) {
    json_T* document = memory_allocate(sizeof *document);

    DEBUG("%s", str);
    json_tokenize(document, str);

    return document;
}
/*
 * Releases a document: first the token text storage, then the token array,
 * finally the json_T itself.
 */
void json_destruct(json_T* json) {
    void* text_storage = json->string_buffer;
    json_token_T* token_storage = json->tokens;

    memory_free(text_storage);
    memory_free(token_storage);
    memory_free(json);
}
/*
 * Counts the tokens json_tokenize() will produce for `str`, so that the
 * token array can be allocated up front. The scanning rules here must stay
 * in sync with json_tokenize().
 *
 * Characters that start none of the token classes (e.g. whitespace) are
 * skipped.
 *
 * Returns the number of tokens in `str`.
 */
uint64_t json_get_num_tokens(string_t str) {
    uint64_t num = 0;

    while (*str != '\0') {
        if (*str == '"') {
            str++;
            while (*str != '\0' && *str != '"') {
                str++;
            }
            // Only step over the closing quote if there is one: for an
            // unterminated string we are already on the NUL terminator and
            // must not walk past the end of the input.
            if (*str == '"') {
                str++;
            }
            num++;
            continue;
        }

        if (string_is_char_special(*str)) {
            num++;
            str++;
            continue;
        }

        if (string_is_char_number(*str)) {
            // skip an optional base prefix
            if (str[0] == '0' && (str[1] == 'x' || str[1] == 'b' || str[1] == 'o')) {
                str += 2;
            }
            while (*str != '\0' && string_is_char_number(*str)) {
                str++;
            }
            num++;
            continue;
        }

        if (string_is_char_alpha(*str)) {
            while (*str != '\0' && string_is_char_alpha(*str)) {
                str++;
            }
            num++;
            continue;
        }

        str++;
    }

    return num;
}
/*
 * Splits `str` into tokens and stores them in `json`.
 *
 * Fills json->num_tokens, json->tokens and json->string_buffer. Every token
 * gets a NUL-terminated copy of its text inside the string buffer and is
 * logged via DEBUG together with its running index.
 *
 * Token classes:
 *  - STRING:  text between double quotes (quotes are not stored),
 *  - SPECIAL: a single character accepted by string_is_char_special(),
 *  - NUMERIC: digits with an optional 0x / 0b / 0o prefix, value accumulated
 *             in the selected base,
 *  - TEXT:    a run of alphabetic characters.
 * Everything else is skipped.
 */
void json_tokenize(json_T* json, string_t str) {
    json->num_tokens = json_get_num_tokens(str);
    json->tokens = memory_allocate(json->num_tokens * sizeof(json_token_T));
    // each token needs its text plus one terminator, and the token texts
    // never add up to more than the input length
    json->string_buffer = memory_allocate(string_length(str) + json->num_tokens);

    string_t string = json->string_buffer;
    json_token_T* token = &json->tokens[0];
    uint32_t i = 0;

    while (*str != '\0') {
        if (*str == '"') {
            token->type = JSON_TOKEN_STRING;
            token->value = 0;

            str++;
            uint32_t length = 0;
            while (str[length] != '\0' && str[length] != '"') {
                length++;
            }

            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            DEBUG("token: %d STRING %s", i++, token->string);

            string = &string[length + 1];
            token = &token[1];
            str += length;
            // Consume the closing quote only if it exists. For an
            // unterminated string `str` already points at the NUL terminator
            // and must not be moved past the end of the input.
            if (*str == '"') {
                str++;
            }
            continue;
        }

        if (string_is_char_special(*str)) {
            token->type = JSON_TOKEN_SPECIAL;
            token->value = 0;

            token->string = string;
            *(char*)string = *str;
            ((char*)string)[1] = '\0';
            DEBUG("token: %d SPECIAL %s", i++, token->string);

            string = &string[2];
            token = &token[1];
            str += 1;
            continue;
        }

        if (string_is_char_number(*str)) {
            token->type = JSON_TOKEN_NUMERIC;
            token->value = 0;

            uint8_t base = 10;
            uint32_t length = 0;
            if (str[0] == '0' && str[1] == 'x') {
                base = 16;
                length = 2;
            } else if (str[0] == '0' && str[1] == 'b') {
                base = 2;
                length = 2;
            } else if (str[0] == '0' && str[1] == 'o') {
                base = 8;
                length = 2;
            }

            while (str[length] != '\0' && string_is_char_number(str[length])) {
                token->value *= base;
                token->value += str[length] - '0';
                length++;
            }

            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            DEBUG("token: %d NUM %s %d", i++, token->string, token->value);

            string = &string[length + 1];
            token = &token[1];
            str += length;
            continue;
        }

        if (string_is_char_alpha(*str)) {
            token->type = JSON_TOKEN_TEXT;
            token->value = 0;

            uint32_t length = 0;
            while (str[length] != '\0' && string_is_char_alpha(str[length])) {
                length++;
            }

            token->string = string;
            memory_copy(str, string, length);
            ((char*)string)[length] = '\0';
            DEBUG("token: %d TEXT %s", i++, token->string);

            // advance past the terminator as well; otherwise the next token
            // would overwrite it and both texts would run together
            string = &string[length + 1];
            token = &token[1];
            str += length;
            continue;
        }

        str++;
    }
}