diff options
author | justanothercatgirl <sotov@twistea.su> | 2025-03-22 14:09:55 +0300 |
---|---|---|
committer | justanothercatgirl <sotov@twistea.su> | 2025-03-22 14:09:55 +0300 |
commit | 0612bfa57649c4bffea0ca6e436539d8c3193a80 (patch) | |
tree | 67f98dcd8bf98194761c0ea3de4c2d48ca674bea | |
parent | ff8ab43201e5c862003d9353016409c13691562c (diff) |
Added json serializer/parser
-rw-r--r-- | Makefile | 10 | ||||
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | include/container.h | 2 | ||||
-rw-r--r-- | include/jacson.h | 856 | ||||
-rw-r--r-- | tests/json.c | 66 |
5 files changed, 934 insertions, 3 deletions
@@ -2,7 +2,7 @@ CC := gcc CFLAGS := -g -Wall -Wextra -Wpedantic -Werror -Wno-language-extension-token -Wno-stringop-overread -Iinclude -tests: container types log +tests: container types log json container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c \ tests/binary_search.c tests/linked_list.c tests/hashset.c \ @@ -30,14 +30,18 @@ container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c \ rm $@ -types: tests/types.c +types: tests/types.c include/rstypes.h $(CC) -o $@ tests/types.c $(CFLAGS) ./$@ rm $@ -log: tests/log.c +log: tests/log.c include/log.h $(CC) -o $@ tests/log.c $(CFLAGS) - ./$@ rm $@ +json: tests/json.c include/jacson.h + $(CC) -o $@ tests/json.c $(CFLAGS) + ./$@ + rm $@ @@ -22,6 +22,9 @@ This is just a collection of (usually single-purpos) header files that I use ref * Summary: Code generator for embedding resources directly into an executable. * How to use: It exposes a C interface, so in order to use it, you will need to have a C program that builds resources for you. It has its own repo, but I will be adding a CLI there soon. * Examples: see [this repo](https://github.com/justanothercatgirl/embed_test.c/) +## [`jacson.h`](include/jacson.h) +* Summary: json generator and parser that supports comments. +* How to use: define the `JACSON_IMPLEMENTATION` macro. `CONTAINER_IMPLEMENTATION` MUST be defined as well, either somewhere earlier in the code before including `container.h` or just before `jacson.h`. This JSON implementation uses `array` and `hash_set`. ## `build.h` nearest TODO for now. * Summary: a build system based on C. To compile something under it, you do something like `cc -o builder builder.c && ./builder`.
The idea is stolen from [Tsoding](https://example.com) diff --git a/include/container.h b/include/container.h index 1401162..5f40e4a 100644 --- a/include/container.h +++ b/include/container.h @@ -130,6 +130,8 @@ typedef void*(*memcpy_t)(void* restrict, const void*, size_t); } while(0) /* Removes last element from the array */ #define array_pop(array) --array_header(array)->size +/* get last element of an array */ +#define array_last(array) (array)[array_header((array))->size - 1] /* Reserve length elements so that subsequent (length - current_size) pushes require no reallocation */ #define array_reserve(array, length) do { array = _memreserve_dynarray(array, length); } while (0) /* Change size of an array. the capacity is set to lowest power of 2 that is greater than length */ diff --git a/include/jacson.h b/include/jacson.h new file mode 100644 index 0000000..046c1d7 --- /dev/null +++ b/include/jacson.h @@ -0,0 +1,856 @@ +#ifndef JAC_JACSON_H +#define JAC_JACSON_H + +#include <assert.h> +#include <stddef.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> + +#include "container.h" + + + +typedef enum jacson_type { + /* primitive types */ + JSON_TYPE_NULL, + JSON_TYPE_BOOLEAN, + /* this distinction is not standard but useful in C */ + JSON_TYPE_INTEGER, + JSON_TYPE_REAL, + /* composite types */ + JSON_TYPE_STRING, + JSON_TYPE_ARRAY, + JSON_TYPE_OBJECT, +} jacson_type; + +/* NOSP or NOSPACE are the same, because + * serialization only checks `policy == PRETTY` + * You could pass 42069 to there as a way + * to remove spaces. Literally. 
*/ +enum jacson_serialize_policy { + JSON_SERIALIZE_PRETTY = 0, + JSON_SERIALIZE_NOSP, + JSON_SERIALIZE_NOSPACE, +}; + +/* I am lazy */ +typedef struct hash_set jacson_object; + +/* jacson array (sized array) */ +typedef struct jacson_array { + struct jacson_value *data; + size_t size, capacity; +} jacson_array; + +/* jacson string (sized string) */ +typedef struct jacson_string { + char *data; + size_t size; +} jacson_string; + +/* jacson value (tagged union) */ +typedef struct jacson_value { + enum jacson_type type; + union { + bool boolean; + long integer; + double real; + struct jacson_string string; + struct jacson_array array; + jacson_object object; + }; +} jacson_value; + +/* represents name-value pair + * should probably not be used in user code*/ +struct jacson_pair { + struct jacson_string name; + struct jacson_value value; +}; + + + +/* helper hash functions */ +int jacson_pair_eq(const void *a, const void *b); +size_t jacson_pair_hash(const void *a); + +/* object constructors / destructors */ +jacson_object jacson_object_new(); +jacson_object jacson_object_deep_copy(const jacson_object *other); +void jacson_object_free(jacson_object *obj); +/* string constructors / destructors */ +jacson_string jacson_string_from_buffer(const char *str); +jacson_string jacson_string_from_buffer_n(const char *str, size_t s); +jacson_string jacson_string_copy(const jacson_string *str); +jacson_string jacson_string_mallocated(char *str); +void jacson_string_free(jacson_string str); +/* array constructors / destructors */ +jacson_array jacson_array_new(size_t reserve); /* initial size 0 */ +jacson_array jacson_array_from_buffer(const jacson_value *values, size_t s); +jacson_array jacson_array_deep_copy(const jacson_array *arr); +jacson_array jacson_array_mallocated(jacson_value *values, size_t s); +void jacson_array_free(struct jacson_array arr); +/* value constructors / destructors */ +jacson_value jacson_value_deep_copy(const jacson_value *other); +void 
jacson_value_free(struct jacson_value val); + +/* serialization / parsing + * returned string must be freed */ +char *jacson_serialize(jacson_value *obj, enum jacson_serialize_policy); +/* return value indicates success + * obj must be NOT initialized */ +bool jacson_parse(jacson_value *obj, const char *str); + +/* wrappers + * these functions take ownership of the memory + * (except for string literal one) */ +jacson_value jacson_wrap_null(void); +jacson_value jacson_wrap_boolean(bool b); +jacson_value jacson_wrap_integer(long i); +jacson_value jacson_wrap_real(double d); +jacson_value jacson_wrap_string(jacson_string s); +jacson_value jacson_wrap_string_literal(const char *s); +jacson_value jacson_wrap_array(jacson_array a); +jacson_value jacson_wrap_object(jacson_object o); + +/* access: insert/read */ +/* transfers ownership of val, copies name */ +void jacson_add(jacson_object *obj, const char *name, jacson_value val); +/* transfers ownership of val and name */ +void jacson_take_and_add(jacson_object *obj, struct jacson_string name, struct jacson_value val); +/* copies both val and name */ +void jacson_copy_and_add(jacson_object *obj, const char *name, const jacson_value *val); +jacson_value *jacson_at(jacson_object *obj, const char *name); + +/* same for arrays */ +void jacson_array_reserve(jacson_array *arr, size_t s); +void jacson_array_append(jacson_array *arr, jacson_value val); +void jacson_array_append_copy(jacson_array *arr, const jacson_value *val); +jacson_value *jacson_array_at(jacson_array *arr, size_t idx); + +/* access: typed getters */ +bool jacson_is_null(jacson_value *obj); +long jacson_get_integer(jacson_value *obj); +double jacson_get_real(jacson_value *obj); +bool jacson_get_boolean(jacson_value *obj); +jacson_array jacson_get_array(jacson_value *obj); +jacson_object *jacson_get_object(jacson_value *obj); + +/* access: untyped getters */ +bool jacson_get_to(jacson_value *obj, void *mem); +bool jacson_try_get_to(jacson_value *obj, enum 
jacson_type type, void *mem); + + + +#define JACSON_IMPLEMENTATION +#ifdef JACSON_IMPLEMENTATION + +#include <ctype.h> + +/* structure to represent intermediate parsing state + * should not be used in user code */ +struct __jcsn_token { + enum __jcsn_tokens { + __JCSN_PRIMITIVE_NUL = 1 << 0, + __JCSN_PRIMITIVE_BLN = 1 << 1, + __JCSN_PRIMITIVE_INT = 1 << 2, + __JCSN_PRIMITIVE_FLT = 1 << 3, + __JCSN_PRIMITIVE_STR = 1 << 4, + __JSCN_PRIMITIVE = __JCSN_PRIMITIVE_NUL | __JCSN_PRIMITIVE_BLN + | __JCSN_PRIMITIVE_INT | __JCSN_PRIMITIVE_FLT + | __JCSN_PRIMITIVE_STR, + __JSCN_TOK_CBO = 1 << 5, /* curly bracket open */ + __JSCN_TOK_CBC = 1 << 6, /* curly bracket close */ + __JSCN_TOK_SBO = 1 << 7, /* square bracket open */ + __JSCN_TOK_SBC = 1 << 8, /* square bracket close */ + __JSCN_TOK_OPN = __JSCN_TOK_CBO | __JSCN_TOK_SBO, + __JSCN_TOK_CLS = __JSCN_TOK_CBC | __JSCN_TOK_SBC, + __JSCN_TOK_COM = 1 << 9, /* a coma */ + __JSCN_TOK_COL = 1 << 10, /* a colon */ + __JSCN_VAL = __JSCN_PRIMITIVE | __JSCN_TOK_OPN, + __JSCN_TOK_EOF = 1 << 11, /* end of file */ + __JSCN_TOK_INI = 1 << 12, /* initial token */ + __JSCN_ERROR = 1 << 13, /* parsing error */ + } type; + union { + bool b; + long l; + char *s; + double f; + }; +}; +enum __jacson_numtype { + __NT_FLT, + __NT_INT, + __NT_ERR, /* erronous number */ +}; +union __jacson_number { + long i; + double f; +}; +enum __jacson_stack { + __ST_OBJ, + __ST_ARR, +}; + + + +/* helper serialization functions */ +size_t __jacson_rsnprintf(char **str, size_t *strsz, size_t off, const char *fmt, ...); +size_t __jacson_rsnputc(char **str, size_t *strsz, size_t off, char c); +size_t __jacson_array_serialize(struct jacson_array *arr, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation); +size_t __jacson_object_serialize(jacson_object *arr, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation); +size_t __jacson_value_serialize(struct jacson_value *val, char **str, size_t 
*strsz, size_t off, enum jacson_serialize_policy policy, int identation); + + +/* resize string numbered print formatted */ +size_t __jacson_rsnprintf(char **const str, size_t *strsz, size_t off, const char *fmt, ...) { + va_list ls, ls1; + size_t ln; + va_start(ls, fmt); +try_write: + va_copy(ls1, ls); + ln = vsnprintf(*str + off, *strsz-off, fmt, ls1); + va_end(ls1); + if (ln >= *strsz-off) { + *strsz <<= 1; + *str = realloc(*str, *strsz); + goto try_write; + } + va_end(ls); + return ln; +} +size_t __jacson_rsnputc(char **const str, size_t *strsz, size_t off, char c) { + if (*strsz - off <= 1) { + *strsz <<= 1; + *str = realloc(*str, *strsz); + } + (*str)[off++] = c; + (*str)[off] = '\0'; + return 1; +} + +int jacson_pair_eq(const void *a, const void *b) { + const struct jacson_pair *_a = a; + const struct jacson_pair *_b = b; + printf("comparing %s(%zu), %s(%zu)\n", _a->name.data, _a->name.size, _b->name.data, _b->name.size); + return _a->name.size != _b->name.size + || strcmp(_a->name.data, _b->name.data); +} +size_t jacson_pair_hash(const void *a) { +#define HASHMUL 7 + const struct jacson_pair *_a = a; + size_t ret = 0; + char *retmem = (void*)&ret; + size_t i = 0; + for (i = 0; i < _a->name.size / __SIZEOF_SIZE_T__; i++) + for (size_t j = 0; j < __SIZEOF_SIZE_T__; ++j) + retmem[j] ^= _a->name.data[i*__SIZEOF_SIZE_T__+j] * HASHMUL; + for (size_t j = i; j < _a->name.size; ++j) + retmem[j % __SIZEOF_SIZE_T__] ^= _a->name.data[j] * HASHMUL; + + return ret; +} + +jacson_object jacson_object_new() { + return hset_new(sizeof (struct jacson_pair), jacson_pair_eq, jacson_pair_hash); +} +jacson_object jacson_object_deep_copy(const jacson_object *other) { + jacson_object ret = hset_new(sizeof (struct jacson_pair), jacson_pair_eq, jacson_pair_hash); + hset_rehash_to_size(&ret, array_size(other->buckets)); + struct hash_set_iter iter; + for (hseti_begin(other, &iter); !hseti_end(&iter); hseti_next(&iter)) { + struct jacson_pair *old = hseti_get(&iter); + struct 
jacson_pair new_p = { + .name = jacson_string_copy(&old->name), + .value = jacson_value_deep_copy(&old->value) + }; + hset_insert_copy(&ret, &new_p); + } + return ret; +} +void jacson_object_free(jacson_object *obj) { + struct hash_set_iter iter; + for (hseti_begin(obj, &iter); !hseti_end(&iter); hseti_next(&iter)) { + struct jacson_pair *p = hseti_get(&iter); + free(p->name.data); + jacson_value_free(p->value); + } + hset_free(obj); +} +jacson_string jacson_string_from_buffer(const char *str) { + struct jacson_string ret; + ret.size = strlen(str); + ret.data = malloc(ret.size + 1); + /* copy includes '\0' */ + memcpy(ret.data, str, ret.size + 1); + return ret; +} +jacson_string jacson_string_from_buffer_n(const char *str, size_t s) { + struct jacson_string ret; + ret.size = s; + ret.data = malloc(s + 1); + memcpy(ret.data, str, s + 1); + ret.data[s] = '\0'; + return ret; +} +jacson_string jacson_string_copy(const jacson_string *str) { + jacson_string ret = {.data = malloc(str->size + 1), .size = str->size}; + memcpy(ret.data, str->data, ret.size+1); + return ret; +} +jacson_string jacson_string_mallocated(char *str) { + return (struct jacson_string){.size = strlen(str), .data = str}; +} +void jacson_string_free(jacson_string str) { + free(str.data); +} +jacson_array jacson_array_new(size_t reserve) { + struct jacson_array ret = { + .data = malloc(sizeof (struct jacson_value) * reserve), + .size = 0, + .capacity = reserve + }; + return ret; +} +jacson_array jacson_array_from_buffer(const jacson_value *values, size_t s) { + struct jacson_array ret = { + .data = malloc(s * sizeof (struct jacson_value)), + .size = s, + .capacity = s}; + for (size_t i = 0; i < s; ++i) ret.data[i] = jacson_value_deep_copy(values + i); + return ret; +} +jacson_array jacson_array_deep_copy(const jacson_array *arr) { + struct jacson_array ret = { + .data = malloc(arr->size * sizeof (struct jacson_value)), + .size = arr->size, + .capacity = arr->size}; + for (size_t i = 0; i < ret.size; 
++i) { + ret.data[i] = jacson_value_deep_copy(arr->data + i); + } + return ret; +} +jacson_array jacson_array_mallocated(jacson_value *values, size_t s) { + return (struct jacson_array){.data = values, .size = s, .capacity = s}; +} +void jacson_array_free(struct jacson_array arr) { + for (size_t i = 0; i < arr.size; ++i) + jacson_value_free(arr.data[i]); + free(arr.data); +} +jacson_value jacson_value_deep_copy(const jacson_value *val) { + jacson_value ret = {.type = val->type}; + switch (ret.type) { + case JSON_TYPE_NULL: break; + case JSON_TYPE_BOOLEAN: ret.boolean = val->boolean; break; + case JSON_TYPE_INTEGER: ret.integer = val->integer ; break; + case JSON_TYPE_REAL: ret.real = val->real ; break; + case JSON_TYPE_STRING: ret.string = jacson_string_copy(&val->string); break; + case JSON_TYPE_ARRAY: ret.array = jacson_array_deep_copy(&val->array); break; + case JSON_TYPE_OBJECT: ret.object = jacson_object_deep_copy(&val->object); break; + } + return ret; +} +void jacson_value_free(struct jacson_value val) { + switch(val.type) { + case JSON_TYPE_NULL: + case JSON_TYPE_BOOLEAN: + case JSON_TYPE_REAL: + case JSON_TYPE_INTEGER: return; + case JSON_TYPE_STRING: free(val.string.data); return; + case JSON_TYPE_ARRAY: jacson_array_free(val.array); return; + case JSON_TYPE_OBJECT: jacson_object_free(&val.object); return; + } +} + +size_t __jacson_value_serialize(struct jacson_value *val, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation) { + switch(val->type) { + case JSON_TYPE_NULL: off += __jacson_rsnprintf(str, strsz, off, "null"); break; + case JSON_TYPE_BOOLEAN: off += __jacson_rsnprintf(str, strsz, off, val->boolean ? 
"true" : "false"); break; + case JSON_TYPE_INTEGER: off += __jacson_rsnprintf(str, strsz, off, "%li", val->integer); break; + case JSON_TYPE_REAL: off += __jacson_rsnprintf(str, strsz, off, "%lf", val->real); break; + case JSON_TYPE_STRING: off += __jacson_rsnprintf(str, strsz, off, "\"%s\"", val->string.data); break; + case JSON_TYPE_ARRAY: off = __jacson_array_serialize(&val->array, str, strsz, off, policy, identation); break; + case JSON_TYPE_OBJECT: off = __jacson_object_serialize(&val->object, str, strsz, off, policy, identation); break; + } + return off; +} +size_t __jacson_array_serialize(struct jacson_array *arr, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation) { + off += __jacson_rsnputc(str, strsz, off, '['); + identation+= 2; + bool coma = false; + for (size_t i = 0; i < arr->size; ++i) { + if (coma) off += __jacson_rsnputc(str, strsz, off, ','); + if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnprintf(str, strsz, off, "\n%*s", identation*4, ""); + off = __jacson_value_serialize(arr->data + i, str, strsz, off, policy, identation); + coma = true; + } + identation -= 2; + if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnprintf(str, strsz, off, "\n%*s", identation*4, ""); + off += __jacson_rsnputc(str, strsz, off, ']'); + return off; +} +size_t __jacson_object_serialize(jacson_object *obj, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation) { + struct hash_set_iter iter; + bool coma = false; + off += __jacson_rsnputc(str, strsz, off, '{'); + ++identation; + for (hseti_begin(obj, &iter); !hseti_end(&iter); hseti_next(&iter)) { + struct jacson_pair *p = hseti_get(&iter); + if (coma) off += __jacson_rsnputc(str, strsz, off, ','); + if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnprintf(str, strsz, off, "\n%*s", identation*4, ""); + off += __jacson_rsnprintf(str, strsz, off, "\"%s\":", p->name.data); + if (policy == JSON_SERIALIZE_PRETTY) off += 
__jacson_rsnputc(str, strsz, off, ' '); + off = __jacson_value_serialize(&p->value, str, strsz, off, policy, identation); + coma = true; + } + --identation; + if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnprintf(str, strsz, off, "\n%*s", identation*4, ""); + off += __jacson_rsnputc(str, strsz, off, '}'); + return off; +} + +char *jacson_serialize(jacson_value *obj, enum jacson_serialize_policy policy) { + size_t strsz = 128; + char *str = malloc(strsz); + __jacson_value_serialize(obj, &str, &strsz, 0, policy, 0); + return str; +} +/* after return points either to the beginning of next token + * (if there was whitespace), or to the end of previous one + * (if there was no whitespace) */ +static void __jacson_skipws(char const ** const str) { + while (true) { + switch(**str) { + case 0x20: + case 0x09: + case 0x0A: + case 0x0D: ++*str; continue; + default: return; + } + } +} +static enum __jacson_numtype __jacson_numtype(const char **const str, union __jacson_number *res) { + bool iflag = true; + const char *begin = *str; + if (**str == '-' || **str == '+') ++*str; + if (!isdigit(**str)) return __NT_ERR; + while (isdigit(**str)) ++*str; + + if (**str == '.' || **str == 'e' || **str == 'E') { + iflag = false; + if (**str == '.') { + ++*str; + if (!isdigit(**str)) return __NT_ERR; + while (isdigit(**str)) ++*str; + } + if (**str == 'e' || **str == 'E') { + ++*str; + if (**str == '-' || **str == '+') ++*str; + if (!isdigit(**str)) return __NT_ERR; + while (isdigit(**str)) ++*str; + } + } + if (iflag) res->i = atol(begin); + else res->f = atof(begin); + return iflag ? 
__NT_INT : __NT_FLT; +} +static unsigned short __jacson_hex16(const char *str, bool *e) { + unsigned short ret = 0; + for (int i = 0; i < 4; ++i) { + unsigned short val = 0; + if (isdigit(str[i])) + val = str[i] - '0'; + else if ('a' <= str[i] && str[i] <= 'f') + val = str[i] - 'a' + 10; + else if ('A' <= str[i] && str[i] <= 'F') + val = str[i] - 'A' + 10; + else { + *e = false; + return 0xFFFF; + } + ret |= val << (4 * (3-i)); + } + *e = true; + return ret; +} +static char __jacson_get_utf8(const char **const str, char *buf) { + bool val; + ++*str; /* skip u */ + unsigned short b1 = __jacson_hex16(*str, &val), b2; + int codepoint; + if (!val) return 0; + *str += 4; + if (0xD800 <= b1 && b1 <= 0xDBFF) { + if (*(*str)++ != '\\') return 0; + if (*(*str)++ != 'u') return 0; + b2 = __jacson_hex16(*str, &val); + *str += 4; + if (!val || 0xDC00 > b2 || b2 > 0xDFFF) return 0; + codepoint = 0x10000 + ((b1 - 0xD800) << 10) + (b2 - 0xDC00); + } else codepoint = b1; + if (codepoint <= 0x7F) { + buf[0] = codepoint; + return 1; + } else if (codepoint <= 0x7FF) { + buf[0] = 0xC0 | ((codepoint >> 6) & 0x1F); + buf[1] = 0x80 | ((codepoint) & 0x3F); + return 2; + } else if (codepoint <= 0xFFFF) { + buf[0] = 0xE0 | ((codepoint >> 12) & 0xF); + buf[1] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[2] = 0x80 | ((codepoint) & 0x3F); + return 3; + } else if (codepoint <= 0x10FFFF){ + buf[0] = 0xF0 | ((codepoint >> 18) & 0x7); + buf[1] = 0x80 | ((codepoint >> 12) & 0x3F); + buf[2] = 0x80 | ((codepoint >> 6) & 0x3F); + buf[3] = 0x80 | ((codepoint) & 0x3F); + return 4; + } else return 0; +} +static char *__jacson_parse_string(const char **const str) { + /* MUST be escaped: quotation mark, reverse solidus, + * and the control characters (U+0000 through U+001F). 
*/ + ++*str; /* skip " */ + + size_t size = 16, off = 0; + char *ret = malloc(size); + ret[0] = '\0'; /* edge case */ + while (**str != '"') { + if (**str != '\\') off += __jacson_rsnputc(&ret, &size, off, **str); + else switch (*++*str) { + case '"': + case '/': + case '\\':off += __jacson_rsnputc(&ret, &size, off, **str); break; + case 't': off += __jacson_rsnputc(&ret, &size, off, '\t'); break; + case 'r': off += __jacson_rsnputc(&ret, &size, off, '\r'); break; + case 'n': off += __jacson_rsnputc(&ret, &size, off, '\n'); break; + case 'f': off += __jacson_rsnputc(&ret, &size, off, '\f'); break; + case 'b': off += __jacson_rsnputc(&ret, &size, off, '\b'); break; + case 'u': { + char buffer[4]; /* utf-8 */ + char sz = __jacson_get_utf8(str, buffer); + if (sz == 0) { + free(ret); + return NULL; + } + off += __jacson_rsnprintf(&ret, &size, off, "%.*s", (int)sz, buffer); + continue; + } break; + default: free(ret); return NULL; + } + ++*str; + } + ++*str; + return ret; +} +#ifdef DEBUG +static void __jacson_print_token(struct __token t, FILE *f) { + fprintf(f, "token { .type = 0x%04X (", t.type); + switch (t.type) { + case __PRIMITIVE_NUL: fprintf(f, "NUL), .val = null"); break; + case __PRIMITIVE_BLN: fprintf(f, "BLN), .val = %s", t.b ? 
"true" : "false"); break; + case __PRIMITIVE_INT: fprintf(f, "INT), .val = %li", t.l); break; + case __PRIMITIVE_FLT: fprintf(f, "FLT), .val = %lf", t.f); break; + case __PRIMITIVE_STR: fprintf(f, "STR), .val = \"%s\"", t.s); break; + case __TOK_CBO: fprintf(f, "CBO)"); break; + case __TOK_CBC: fprintf(f, "CBC)"); break; + case __TOK_SBO: fprintf(f, "SBO)"); break; + case __TOK_SBC: fprintf(f, "SBC)"); break; + case __TOK_COM: fprintf(f, "COM)"); break; + case __TOK_COL: fprintf(f, "COL)"); break; + case __TOK_EOF: fprintf(f, "EOF)"); break; + case __TOK_INI: fprintf(f, "INI)"); break; + case __ERROR: fprintf(f, "ERROR)"); break; + default: fprintf(f, "UNKNOWN)"); break; + } + fprintf(f, " }\n"); +} +#endif /* DEBUG */ +static struct __jcsn_token __jacson_get_token(const char **const str, enum __jcsn_tokens tokens) { + __jacson_skipws(str); + struct __jcsn_token tok; + switch (**str) { + case '{': tok.type = __JSCN_TOK_CBO; ++*str; break; + case '}': tok.type = __JSCN_TOK_CBC; ++*str; break; + case '[': tok.type = __JSCN_TOK_SBO; ++*str; break; + case ']': tok.type = __JSCN_TOK_SBC; ++*str; break; + case ',': tok.type = __JSCN_TOK_COM; ++*str; break; + case ':': tok.type = __JSCN_TOK_COL; ++*str; break; + case '\0': tok.type = __JSCN_TOK_EOF; return tok; + case '+': + case '-': + case '0': /* this could have been case '0'...'9', but I removed it for interoperability */ + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + union __jacson_number n; + switch(__jacson_numtype(str, &n)) { + case __NT_FLT: tok.type = __JCSN_PRIMITIVE_FLT; tok.f = n.f; break; + case __NT_INT: tok.type = __JCSN_PRIMITIVE_INT; tok.l = n.i; break; + case __NT_ERR: tok.type = __JSCN_ERROR; break; + } + } break; + case 't': if (!strncmp((*str)+1, "rue", 3)) { + tok.type = __JCSN_PRIMITIVE_BLN; + tok.b = true; + *str += 4; + } else tok.type = __JSCN_ERROR; + break; + case 'f': if (!strncmp((*str)+1, "alse", 4)) { + tok.type = 
__JCSN_PRIMITIVE_BLN; + tok.b = false; + *str += 5; + } else tok.type = __JSCN_ERROR; + break; + case 'n': if (!strncmp((*str)+1, "ull", 3)) { + tok.type = __JCSN_PRIMITIVE_NUL; + *str += 4; + } else tok.type = __JSCN_ERROR; + break; + case '"': { + tok.s = __jacson_parse_string(str); + if (tok.s == NULL) tok.type = __JSCN_ERROR; + else tok.type = __JCSN_PRIMITIVE_STR; + } break; + default: tok.type = __JSCN_ERROR; break; + } + if ((tok.type & tokens) == 0) + tok.type = __JSCN_ERROR; + return tok; +} +/* Caller can assume that token stream is valid jacson + * (e.g. it can not start with colon) + * Since the validity check is done, tokens like ':' and ',' + * are not in the stream. */ +static struct __jcsn_token *__jacson_tokenize_string(const char *str) { + struct __jcsn_token *tokens = array_new(struct __jcsn_token, 0); + enum __jacson_stack *stack = array_new(enum __jacson_stack, 0); + bool str_iskey = false; + array_reserve(tokens, 16); + array_reserve(stack, 16); + enum __jcsn_tokens allowed = __JSCN_VAL; + for (struct __jcsn_token tok = {.type = __JSCN_TOK_INI}; tok.type != __JSCN_ERROR; tok = __jacson_get_token(&str, allowed)) { + switch (tok.type) { + case __JSCN_TOK_INI: continue; + case __JSCN_TOK_CBO: + array_push(stack, __ST_OBJ); + str_iskey = true; + allowed = __JCSN_PRIMITIVE_STR | __JSCN_TOK_CBC; + break; + case __JSCN_TOK_SBO: + array_push(stack, __ST_ARR); + str_iskey = false; + allowed = __JSCN_VAL | __JSCN_TOK_SBC; + break; + case __JSCN_TOK_CBC: + if (array_last(stack) != __ST_OBJ) goto evacuate; + array_pop(stack); + allowed = __JSCN_TOK_COM | __JSCN_TOK_CLS; + str_iskey = array_last(stack) == __ST_OBJ; + break; + case __JSCN_TOK_SBC: + if (array_last(stack) != __ST_ARR) goto evacuate; + array_pop(stack); + allowed = __JSCN_TOK_COM | __JSCN_TOK_CLS; + str_iskey = array_last(stack) == __ST_OBJ; + break; + case __JSCN_TOK_COM: + allowed = __JCSN_PRIMITIVE_STR; + if (array_last(stack) == __ST_ARR) + allowed |= __JSCN_VAL; + /* TODO: check if 
removing this breaks code */ + /* DONE: yes it does */ + str_iskey = array_last(stack) == __ST_OBJ; + break; + case __JSCN_TOK_COL: + allowed = __JSCN_VAL; + str_iskey = false; + break; + case __JSCN_TOK_EOF: + if (array_size(stack) == 0) goto endwhile; + else goto evacuate; + case __JCSN_PRIMITIVE_STR: + if (str_iskey) allowed = __JSCN_TOK_COL; + else allowed = __JSCN_TOK_COM | __JSCN_TOK_CLS; + break; + case __JCSN_PRIMITIVE_NUL: + case __JCSN_PRIMITIVE_BLN: + case __JCSN_PRIMITIVE_INT: + case __JCSN_PRIMITIVE_FLT: + allowed = __JSCN_TOK_COM | __JSCN_TOK_CLS; + break; + case __JSCN_ERROR: goto evacuate; + default: UNREACHABLE; + } + if (tok.type & (__JSCN_PRIMITIVE | __JSCN_TOK_OPN | __JSCN_TOK_CLS)) array_push(tokens, tok); + if (array_size(stack) == 0) { + tok = __jacson_get_token(&str, 0xFFFFFFFF); + if (tok.type != __JSCN_TOK_EOF) goto evacuate; + } + } +endwhile: + if (array_size(stack) != 0) goto evacuate; + array_free(stack); + return tokens; +evacuate: + for (size_t i= 0; i < array_size(tokens); ++i) { + if (tokens[i].type == __JCSN_PRIMITIVE_STR) free(tokens[i].s); + } + array_free(tokens); + array_free(stack); + return NULL; +} +jacson_object __jacson_object_parse(struct __jcsn_token *stream, size_t *offset); +struct jacson_array __jacson_array_parse(struct __jcsn_token *stream, size_t *offset); + +/* stream must point to one of the token types listed in switch statement */ +struct jacson_value __jacson_value_parse(struct __jcsn_token *stream, size_t *offset) { + /* A wacky way to access element at index 0 */ + switch (stream[*offset].type) { + case __JCSN_PRIMITIVE_NUL: return jacson_wrap_null(); + case __JCSN_PRIMITIVE_BLN: return jacson_wrap_boolean(stream[(*offset)].b); + case __JCSN_PRIMITIVE_INT: return jacson_wrap_integer(stream[(*offset)].l); + case __JCSN_PRIMITIVE_FLT: return jacson_wrap_real(stream[(*offset)].f); + case __JCSN_PRIMITIVE_STR: return jacson_wrap_string(jacson_string_mallocated(stream[(*offset)].s)); + case __JSCN_TOK_CBO: 
return jacson_wrap_object(__jacson_object_parse(stream, offset)); + case __JSCN_TOK_SBO: return jacson_wrap_array(__jacson_array_parse(stream, offset)); + default: UNREACHABLE; + } +} +/* The stream must point to '{' token */ +jacson_object __jacson_object_parse(struct __jcsn_token *stream, size_t *offset) { + jacson_object ret = jacson_object_new(); + while (stream[++*offset].type != __JSCN_TOK_CBC) { + struct jacson_pair newp; + /* __tokenize_string has already done checks for validity */ + newp.name = jacson_string_mallocated(stream[(*offset)++].s); + newp.value = __jacson_value_parse(stream, offset); + struct jacson_pair *oldval; + if ((oldval = hset_at(&ret, &newp)) != NULL) { + struct jacson_pair oldcpy = *oldval; + hset_remove(&ret, oldval); + free(oldcpy.name.data); + jacson_value_free(oldcpy.value); + } + hset_insert_copy(&ret, &newp); + } + return ret; +} +/* The stream must point to '[' tokenb */ +struct jacson_array __jacson_array_parse(struct __jcsn_token *stream, size_t *offset) { + struct jacson_array ret = jacson_array_new(16); + while (stream[++*offset].type != __JSCN_TOK_SBC) { + jacson_array_append(&ret, __jacson_value_parse(stream, offset)); + } + return ret; +} +bool jacson_parse(jacson_value *obj, const char *str) { + struct __jcsn_token *stream = __jacson_tokenize_string(str); + size_t off = 0; + if (stream == NULL) return false; /* ligma ballse */ + *obj = __jacson_value_parse(stream, &off); + array_free(stream); + return true; /* Yebalo peretrue */ +} +jacson_value jacson_wrap_null(void) { + return (struct jacson_value){.type = JSON_TYPE_NULL}; +} +jacson_value jacson_wrap_boolean(bool b) { + return (struct jacson_value){.type = JSON_TYPE_BOOLEAN, .boolean = b}; +} +jacson_value jacson_wrap_integer(long i) { + return (struct jacson_value){.type = JSON_TYPE_INTEGER, .integer = i}; +} +jacson_value jacson_wrap_real(double d) { + return (struct jacson_value){.type = JSON_TYPE_REAL, .real = d}; +} +jacson_value jacson_wrap_string(jacson_string 
s) { + return (struct jacson_value){.type = JSON_TYPE_STRING, .string = s}; +} +jacson_value jacson_wrap_string_literal(const char *s) { + return jacson_wrap_string(jacson_string_from_buffer(s)); +} +jacson_value jacson_wrap_array(jacson_array a) { + return (struct jacson_value){.type = JSON_TYPE_ARRAY, .array = a}; +} +jacson_value jacson_wrap_object(jacson_object o) { + return (struct jacson_value){.type = JSON_TYPE_OBJECT, .object = o}; +} + +void jacson_add(jacson_object *obj, const char *name, jacson_value val) { + struct jacson_pair p = {.name = jacson_string_from_buffer(name), val}; + hset_insert_copy(obj, &p); +} +void jacson_take_and_add(jacson_object *obj, jacson_string name, jacson_value val) { + struct jacson_pair p = {name, val}; + hset_insert_copy(obj, &p); +} +void jacson_copy_and_add(jacson_object *obj, const char *name, const struct jacson_value *val) { + struct jacson_pair p; + p.name = jacson_string_from_buffer(name); + p.value = jacson_value_deep_copy(val); + hset_insert_copy(obj, &p); +} +struct jacson_value *jacson_at(jacson_object *obj, const char *name) { + struct jacson_pair p = {(struct jacson_string){(char*)name, 0}, {0}}; + return hset_at(obj, &p); +} + +void jacson_array_reserve(jacson_array *arr, size_t s) { + arr->data = realloc(arr->data, s); + arr->capacity = s; +} +void jacson_array_append(jacson_array *arr, jacson_value val) { + if (arr->size + 1 >= arr->capacity) + jacson_array_reserve(arr, arr->size << 1); + arr->data[arr->size++] = val; +} +void jacson_array_append_copy(jacson_array *arr, const jacson_value *val) { + if (arr->size + 1 >= arr->capacity) + jacson_array_reserve(arr, arr->size << 1); + arr->data[arr->size++] = jacson_value_deep_copy(val);; +} +jacson_value *jacson_array_at(jacson_array *arr, size_t idx) { + if (idx <= 0 || idx >= arr->size) return NULL; + return arr->data + idx; +} + +bool jacson_is_null(struct jacson_value *obj) { return obj->type == JSON_TYPE_NULL; } +long jacson_get_integer(struct jacson_value 
*obj) { return obj->integer; } +double jacson_get_real(jacson_value *obj) { return obj->real; } +bool jacson_get_boolean(jacson_value *obj) { return obj->boolean; } +struct jacson_array jacson_get_array(jacson_value *obj) { return obj->array; } +jacson_object *jacson_get_object(jacson_value *obj) { return &obj->object; } + +bool jacson_get_to(jacson_value *obj, void *mem) { + switch (obj->type) { + case JSON_TYPE_NULL: memcpy(mem, "\0", 1); return false; + case JSON_TYPE_BOOLEAN: (memcpy(mem, &obj->boolean, sizeof (bool))); break; + case JSON_TYPE_INTEGER: memcpy(mem, &obj->integer, sizeof obj->integer); break; + case JSON_TYPE_REAL: memcpy(mem, &obj->real, sizeof obj->real); break; + case JSON_TYPE_STRING: memcpy(mem, &obj->string, sizeof obj->string); break; + case JSON_TYPE_ARRAY: memcpy(mem, &obj->array, sizeof obj->array); break; + case JSON_TYPE_OBJECT: memcpy(mem, &obj->object, sizeof obj->object); break; + } + return true; +} +bool jacson_try_get_to(jacson_value *obj, enum jacson_type type, void *mem) { + if (type == obj->type) jacson_get_to(obj, mem); + else return false; + return true; +} + + + +#endif // JACSON_IMPLEMENTATION +#endif // JAC_JACSON_H diff --git a/tests/json.c b/tests/json.c new file mode 100644 index 0000000..d420125 --- /dev/null +++ b/tests/json.c @@ -0,0 +1,66 @@ +#include <stdio.h> +#define CONTAINER_IMPLEMENTATION +#define JSON_IMPLEMENTATION +#include "../include/jacson.h" +int main() { + + const char *hardtest = + "{\n" + " \"validString\": \"This is a tEst string\",\n" + " \"EmptyString\": \"\",\n" + " \"numbEr\": 1234567890123456789,\n" + " \"nEgativENumbEr\": -98765.4321,\n" + " \"ExponEntialNumbEr\": 6.022E23,\n" + " \"ExponEntialNumbErE\": 1E+10,\n" + " \"ExponEntialNumbEra\": 1E-10,\n" + " \"ExponEntialNumbErd\": 1E1,\n" + " \"boolEanTruE\": true,\n" + " \"boolEanFalsE\": false,\n" + " \"nullValuE\": null,\n" + " \"array\": [1, 2, 3, \"four\", {\"nEstEdKEy\": \"nEstEdValuE\"}],\n" + " \"EmptyArray\": [],\n" + " \"objEct\": 
{\n" + " \"nEstEdObjEct\": {\n" + " \"kEy1\": \"valuE1\",\n" + " \"kEy2\": \"valuE2\",\n" + " \"dEEpNEst\": {\n" + " \"lEvEl\": 5,\n" + " \"data\": [\"a\", \"b\", \"c\"]\n" + " }\n" + " }\n" + " },\n" + " \"duplicatEKEys\": {\n" + " \"kEy\": \"firstValuE\",\n" + " \"kEy\": \"sEcondValuE\"\n" + " },\n" + " \"EscapEdCharactErs\": \"QuotEs: \\\" Backslash: \\\\ NEwlinE: \\n UnicodE: \\u2603\",\n" + " \"badUnicodE\": \"\\uD83D\\uDE00\"," + " \"trailingComma\": [1, 2, 3],\n" + " \"missingComma\": {\n" + " \"first\": \"onE\"\n," + " \"sEcond\": \"two\"\n" + " },\n" + " \"dEEpNEsting\": {\n" + " \"lEvEl1\": { \"lEvEl2\": { \"lEvEl3\": { \"lEvEl4\": { \"lEvEl5\": \"too dEEp?\" }}}}\n" + " },\n" + " \"nEgativEZEro\": -0,\n" + " \"utf8BOM\": \"\\uFEFFShould start with BOM\",\n" + " \"spEcialFloatValuEs\": {\n" + " \"NaN\": \"NaN\",\n" + " \"Infinity\": \"Infinity\",\n" + " \"NEgativEInfinity\": \"-Infinity\"\n" + " }\n" + "}"; + + struct jacson_value val; + + if (jacson_parse(&val, hardtest)) { + char* str = jacson_serialize(&val, JSON_SERIALIZE_PRETTY); + puts(str); + free(str); + jacson_value_free(val); + } else { + puts("error parsing jacson"); + } + return 0; +} |