aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjustanothercatgirl <sotov@twistea.su>2025-03-22 14:09:55 +0300
committerjustanothercatgirl <sotov@twistea.su>2025-03-22 14:09:55 +0300
commit0612bfa57649c4bffea0ca6e436539d8c3193a80 (patch)
tree67f98dcd8bf98194761c0ea3de4c2d48ca674bea
parentff8ab43201e5c862003d9353016409c13691562c (diff)
Added json serializer/parser
-rw-r--r--Makefile10
-rw-r--r--README.md3
-rw-r--r--include/container.h2
-rw-r--r--include/jacson.h856
-rw-r--r--tests/json.c66
5 files changed, 934 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index c24e590..3a44550 100644
--- a/Makefile
+++ b/Makefile
@@ -2,7 +2,7 @@
CC := gcc
CFLAGS := -g -Wall -Wextra -Wpedantic -Werror -Wno-language-extension-token -Wno-stringop-overread -Iinclude
-tests: container types log
+tests: container types log json
container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c \
tests/binary_search.c tests/linked_list.c tests/hashset.c \
@@ -30,14 +30,18 @@ container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c \
rm $@
-types: tests/types.c
+types: tests/types.c include/rstypes.h
$(CC) -o $@ tests/types.c $(CFLAGS)
./$@
rm $@
-log: tests/log.c
+log: tests/log.c include/log.h
$(CC) -o $@ tests/log.c $(CFLAGS)
- ./$@
rm $@
+json: tests/json.c include/jacson.h
+ $(CC) -o $@ tests/json.c $(CFLAGS)
+ ./$@
+ rm $@
diff --git a/README.md b/README.md
index d3d1a2c..362c237 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,9 @@ This is just a collection of (usually single-purpos) header files that I use ref
* Summary: Code generator for embedding resources directly into an executable.
* How to use: It exposes C interface, so in order to use it, you will need to have a C program that builds resources for you. It has it's own repo, but i will be adding CLI to there soon.
* Examples: see [this repo](https://github.com/justanothercatgirl/embed_test.c/)
+## [`jacson.h`](include/jacson.h)
+* Summary: JSON generator and parser that supports comments.
+* How to use: define the `JACSON_IMPLEMENTATION` macro before including `jacson.h`. `CONTAINER_IMPLEMENTATION` MUST be defined as well, either somewhere earlier in the code before including `container.h` or just before `jacson.h`. This JSON implementation uses `array` and `hash_set` from `container.h`.
## `build.h`
nearest TODO for now.
* Summary: a build system based on C. To compile something under it, you do something like `cc -o builder builder.c && ./builder`. The idea is stolen from [Tsoding](https://example.com)
diff --git a/include/container.h b/include/container.h
index 1401162..5f40e4a 100644
--- a/include/container.h
+++ b/include/container.h
@@ -130,6 +130,8 @@ typedef void*(*memcpy_t)(void* restrict, const void*, size_t);
} while(0)
/* Removes last element from the array */
#define array_pop(array) --array_header(array)->size
+/* get last element of an array; the array must hold at least one element,
+ * because the element is addressed as `size - 1` without any emptiness check */
+#define array_last(array) (array)[array_header((array))->size - 1]
/* Reserve length elements so that subsequent (length - current_size) pushes require no reallocation */
#define array_reserve(array, length) do { array = _memreserve_dynarray(array, length); } while (0)
/* Change size of an array. the capacity is set to lowest power of 2 that is greater than length */
diff --git a/include/jacson.h b/include/jacson.h
new file mode 100644
index 0000000..046c1d7
--- /dev/null
+++ b/include/jacson.h
@@ -0,0 +1,856 @@
+#ifndef JAC_JACSON_H
+#define JAC_JACSON_H
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "container.h"
+
+
+
+typedef enum jacson_type {
+ /* primitive types */
+ JSON_TYPE_NULL,
+ JSON_TYPE_BOOLEAN,
+ /* this distinction is not standard but useful in C */
+ JSON_TYPE_INTEGER,
+ JSON_TYPE_REAL,
+ /* composite types */
+ JSON_TYPE_STRING,
+ JSON_TYPE_ARRAY,
+ JSON_TYPE_OBJECT,
+} jacson_type;
+
+/* NOSP or NOSPACE are the same, because
+ * serialization only checks `policy == PRETTY`
+ * You could pass 42069 to there as a way
+ * to remove spaces. Literally. */
+enum jacson_serialize_policy {
+ JSON_SERIALIZE_PRETTY = 0,
+ JSON_SERIALIZE_NOSP,
+ JSON_SERIALIZE_NOSPACE,
+};
+
+/* I am lazy */
+typedef struct hash_set jacson_object;
+
+/* jacson array (sized array) */
+typedef struct jacson_array {
+ struct jacson_value *data;
+ size_t size, capacity;
+} jacson_array;
+
+/* jacson string (sized string) */
+typedef struct jacson_string {
+ char *data;
+ size_t size;
+} jacson_string;
+
+/* jacson value (tagged union) */
+typedef struct jacson_value {
+ enum jacson_type type;
+ union {
+ bool boolean;
+ long integer;
+ double real;
+ struct jacson_string string;
+ struct jacson_array array;
+ jacson_object object;
+ };
+} jacson_value;
+
+/* represents name-value pair
+ * should probably not be used in user code*/
+struct jacson_pair {
+ struct jacson_string name;
+ struct jacson_value value;
+};
+
+
+
+/* helper hash functions */
+int jacson_pair_eq(const void *a, const void *b);
+size_t jacson_pair_hash(const void *a);
+
+/* object constructors / destructors */
+jacson_object jacson_object_new();
+jacson_object jacson_object_deep_copy(const jacson_object *other);
+void jacson_object_free(jacson_object *obj);
+/* string constructors / destructors */
+jacson_string jacson_string_from_buffer(const char *str);
+jacson_string jacson_string_from_buffer_n(const char *str, size_t s);
+jacson_string jacson_string_copy(const jacson_string *str);
+jacson_string jacson_string_mallocated(char *str);
+void jacson_string_free(jacson_string str);
+/* array constructors / destructors */
+jacson_array jacson_array_new(size_t reserve); /* initial size 0 */
+jacson_array jacson_array_from_buffer(const jacson_value *values, size_t s);
+jacson_array jacson_array_deep_copy(const jacson_array *arr);
+jacson_array jacson_array_mallocated(jacson_value *values, size_t s);
+void jacson_array_free(struct jacson_array arr);
+/* value constructors / destructors */
+jacson_value jacson_value_deep_copy(const jacson_value *other);
+void jacson_value_free(struct jacson_value val);
+
+/* serialization / parsing
+ * returned string must be freed */
+char *jacson_serialize(jacson_value *obj, enum jacson_serialize_policy);
+/* return value indicates success
+ * obj must be NOT initialized */
+bool jacson_parse(jacson_value *obj, const char *str);
+
+/* wrappers
+ * these functions take ownership of the memory
+ * (except for string literal one) */
+jacson_value jacson_wrap_null(void);
+jacson_value jacson_wrap_boolean(bool b);
+jacson_value jacson_wrap_integer(long i);
+jacson_value jacson_wrap_real(double d);
+jacson_value jacson_wrap_string(jacson_string s);
+jacson_value jacson_wrap_string_literal(const char *s);
+jacson_value jacson_wrap_array(jacson_array a);
+jacson_value jacson_wrap_object(jacson_object o);
+
+/* access: insert/read */
+/* transfers ownership of val, copies name */
+void jacson_add(jacson_object *obj, const char *name, jacson_value val);
+/* transfers ownership of val and name */
+void jacson_take_and_add(jacson_object *obj, struct jacson_string name, struct jacson_value val);
+/* copies both val and name */
+void jacson_copy_and_add(jacson_object *obj, const char *name, const jacson_value *val);
+jacson_value *jacson_at(jacson_object *obj, const char *name);
+
+/* same for arrays */
+void jacson_array_reserve(jacson_array *arr, size_t s);
+void jacson_array_append(jacson_array *arr, jacson_value val);
+void jacson_array_append_copy(jacson_array *arr, const jacson_value *val);
+jacson_value *jacson_array_at(jacson_array *arr, size_t idx);
+
+/* access: typed getters */
+bool jacson_is_null(jacson_value *obj);
+long jacson_get_integer(jacson_value *obj);
+double jacson_get_real(jacson_value *obj);
+bool jacson_get_boolean(jacson_value *obj);
+jacson_array jacson_get_array(jacson_value *obj);
+jacson_object *jacson_get_object(jacson_value *obj);
+
+/* access: untyped getters */
+bool jacson_get_to(jacson_value *obj, void *mem);
+bool jacson_try_get_to(jacson_value *obj, enum jacson_type type, void *mem);
+
+
+
+/* The implementation below is opt-in: define JACSON_IMPLEMENTATION in exactly
+ * one translation unit before including this header. Defining it here
+ * unconditionally would emit every function in each including file.
+ * JSON_IMPLEMENTATION is accepted as an alias because README.md and
+ * tests/json.c use that spelling. */
+#if defined(JSON_IMPLEMENTATION) && !defined(JACSON_IMPLEMENTATION)
+#define JACSON_IMPLEMENTATION
+#endif
+#ifdef JACSON_IMPLEMENTATION
+
+#include <ctype.h>
+
+/* structure to represent intermediate parsing state
+ * should not be used in user code */
+struct __jcsn_token {
+ enum __jcsn_tokens {
+ __JCSN_PRIMITIVE_NUL = 1 << 0,
+ __JCSN_PRIMITIVE_BLN = 1 << 1,
+ __JCSN_PRIMITIVE_INT = 1 << 2,
+ __JCSN_PRIMITIVE_FLT = 1 << 3,
+ __JCSN_PRIMITIVE_STR = 1 << 4,
+ __JSCN_PRIMITIVE = __JCSN_PRIMITIVE_NUL | __JCSN_PRIMITIVE_BLN
+ | __JCSN_PRIMITIVE_INT | __JCSN_PRIMITIVE_FLT
+ | __JCSN_PRIMITIVE_STR,
+ __JSCN_TOK_CBO = 1 << 5, /* curly bracket open */
+ __JSCN_TOK_CBC = 1 << 6, /* curly bracket close */
+ __JSCN_TOK_SBO = 1 << 7, /* square bracket open */
+ __JSCN_TOK_SBC = 1 << 8, /* square bracket close */
+ __JSCN_TOK_OPN = __JSCN_TOK_CBO | __JSCN_TOK_SBO,
+ __JSCN_TOK_CLS = __JSCN_TOK_CBC | __JSCN_TOK_SBC,
+ __JSCN_TOK_COM = 1 << 9, /* a coma */
+ __JSCN_TOK_COL = 1 << 10, /* a colon */
+ __JSCN_VAL = __JSCN_PRIMITIVE | __JSCN_TOK_OPN,
+ __JSCN_TOK_EOF = 1 << 11, /* end of file */
+ __JSCN_TOK_INI = 1 << 12, /* initial token */
+ __JSCN_ERROR = 1 << 13, /* parsing error */
+ } type;
+ union {
+ bool b;
+ long l;
+ char *s;
+ double f;
+ };
+};
+enum __jacson_numtype {
+ __NT_FLT,
+ __NT_INT,
+ __NT_ERR, /* erronous number */
+};
+union __jacson_number {
+ long i;
+ double f;
+};
+enum __jacson_stack {
+ __ST_OBJ,
+ __ST_ARR,
+};
+
+
+
+/* helper serialization functions */
+size_t __jacson_rsnprintf(char **str, size_t *strsz, size_t off, const char *fmt, ...);
+size_t __jacson_rsnputc(char **str, size_t *strsz, size_t off, char c);
+size_t __jacson_array_serialize(struct jacson_array *arr, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation);
+size_t __jacson_object_serialize(jacson_object *arr, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation);
+size_t __jacson_value_serialize(struct jacson_value *val, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation);
+
+
+/* resize string numbered print formatted
+ * Formats `fmt` into *str starting at offset `off`, doubling the buffer
+ * (*str / *strsz are updated) until the text and its terminating NUL fit.
+ * Returns the number of characters written, not counting the NUL.
+ * Returns 0 when formatting fails or the buffer cannot be grown; in that
+ * case the old buffer stays valid and is terminated at `off` if possible. */
+size_t __jacson_rsnprintf(char **const str, size_t *strsz, size_t off, const char *fmt, ...) {
+    va_list ls, ls1;
+    int ln;
+    va_start(ls, fmt);
+    for (;;) {
+        /* guard the subtraction: off may already be past the buffer end */
+        size_t avail = *strsz > off ? *strsz - off : 0;
+        va_copy(ls1, ls);
+        ln = vsnprintf(avail ? *str + off : NULL, avail, fmt, ls1);
+        va_end(ls1);
+        if (ln < 0) {
+            /* encoding error: a negative value must not be treated as a length */
+            if (avail) (*str)[off] = '\0';
+            ln = 0;
+            break;
+        }
+        if ((size_t)ln < avail) break;
+        size_t newsz = *strsz ? *strsz << 1 : 16;
+        char *grown = realloc(*str, newsz);
+        if (grown == NULL) {
+            if (avail) (*str)[off] = '\0';
+            ln = 0;
+            break;
+        }
+        *str = grown;
+        *strsz = newsz;
+    }
+    va_end(ls);
+    return (size_t)ln;
+}
+/* Stores character `c` at offset `off` of *str followed by a NUL, growing
+ * the buffer (*str / *strsz are updated) when the two bytes do not fit.
+ * Returns 1 on success and 0 when the buffer could not be grown, so the
+ * result can always be added to the caller's offset. */
+size_t __jacson_rsnputc(char **const str, size_t *strsz, size_t off, char c) {
+    /* written as an addition so that off > *strsz cannot wrap around */
+    if (off + 2 > *strsz) {
+        size_t newsz = *strsz ? *strsz : 16;
+        while (off + 2 > newsz) newsz <<= 1;
+        char *grown = realloc(*str, newsz);
+        if (grown == NULL) return 0;
+        *str = grown;
+        *strsz = newsz;
+    }
+    (*str)[off] = c;
+    (*str)[off + 1] = '\0';
+    return 1;
+}
+
+/* Compares two `struct jacson_pair` by name only.
+ * Returns 0 when the names are equal and non-zero otherwise (strcmp
+ * convention) -- presumably what hset_new expects; verify against container.h.
+ * The debug print that ran on every comparison has been removed. */
+int jacson_pair_eq(const void *a, const void *b) {
+    const struct jacson_pair *_a = a;
+    const struct jacson_pair *_b = b;
+    /* the cheap length check runs first, the byte comparison only on a tie */
+    return _a->name.size != _b->name.size
+        || strcmp(_a->name.data, _b->name.data);
+}
+/* Hashes a `struct jacson_pair` by its name: each of the `name.size` bytes is
+ * multiplied by a small constant and XOR-folded into the byte of the result
+ * selected by the byte's position modulo sizeof(size_t).
+ * Every byte is mixed in exactly once; mixing a byte twice would cancel it
+ * out of the XOR and make long names collide. */
+size_t jacson_pair_hash(const void *a) {
+    enum { HASH_MUL = 7 };
+    const struct jacson_pair *_a = a;
+    size_t ret = 0;
+    unsigned char *retmem = (unsigned char *)&ret;
+    for (size_t j = 0; j < _a->name.size; ++j)
+        retmem[j % sizeof ret] ^= (unsigned char)((unsigned char)_a->name.data[j] * HASH_MUL);
+    return ret;
+}
+
+jacson_object jacson_object_new() {
+ return hset_new(sizeof (struct jacson_pair), jacson_pair_eq, jacson_pair_hash);
+}
+/* Builds an independent copy of `other`: the new set is pre-sized to the
+ * source's bucket count, then every pair is inserted with a copied name and
+ * a deep-copied value. */
+jacson_object jacson_object_deep_copy(const jacson_object *other) {
+    struct hash_set_iter it;
+    jacson_object copy = hset_new(sizeof (struct jacson_pair), jacson_pair_eq, jacson_pair_hash);
+    hset_rehash_to_size(&copy, array_size(other->buckets));
+    hseti_begin(other, &it);
+    while (!hseti_end(&it)) {
+        struct jacson_pair *src = hseti_get(&it);
+        struct jacson_pair dup;
+        dup.name = jacson_string_copy(&src->name);
+        dup.value = jacson_value_deep_copy(&src->value);
+        hset_insert_copy(&copy, &dup);
+        hseti_next(&it);
+    }
+    return copy;
+}
+/* Releases every pair stored in `obj` (name buffer and value), then the
+ * hash set itself. */
+void jacson_object_free(jacson_object *obj) {
+    struct hash_set_iter it;
+    hseti_begin(obj, &it);
+    while (!hseti_end(&it)) {
+        struct jacson_pair *entry = hseti_get(&it);
+        free(entry->name.data);
+        jacson_value_free(entry->value);
+        hseti_next(&it);
+    }
+    hset_free(obj);
+}
+/* Creates a jacson_string holding a freshly allocated copy of the
+ * NUL-terminated `str`; `size` excludes the terminator. */
+jacson_string jacson_string_from_buffer(const char *str) {
+    const size_t len = strlen(str);
+    char *copy = malloc(len + 1);
+    /* len + 1 so that the terminating '\0' is copied as well */
+    memcpy(copy, str, len + 1);
+    return (struct jacson_string){.data = copy, .size = len};
+}
+/* Creates a jacson_string from the first `s` bytes of `str`.
+ * `str` does not have to be NUL-terminated: exactly `s` bytes are read and
+ * the terminator is appended here. On allocation failure the result has
+ * `data == NULL` and `size == 0`. */
+jacson_string jacson_string_from_buffer_n(const char *str, size_t s) {
+    struct jacson_string ret = {.data = malloc(s + 1), .size = s};
+    if (ret.data == NULL) {
+        ret.size = 0;
+        return ret;
+    }
+    /* copy s bytes only: byte s of the source may lie outside the buffer */
+    memcpy(ret.data, str, s);
+    ret.data[s] = '\0';
+    return ret;
+}
+/* Duplicates `str` into a newly allocated buffer of `size + 1` bytes
+ * (the extra byte carries the terminator over from the source). */
+jacson_string jacson_string_copy(const jacson_string *str) {
+    jacson_string dup;
+    dup.size = str->size;
+    dup.data = malloc(dup.size + 1);
+    memcpy(dup.data, str->data, dup.size + 1);
+    return dup;
+}
+jacson_string jacson_string_mallocated(char *str) {
+ return (struct jacson_string){.size = strlen(str), .data = str};
+}
+void jacson_string_free(jacson_string str) {
+ free(str.data);
+}
+jacson_array jacson_array_new(size_t reserve) {
+ struct jacson_array ret = {
+ .data = malloc(sizeof (struct jacson_value) * reserve),
+ .size = 0,
+ .capacity = reserve
+ };
+ return ret;
+}
+/* Creates an array of `s` elements, each a deep copy of the matching
+ * element of `values`; capacity equals size. */
+jacson_array jacson_array_from_buffer(const jacson_value *values, size_t s) {
+    struct jacson_array out;
+    out.data = malloc(s * sizeof (struct jacson_value));
+    out.size = s;
+    out.capacity = s;
+    for (size_t idx = 0; idx != s; ++idx) {
+        out.data[idx] = jacson_value_deep_copy(&values[idx]);
+    }
+    return out;
+}
+/* Returns an array with the same number of elements as `arr`, each one a
+ * deep copy of the original; capacity is trimmed to the element count. */
+jacson_array jacson_array_deep_copy(const jacson_array *arr) {
+    const size_t count = arr->size;
+    struct jacson_array out;
+    out.data = malloc(count * sizeof (struct jacson_value));
+    out.size = count;
+    out.capacity = count;
+    for (size_t idx = 0; idx != count; ++idx)
+        out.data[idx] = jacson_value_deep_copy(&arr->data[idx]);
+    return out;
+}
+jacson_array jacson_array_mallocated(jacson_value *values, size_t s) {
+ return (struct jacson_array){.data = values, .size = s, .capacity = s};
+}
+/* Frees every element of `arr` and then the element storage itself. */
+void jacson_array_free(struct jacson_array arr) {
+    struct jacson_value *cur = arr.data;
+    for (size_t left = arr.size; left != 0; --left, ++cur) {
+        jacson_value_free(*cur);
+    }
+    free(arr.data);
+}
+/* Returns a copy of `val` that shares no memory with it: scalars are copied
+ * by value, strings/arrays/objects through their own copy routines. */
+jacson_value jacson_value_deep_copy(const jacson_value *val) {
+    jacson_value out = {.type = val->type};
+    if (out.type == JSON_TYPE_BOOLEAN) {
+        out.boolean = val->boolean;
+    } else if (out.type == JSON_TYPE_INTEGER) {
+        out.integer = val->integer;
+    } else if (out.type == JSON_TYPE_REAL) {
+        out.real = val->real;
+    } else if (out.type == JSON_TYPE_STRING) {
+        out.string = jacson_string_copy(&val->string);
+    } else if (out.type == JSON_TYPE_ARRAY) {
+        out.array = jacson_array_deep_copy(&val->array);
+    } else if (out.type == JSON_TYPE_OBJECT) {
+        out.object = jacson_object_deep_copy(&val->object);
+    }
+    /* JSON_TYPE_NULL carries no payload */
+    return out;
+}
+/* Releases the memory owned by `val`. Null, boolean, integer and real values
+ * own nothing; strings, arrays and objects are released recursively. */
+void jacson_value_free(struct jacson_value val) {
+    if (val.type == JSON_TYPE_STRING) {
+        free(val.string.data);
+    } else if (val.type == JSON_TYPE_ARRAY) {
+        jacson_array_free(val.array);
+    } else if (val.type == JSON_TYPE_OBJECT) {
+        jacson_object_free(&val.object);
+    }
+}
+
+/* Writes `s` at offset `off` as a quoted JSON string. Quotation mark,
+ * reverse solidus and control characters below 0x20 are escaped so that the
+ * output can be parsed again. Returns the offset after the closing quote. */
+static size_t __jacson_escaped_serialize(const char *s, char **str, size_t *strsz, size_t off) {
+    if (s == NULL) s = "";
+    off += __jacson_rsnputc(str, strsz, off, '"');
+    for (; *s != '\0'; ++s) {
+        switch (*s) {
+        case '"':  off += __jacson_rsnprintf(str, strsz, off, "\\\""); break;
+        case '\\': off += __jacson_rsnprintf(str, strsz, off, "\\\\"); break;
+        case '\n': off += __jacson_rsnprintf(str, strsz, off, "\\n"); break;
+        case '\r': off += __jacson_rsnprintf(str, strsz, off, "\\r"); break;
+        case '\t': off += __jacson_rsnprintf(str, strsz, off, "\\t"); break;
+        case '\f': off += __jacson_rsnprintf(str, strsz, off, "\\f"); break;
+        case '\b': off += __jacson_rsnprintf(str, strsz, off, "\\b"); break;
+        default:
+            if ((unsigned char)*s < 0x20)
+                off += __jacson_rsnprintf(str, strsz, off, "\\u%04x", (unsigned)(unsigned char)*s);
+            else
+                off += __jacson_rsnputc(str, strsz, off, *s);
+            break;
+        }
+    }
+    off += __jacson_rsnputc(str, strsz, off, '"');
+    return off;
+}
+/* Serializes one value at offset `off` of the growable buffer *str and
+ * returns the offset just past the written text. Arrays and objects are
+ * delegated to their own serializers together with `policy` and `identation`.
+ * Reals are printed with 17 significant digits instead of a fixed six
+ * decimals (which turned 1E-10 into 0.000000); a whole-valued real gets a
+ * trailing ".0" so that it is parsed back as a real, not an integer. */
+size_t __jacson_value_serialize(struct jacson_value *val, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation) {
+    switch(val->type) {
+    case JSON_TYPE_NULL: off += __jacson_rsnprintf(str, strsz, off, "null"); break;
+    case JSON_TYPE_BOOLEAN: off += __jacson_rsnprintf(str, strsz, off, "%s", val->boolean ? "true" : "false"); break;
+    case JSON_TYPE_INTEGER: off += __jacson_rsnprintf(str, strsz, off, "%li", val->integer); break;
+    case JSON_TYPE_REAL: {
+        char num[40];
+        snprintf(num, sizeof num, "%.17g", val->real);
+        /* 'n' matches "nan"/"inf", which must not get a fractional part */
+        if (strpbrk(num, ".eEn") == NULL) strcat(num, ".0");
+        off += __jacson_rsnprintf(str, strsz, off, "%s", num);
+    } break;
+    case JSON_TYPE_STRING: off = __jacson_escaped_serialize(val->string.data, str, strsz, off); break;
+    case JSON_TYPE_ARRAY: off = __jacson_array_serialize(&val->array, str, strsz, off, policy, identation); break;
+    case JSON_TYPE_OBJECT: off = __jacson_object_serialize(&val->object, str, strsz, off, policy, identation); break;
+    }
+    return off;
+}
+/* Serializes `arr` as "[...]" starting at offset `off` and returns the offset
+ * after the closing bracket. Elements are separated by ','. With
+ * JSON_SERIALIZE_PRETTY each element is placed on its own line, padded with
+ * identation*4 spaces; any other policy emits no whitespace. */
+size_t __jacson_array_serialize(struct jacson_array *arr, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation) {
+ off += __jacson_rsnputc(str, strsz, off, '[');
+ /* array elements are indented two levels deeper than the bracket */
+ identation+= 2;
+ bool coma = false;
+ for (size_t i = 0; i < arr->size; ++i) {
+ /* the separator is written before every element except the first */
+ if (coma) off += __jacson_rsnputc(str, strsz, off, ',');
+ if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnprintf(str, strsz, off, "\n%*s", identation*4, "");
+ /* the value serializer returns an absolute offset, hence '=' and not '+=' */
+ off = __jacson_value_serialize(arr->data + i, str, strsz, off, policy, identation);
+ coma = true;
+ }
+ identation -= 2;
+ /* the closing bracket goes on its own line even for an empty array */
+ if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnprintf(str, strsz, off, "\n%*s", identation*4, "");
+ off += __jacson_rsnputc(str, strsz, off, ']');
+ return off;
+}
+/* Serializes `obj` as "{...}" starting at offset `off` and returns the offset
+ * after the closing brace. Pairs are visited in hash-set iteration order and
+ * separated by ','. With JSON_SERIALIZE_PRETTY each pair is placed on its own
+ * line, padded with identation*4 spaces, and a space follows the colon. */
+size_t __jacson_object_serialize(jacson_object *obj, char **str, size_t *strsz, size_t off, enum jacson_serialize_policy policy, int identation) {
+ struct hash_set_iter iter;
+ bool coma = false;
+ off += __jacson_rsnputc(str, strsz, off, '{');
+ ++identation;
+ for (hseti_begin(obj, &iter); !hseti_end(&iter); hseti_next(&iter)) {
+ struct jacson_pair *p = hseti_get(&iter);
+ /* the separator is written before every pair except the first */
+ if (coma) off += __jacson_rsnputc(str, strsz, off, ',');
+ if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnprintf(str, strsz, off, "\n%*s", identation*4, "");
+ /* NOTE(review): the name is written verbatim between the quotes; no escaping is applied here */
+ off += __jacson_rsnprintf(str, strsz, off, "\"%s\":", p->name.data);
+ if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnputc(str, strsz, off, ' ');
+ /* the value serializer returns an absolute offset, hence '=' and not '+=' */
+ off = __jacson_value_serialize(&p->value, str, strsz, off, policy, identation);
+ coma = true;
+ }
+ --identation;
+ if (policy == JSON_SERIALIZE_PRETTY) off += __jacson_rsnprintf(str, strsz, off, "\n%*s", identation*4, "");
+ off += __jacson_rsnputc(str, strsz, off, '}');
+ return off;
+}
+
+/* Serializes `obj` into a newly allocated NUL-terminated string.
+ * `policy == JSON_SERIALIZE_PRETTY` produces indented multi-line output, any
+ * other value produces output without whitespace.
+ * Returns NULL when the initial buffer cannot be allocated; otherwise the
+ * caller owns the string and must free() it. */
+char *jacson_serialize(jacson_value *obj, enum jacson_serialize_policy policy) {
+    size_t strsz = 128;
+    char *str = malloc(strsz);
+    if (str == NULL) return NULL;
+    /* keep the result a valid string even if nothing ends up being written */
+    str[0] = '\0';
+    __jacson_value_serialize(obj, &str, &strsz, 0, policy, 0);
+    return str;
+}
+/* Advances *str past any run of space, tab, line feed and carriage return.
+ * On return *str addresses the first character that is none of those
+ * (possibly the terminating NUL); it is unchanged if there was no whitespace. */
+static void __jacson_skipws(char const ** const str) {
+    for (;;) {
+        const char c = **str;
+        if (c != 0x20 && c != 0x09 && c != 0x0A && c != 0x0D) return;
+        ++*str;
+    }
+}
+/* Scans a number at *str: optional sign, digits, optional fraction
+ * ('.' followed by digits) and optional exponent ('e'/'E', optional sign,
+ * digits). On success *str is moved past the number, the value is stored in
+ * `res` (`i` for __NT_INT, `f` for __NT_FLT) and the kind is returned.
+ * __NT_ERR is returned as soon as a required digit is missing.
+ * Characters are cast to unsigned char before isdigit(), which is undefined
+ * for negative char values; strtol/strtod replace atol/atof because their
+ * behaviour on out-of-range input is defined. */
+static enum __jacson_numtype __jacson_numtype(const char **const str, union __jacson_number *res) {
+    bool iflag = true;
+    const char *begin = *str;
+    if (**str == '-' || **str == '+') ++*str;
+    if (!isdigit((unsigned char)**str)) return __NT_ERR;
+    while (isdigit((unsigned char)**str)) ++*str;
+
+    if (**str == '.') {
+        iflag = false;
+        ++*str;
+        if (!isdigit((unsigned char)**str)) return __NT_ERR;
+        while (isdigit((unsigned char)**str)) ++*str;
+    }
+    if (**str == 'e' || **str == 'E') {
+        iflag = false;
+        ++*str;
+        if (**str == '-' || **str == '+') ++*str;
+        if (!isdigit((unsigned char)**str)) return __NT_ERR;
+        while (isdigit((unsigned char)**str)) ++*str;
+    }
+    if (iflag) res->i = strtol(begin, NULL, 10);
+    else res->f = strtod(begin, NULL);
+    return iflag ? __NT_INT : __NT_FLT;
+}
+/* Converts the four hexadecimal digits at `str` (either letter case) to a
+ * 16-bit value. *e is set to true on success. If any of the four characters
+ * is not a hex digit, *e is set to false and 0xFFFF is returned.
+ * Digits are range-checked directly instead of through isdigit(), which is
+ * undefined for negative char values. */
+static unsigned short __jacson_hex16(const char *str, bool *e) {
+    unsigned short ret = 0;
+    for (int i = 0; i < 4; ++i) {
+        const char c = str[i];
+        unsigned short val;
+        if ('0' <= c && c <= '9')
+            val = c - '0';
+        else if ('a' <= c && c <= 'f')
+            val = c - 'a' + 10;
+        else if ('A' <= c && c <= 'F')
+            val = c - 'A' + 10;
+        else {
+            *e = false;
+            return 0xFFFF;
+        }
+        /* the first digit is the most significant nibble */
+        ret |= val << (4 * (3-i));
+    }
+    *e = true;
+    return ret;
+}
+/* Decodes a \uXXXX escape, or a \uXXXX\uXXXX surrogate pair, into UTF-8.
+ * On entry *str addresses the 'u'; on success *str is moved past the last
+ * hex digit, the bytes are stored in `buf` (at most 4) and their count is
+ * returned. 0 is returned for malformed hex digits, for a high surrogate
+ * that is not followed by a low one, and for a low surrogate on its own
+ * (it has no valid UTF-8 encoding). */
+static char __jacson_get_utf8(const char **const str, char *buf) {
+    bool val;
+    ++*str; /* skip u */
+    unsigned short b1 = __jacson_hex16(*str, &val), b2;
+    int codepoint;
+    if (!val) return 0;
+    *str += 4;
+    if (0xDC00 <= b1 && b1 <= 0xDFFF) return 0;
+    if (0xD800 <= b1 && b1 <= 0xDBFF) {
+        if (*(*str)++ != '\\') return 0;
+        if (*(*str)++ != 'u') return 0;
+        b2 = __jacson_hex16(*str, &val);
+        /* validate before advancing so the pointer never skips the terminator */
+        if (!val || 0xDC00 > b2 || b2 > 0xDFFF) return 0;
+        *str += 4;
+        codepoint = 0x10000 + ((b1 - 0xD800) << 10) + (b2 - 0xDC00);
+    } else codepoint = b1;
+    if (codepoint <= 0x7F) {
+        buf[0] = (char)codepoint;
+        return 1;
+    } else if (codepoint <= 0x7FF) {
+        buf[0] = (char)(0xC0 | ((codepoint >> 6) & 0x1F));
+        buf[1] = (char)(0x80 | ((codepoint) & 0x3F));
+        return 2;
+    } else if (codepoint <= 0xFFFF) {
+        buf[0] = (char)(0xE0 | ((codepoint >> 12) & 0xF));
+        buf[1] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
+        buf[2] = (char)(0x80 | ((codepoint) & 0x3F));
+        return 3;
+    } else if (codepoint <= 0x10FFFF){
+        buf[0] = (char)(0xF0 | ((codepoint >> 18) & 0x7));
+        buf[1] = (char)(0x80 | ((codepoint >> 12) & 0x3F));
+        buf[2] = (char)(0x80 | ((codepoint >> 6) & 0x3F));
+        buf[3] = (char)(0x80 | ((codepoint) & 0x3F));
+        return 4;
+    } else return 0;
+}
+/* Parses a JSON string literal. On entry *str addresses the opening quote;
+ * on success *str is moved past the closing quote and a newly allocated,
+ * NUL-terminated, unescaped copy is returned (caller frees).
+ * NULL is returned for an unknown escape, a malformed \u sequence, an
+ * allocation failure, or when the input ends before the closing quote.
+ * NOTE(review): quotation mark, reverse solidus and the control characters
+ * (U+0000 through U+001F) MUST be escaped in JSON, but raw control
+ * characters are copied through here rather than rejected. */
+static char *__jacson_parse_string(const char **const str) {
+    ++*str; /* skip " */
+
+    size_t size = 16, off = 0;
+    char *ret = malloc(size);
+    if (ret == NULL) return NULL;
+    ret[0] = '\0'; /* edge case: empty string */
+    while (**str != '"') {
+        /* unterminated literal: stop instead of reading past the input */
+        if (**str == '\0') goto fail;
+        if (**str != '\\') {
+            off += __jacson_rsnputc(&ret, &size, off, **str);
+            ++*str;
+            continue;
+        }
+        switch (*++*str) {
+        case '"':
+        case '/':
+        case '\\':off += __jacson_rsnputc(&ret, &size, off, **str); break;
+        case 't': off += __jacson_rsnputc(&ret, &size, off, '\t'); break;
+        case 'r': off += __jacson_rsnputc(&ret, &size, off, '\r'); break;
+        case 'n': off += __jacson_rsnputc(&ret, &size, off, '\n'); break;
+        case 'f': off += __jacson_rsnputc(&ret, &size, off, '\f'); break;
+        case 'b': off += __jacson_rsnputc(&ret, &size, off, '\b'); break;
+        case 'u': {
+            char buffer[4]; /* utf-8 */
+            char sz = __jacson_get_utf8(str, buffer);
+            if (sz == 0) goto fail;
+            off += __jacson_rsnprintf(&ret, &size, off, "%.*s", (int)sz, buffer);
+            /* the decoder already moved *str past the escape */
+            continue;
+        }
+        default: goto fail;
+        }
+        ++*str;
+    }
+    ++*str; /* skip closing " */
+    return ret;
+fail:
+    free(ret);
+    return NULL;
+}
+#ifdef DEBUG
+/* Debug helper: prints the type of token `t` (hex value and short name) and,
+ * for primitive tokens, its payload to `f`. Only compiled when DEBUG is
+ * defined. Uses the struct and enumerator names declared above; the previous
+ * spellings did not exist, so the function could not compile. */
+static void __jacson_print_token(struct __jcsn_token t, FILE *f) {
+    fprintf(f, "token { .type = 0x%04X (", (unsigned)t.type);
+    switch (t.type) {
+    case __JCSN_PRIMITIVE_NUL: fprintf(f, "NUL), .val = null"); break;
+    case __JCSN_PRIMITIVE_BLN: fprintf(f, "BLN), .val = %s", t.b ? "true" : "false"); break;
+    case __JCSN_PRIMITIVE_INT: fprintf(f, "INT), .val = %li", t.l); break;
+    case __JCSN_PRIMITIVE_FLT: fprintf(f, "FLT), .val = %lf", t.f); break;
+    case __JCSN_PRIMITIVE_STR: fprintf(f, "STR), .val = \"%s\"", t.s); break;
+    case __JSCN_TOK_CBO: fprintf(f, "CBO)"); break;
+    case __JSCN_TOK_CBC: fprintf(f, "CBC)"); break;
+    case __JSCN_TOK_SBO: fprintf(f, "SBO)"); break;
+    case __JSCN_TOK_SBC: fprintf(f, "SBC)"); break;
+    case __JSCN_TOK_COM: fprintf(f, "COM)"); break;
+    case __JSCN_TOK_COL: fprintf(f, "COL)"); break;
+    case __JSCN_TOK_EOF: fprintf(f, "EOF)"); break;
+    case __JSCN_TOK_INI: fprintf(f, "INI)"); break;
+    case __JSCN_ERROR: fprintf(f, "ERROR)"); break;
+    default: fprintf(f, "UNKNOWN)"); break;
+    }
+    fprintf(f, " }\n");
+}
+#endif /* DEBUG */
+/* Reads the next token at *str (leading whitespace is skipped) and moves
+ * *str past it. `tokens` is a bit mask of acceptable token types: a token
+ * outside the mask, like malformed input, is reported as __JSCN_ERROR.
+ * End of input is returned as __JSCN_TOK_EOF without consulting the mask and
+ * without moving *str. A returned __JCSN_PRIMITIVE_STR token owns the heap
+ * buffer in `s`. */
+static struct __jcsn_token __jacson_get_token(const char **const str, enum __jcsn_tokens tokens) {
+    __jacson_skipws(str);
+    struct __jcsn_token tok = {.type = __JSCN_ERROR};
+    switch (**str) {
+    case '{': tok.type = __JSCN_TOK_CBO; ++*str; break;
+    case '}': tok.type = __JSCN_TOK_CBC; ++*str; break;
+    case '[': tok.type = __JSCN_TOK_SBO; ++*str; break;
+    case ']': tok.type = __JSCN_TOK_SBC; ++*str; break;
+    case ',': tok.type = __JSCN_TOK_COM; ++*str; break;
+    case ':': tok.type = __JSCN_TOK_COL; ++*str; break;
+    case '\0': tok.type = __JSCN_TOK_EOF; return tok;
+    case '+':
+    case '-':
+    case '0': /* this could have been case '0'...'9', but I removed it for interoperability */
+    case '1':
+    case '2':
+    case '3':
+    case '4':
+    case '5':
+    case '6':
+    case '7':
+    case '8':
+    case '9': {
+        union __jacson_number n;
+        switch(__jacson_numtype(str, &n)) {
+        case __NT_FLT: tok.type = __JCSN_PRIMITIVE_FLT; tok.f = n.f; break;
+        case __NT_INT: tok.type = __JCSN_PRIMITIVE_INT; tok.l = n.i; break;
+        case __NT_ERR: tok.type = __JSCN_ERROR; break;
+        }
+    } break;
+    case 't': if (!strncmp((*str)+1, "rue", 3)) {
+            tok.type = __JCSN_PRIMITIVE_BLN;
+            tok.b = true;
+            *str += 4;
+        } else tok.type = __JSCN_ERROR;
+        break;
+    case 'f': if (!strncmp((*str)+1, "alse", 4)) {
+            tok.type = __JCSN_PRIMITIVE_BLN;
+            tok.b = false;
+            *str += 5;
+        } else tok.type = __JSCN_ERROR;
+        break;
+    case 'n': if (!strncmp((*str)+1, "ull", 3)) {
+            tok.type = __JCSN_PRIMITIVE_NUL;
+            *str += 4;
+        } else tok.type = __JSCN_ERROR;
+        break;
+    case '"': {
+        tok.s = __jacson_parse_string(str);
+        if (tok.s == NULL) tok.type = __JSCN_ERROR;
+        else tok.type = __JCSN_PRIMITIVE_STR;
+    } break;
+    default: tok.type = __JSCN_ERROR; break;
+    }
+    if ((tok.type & tokens) == 0) {
+        /* a rejected string would leak: once the type is overwritten nobody
+         * can tell that the token still owns a buffer */
+        if (tok.type == __JCSN_PRIMITIVE_STR) free(tok.s);
+        tok.type = __JSCN_ERROR;
+    }
+    return tok;
+}
+/* Splits `str` into a container.h array of tokens and validates the
+ * structure while doing so. Returns NULL when the input is not valid (this
+ * includes empty input and an invalid first token); otherwise the caller
+ * must array_free() the result and owns the `s` buffers of string tokens.
+ * Caller can assume that the returned token stream is valid jacson
+ * (e.g. it can not start with colon). Since the validity check is done here,
+ * tokens like ':' and ',' are not in the stream: only primitives and
+ * brackets are stored.
+ * `allowed` is the mask of token types that may come next, `stack` records
+ * the kinds of the currently open containers, and `str_iskey` tells whether
+ * a string at this position is an object key. */
+static struct __jcsn_token *__jacson_tokenize_string(const char *str) {
+    struct __jcsn_token *tokens = array_new(struct __jcsn_token, 0);
+    enum __jacson_stack *stack = array_new(enum __jacson_stack, 0);
+    bool str_iskey = false;
+    array_reserve(tokens, 16);
+    array_reserve(stack, 16);
+    enum __jcsn_tokens allowed = __JSCN_VAL;
+    for (struct __jcsn_token tok = {.type = __JSCN_TOK_INI}; tok.type != __JSCN_ERROR; tok = __jacson_get_token(&str, allowed)) {
+        switch (tok.type) {
+        case __JSCN_TOK_INI: continue;
+        case __JSCN_TOK_CBO:
+            array_push(stack, __ST_OBJ);
+            str_iskey = true;
+            allowed = __JCSN_PRIMITIVE_STR | __JSCN_TOK_CBC;
+            break;
+        case __JSCN_TOK_SBO:
+            array_push(stack, __ST_ARR);
+            str_iskey = false;
+            allowed = __JSCN_VAL | __JSCN_TOK_SBC;
+            break;
+        case __JSCN_TOK_CBC:
+            if (array_size(stack) == 0 || array_last(stack) != __ST_OBJ) goto evacuate;
+            array_pop(stack);
+            allowed = __JSCN_TOK_COM | __JSCN_TOK_CLS;
+            /* the pop may have emptied the stack: do not read its last element then */
+            str_iskey = array_size(stack) != 0 && array_last(stack) == __ST_OBJ;
+            break;
+        case __JSCN_TOK_SBC:
+            if (array_size(stack) == 0 || array_last(stack) != __ST_ARR) goto evacuate;
+            array_pop(stack);
+            allowed = __JSCN_TOK_COM | __JSCN_TOK_CLS;
+            str_iskey = array_size(stack) != 0 && array_last(stack) == __ST_OBJ;
+            break;
+        case __JSCN_TOK_COM:
+            if (array_size(stack) == 0) goto evacuate;
+            allowed = __JCSN_PRIMITIVE_STR;
+            if (array_last(stack) == __ST_ARR)
+                allowed |= __JSCN_VAL;
+            /* needed: after a comma inside an object the next string is a key again */
+            str_iskey = array_last(stack) == __ST_OBJ;
+            break;
+        case __JSCN_TOK_COL:
+            allowed = __JSCN_VAL;
+            str_iskey = false;
+            break;
+        case __JSCN_TOK_EOF:
+            if (array_size(stack) == 0) goto endwhile;
+            else goto evacuate;
+        case __JCSN_PRIMITIVE_STR:
+            if (str_iskey) allowed = __JSCN_TOK_COL;
+            else allowed = __JSCN_TOK_COM | __JSCN_TOK_CLS;
+            break;
+        case __JCSN_PRIMITIVE_NUL:
+        case __JCSN_PRIMITIVE_BLN:
+        case __JCSN_PRIMITIVE_INT:
+        case __JCSN_PRIMITIVE_FLT:
+            allowed = __JSCN_TOK_COM | __JSCN_TOK_CLS;
+            break;
+        case __JSCN_ERROR: goto evacuate;
+        default: UNREACHABLE;
+        }
+        if (tok.type & (__JSCN_PRIMITIVE | __JSCN_TOK_OPN | __JSCN_TOK_CLS)) array_push(tokens, tok);
+        /* the top-level value is complete: only end of input may follow */
+        if (array_size(stack) == 0) {
+            tok = __jacson_get_token(&str, 0xFFFFFFFF);
+            if (tok.type != __JSCN_TOK_EOF) {
+                /* this look-ahead token is not in `tokens`, so free it here */
+                if (tok.type == __JCSN_PRIMITIVE_STR) free(tok.s);
+                goto evacuate;
+            }
+        }
+    }
+endwhile:
+    /* no tokens at all means empty input or an invalid very first token */
+    if (array_size(stack) != 0 || array_size(tokens) == 0) goto evacuate;
+    array_free(stack);
+    return tokens;
+evacuate:
+    for (size_t i= 0; i < array_size(tokens); ++i) {
+        if (tokens[i].type == __JCSN_PRIMITIVE_STR) free(tokens[i].s);
+    }
+    array_free(tokens);
+    array_free(stack);
+    return NULL;
+}
+jacson_object __jacson_object_parse(struct __jcsn_token *stream, size_t *offset);
+struct jacson_array __jacson_array_parse(struct __jcsn_token *stream, size_t *offset);
+
+/* Converts the token at stream[*offset] into a value. That token must be a
+ * primitive or an opening bracket; any other type hits UNREACHABLE.
+ * For '{' and '[' the container parsers advance *offset themselves. */
+struct jacson_value __jacson_value_parse(struct __jcsn_token *stream, size_t *offset) {
+    struct __jcsn_token *cur = stream + *offset;
+    switch (cur->type) {
+    case __JCSN_PRIMITIVE_NUL:
+        return jacson_wrap_null();
+    case __JCSN_PRIMITIVE_BLN:
+        return jacson_wrap_boolean(cur->b);
+    case __JCSN_PRIMITIVE_INT:
+        return jacson_wrap_integer(cur->l);
+    case __JCSN_PRIMITIVE_FLT:
+        return jacson_wrap_real(cur->f);
+    case __JCSN_PRIMITIVE_STR:
+        /* the value takes over the buffer allocated by the tokenizer */
+        return jacson_wrap_string(jacson_string_mallocated(cur->s));
+    case __JSCN_TOK_CBO:
+        return jacson_wrap_object(__jacson_object_parse(stream, offset));
+    case __JSCN_TOK_SBO:
+        return jacson_wrap_array(__jacson_array_parse(stream, offset));
+    default: UNREACHABLE;
+    }
+}
+/* Builds an object from the token stream.
+ * On entry stream[*offset] must be the '{' token; on return *offset is the
+ * index of the matching '}' token. When a name occurs more than once, the
+ * pair stored earlier is removed and freed, so the last occurrence wins. */
+jacson_object __jacson_object_parse(struct __jcsn_token *stream, size_t *offset) {
+ jacson_object ret = jacson_object_new();
+ /* pre-increment steps over '{' first and over each parsed value afterwards */
+ while (stream[++*offset].type != __JSCN_TOK_CBC) {
+ struct jacson_pair newp;
+ /* __jacson_tokenize_string has already done checks for validity,
+ * so this token is taken to be the key string and the next one its value */
+ newp.name = jacson_string_mallocated(stream[(*offset)++].s);
+ newp.value = __jacson_value_parse(stream, offset);
+ struct jacson_pair *oldval;
+ if ((oldval = hset_at(&ret, &newp)) != NULL) {
+ /* copy the old pair out before removing it so its buffers can still be freed */
+ struct jacson_pair oldcpy = *oldval;
+ hset_remove(&ret, oldval);
+ free(oldcpy.name.data);
+ jacson_value_free(oldcpy.value);
+ }
+ hset_insert_copy(&ret, &newp);
+ }
+ return ret;
+}
+/* Builds an array from the token stream.
+ * On entry stream[*offset] must be the '[' token; on return *offset is the
+ * index of the matching ']' token. */
+struct jacson_array __jacson_array_parse(struct __jcsn_token *stream, size_t *offset) {
+    struct jacson_array items = jacson_array_new(16);
+    /* step over '[' first, then over each parsed element */
+    for (++*offset; stream[*offset].type != __JSCN_TOK_SBC; ++*offset) {
+        jacson_array_append(&items, __jacson_value_parse(stream, offset));
+    }
+    return items;
+}
+/* Parses the JSON text `str` into *obj.
+ * Returns true on success; *obj then owns the parsed tree and must be
+ * released with jacson_value_free(). Returns false, leaving *obj untouched,
+ * when `str` is NULL, is not valid JSON, or contains no value at all. */
+bool jacson_parse(jacson_value *obj, const char *str) {
+    if (str == NULL) return false;
+    struct __jcsn_token *stream = __jacson_tokenize_string(str);
+    size_t off = 0;
+    if (stream == NULL) return false;
+    /* an empty stream has no element 0 to hand to the value parser */
+    if (array_size(stream) == 0) {
+        array_free(stream);
+        return false;
+    }
+    *obj = __jacson_value_parse(stream, &off);
+    /* string buffers were handed over to the values; only the array is freed */
+    array_free(stream);
+    return true;
+}
+jacson_value jacson_wrap_null(void) {
+ return (struct jacson_value){.type = JSON_TYPE_NULL};
+}
+jacson_value jacson_wrap_boolean(bool b) {
+ return (struct jacson_value){.type = JSON_TYPE_BOOLEAN, .boolean = b};
+}
+jacson_value jacson_wrap_integer(long i) {
+ return (struct jacson_value){.type = JSON_TYPE_INTEGER, .integer = i};
+}
+jacson_value jacson_wrap_real(double d) {
+ return (struct jacson_value){.type = JSON_TYPE_REAL, .real = d};
+}
+jacson_value jacson_wrap_string(jacson_string s) {
+ return (struct jacson_value){.type = JSON_TYPE_STRING, .string = s};
+}
+jacson_value jacson_wrap_string_literal(const char *s) {
+ return jacson_wrap_string(jacson_string_from_buffer(s));
+}
+jacson_value jacson_wrap_array(jacson_array a) {
+ return (struct jacson_value){.type = JSON_TYPE_ARRAY, .array = a};
+}
+jacson_value jacson_wrap_object(jacson_object o) {
+ return (struct jacson_value){.type = JSON_TYPE_OBJECT, .object = o};
+}
+
+void jacson_add(jacson_object *obj, const char *name, jacson_value val) {
+ struct jacson_pair p = {.name = jacson_string_from_buffer(name), val};
+ hset_insert_copy(obj, &p);
+}
+void jacson_take_and_add(jacson_object *obj, jacson_string name, jacson_value val) {
+ struct jacson_pair p = {name, val};
+ hset_insert_copy(obj, &p);
+}
+void jacson_copy_and_add(jacson_object *obj, const char *name, const struct jacson_value *val) {
+ struct jacson_pair p;
+ p.name = jacson_string_from_buffer(name);
+ p.value = jacson_value_deep_copy(val);
+ hset_insert_copy(obj, &p);
+}
+/* Looks up the member called `name` in `obj`.
+ * Returns a pointer to the stored value, or NULL when there is no such
+ * member. The lookup key carries the real length of `name`, because both
+ * jacson_pair_hash and jacson_pair_eq depend on `name.size`. The set stores
+ * whole pairs, so the value is reached through the pair that was found. */
+struct jacson_value *jacson_at(jacson_object *obj, const char *name) {
+    struct jacson_pair key = {
+        .name = {.data = (char*)name, .size = strlen(name)},
+        .value = {.type = JSON_TYPE_NULL},
+    };
+    struct jacson_pair *found = hset_at(obj, &key);
+    return found != NULL ? &found->value : NULL;
+}
+
+/* Makes room for `s` elements in total. `s` counts elements, not bytes.
+ * The capacity never drops below the current size and never becomes 0.
+ * If the request overflows or the allocation fails the array is left
+ * unchanged. */
+void jacson_array_reserve(jacson_array *arr, size_t s) {
+    if (s < arr->size) s = arr->size;
+    if (s == 0) s = 1;
+    if (s > (size_t)-1 / sizeof *arr->data) return;
+    jacson_value *grown = realloc(arr->data, s * sizeof *arr->data);
+    if (grown == NULL) return;
+    arr->data = grown;
+    arr->capacity = s;
+}
+/* Ensures there is a free slot after the last element, doubling the
+ * capacity when it is full (an empty capacity starts at 4).
+ * Returns false when the storage could not be grown. */
+static bool __jacson_array_make_room(jacson_array *arr) {
+    if (arr->size < arr->capacity) return true;
+    jacson_array_reserve(arr, arr->capacity ? arr->capacity << 1 : 4);
+    return arr->size < arr->capacity;
+}
+/* Appends `val`, taking ownership of it. If the array cannot be grown the
+ * value is freed and dropped, since there is no way to report the failure. */
+void jacson_array_append(jacson_array *arr, jacson_value val) {
+    if (!__jacson_array_make_room(arr)) {
+        jacson_value_free(val);
+        return;
+    }
+    arr->data[arr->size++] = val;
+}
+/* Appends a deep copy of `*val`; the caller keeps ownership of the
+ * original. Nothing is appended if the array cannot be grown. */
+void jacson_array_append_copy(jacson_array *arr, const jacson_value *val) {
+    if (!__jacson_array_make_room(arr)) return;
+    arr->data[arr->size++] = jacson_value_deep_copy(val);
+}
+/* Returns a pointer to element `idx` of `arr`, or NULL when `idx` is not
+ * below the element count. Index 0 is the first element and is valid. */
+jacson_value *jacson_array_at(jacson_array *arr, size_t idx) {
+    if (idx >= arr->size) return NULL;
+    return arr->data + idx;
+}
+
+bool jacson_is_null(struct jacson_value *obj) { return obj->type == JSON_TYPE_NULL; }
+long jacson_get_integer(struct jacson_value *obj) { return obj->integer; }
+double jacson_get_real(jacson_value *obj) { return obj->real; }
+bool jacson_get_boolean(jacson_value *obj) { return obj->boolean; }
+struct jacson_array jacson_get_array(jacson_value *obj) { return obj->array; }
+jacson_object *jacson_get_object(jacson_value *obj) { return &obj->object; }
+
+/* Copies the payload of `obj` into `mem` according to obj->type.
+ * `mem` must be large enough for the payload of that type: a bool, a long,
+ * a double, a jacson_string, a jacson_array or a jacson_object. The copy is
+ * shallow, so for strings, arrays and objects the pointers inside still
+ * refer to memory held by `obj`.
+ * Returns false for JSON_TYPE_NULL (a single zero byte is written to `mem`)
+ * and true for every other type. */
+bool jacson_get_to(jacson_value *obj, void *mem) {
+ switch (obj->type) {
+ case JSON_TYPE_NULL: memcpy(mem, "\0", 1); return false;
+ case JSON_TYPE_BOOLEAN: (memcpy(mem, &obj->boolean, sizeof (bool))); break;
+ case JSON_TYPE_INTEGER: memcpy(mem, &obj->integer, sizeof obj->integer); break;
+ case JSON_TYPE_REAL: memcpy(mem, &obj->real, sizeof obj->real); break;
+ case JSON_TYPE_STRING: memcpy(mem, &obj->string, sizeof obj->string); break;
+ case JSON_TYPE_ARRAY: memcpy(mem, &obj->array, sizeof obj->array); break;
+ case JSON_TYPE_OBJECT: memcpy(mem, &obj->object, sizeof obj->object); break;
+ }
+ return true;
+}
+/* Copies the payload of `obj` into `mem` only when obj->type equals `type`.
+ * Returns true when the types matched (the result of the copy itself is
+ * not propagated) and false, leaving `mem` untouched, otherwise. */
+bool jacson_try_get_to(jacson_value *obj, enum jacson_type type, void *mem) {
+    if (type != obj->type) {
+        return false;
+    }
+    jacson_get_to(obj, mem);
+    return true;
+}
+
+
+
+#endif // JACSON_IMPLEMENTATION
+#endif // JAC_JACSON_H
diff --git a/tests/json.c b/tests/json.c
new file mode 100644
index 0000000..d420125
--- /dev/null
+++ b/tests/json.c
@@ -0,0 +1,66 @@
+#include <stdio.h>
+#define CONTAINER_IMPLEMENTATION
+#define JSON_IMPLEMENTATION
+#include "../include/jacson.h"
+int main() {
+
+ const char *hardtest =
+ "{\n"
+ " \"validString\": \"This is a tEst string\",\n"
+ " \"EmptyString\": \"\",\n"
+ " \"numbEr\": 1234567890123456789,\n"
+ " \"nEgativENumbEr\": -98765.4321,\n"
+ " \"ExponEntialNumbEr\": 6.022E23,\n"
+ " \"ExponEntialNumbErE\": 1E+10,\n"
+ " \"ExponEntialNumbEra\": 1E-10,\n"
+ " \"ExponEntialNumbErd\": 1E1,\n"
+ " \"boolEanTruE\": true,\n"
+ " \"boolEanFalsE\": false,\n"
+ " \"nullValuE\": null,\n"
+ " \"array\": [1, 2, 3, \"four\", {\"nEstEdKEy\": \"nEstEdValuE\"}],\n"
+ " \"EmptyArray\": [],\n"
+ " \"objEct\": {\n"
+ " \"nEstEdObjEct\": {\n"
+ " \"kEy1\": \"valuE1\",\n"
+ " \"kEy2\": \"valuE2\",\n"
+ " \"dEEpNEst\": {\n"
+ " \"lEvEl\": 5,\n"
+ " \"data\": [\"a\", \"b\", \"c\"]\n"
+ " }\n"
+ " }\n"
+ " },\n"
+ " \"duplicatEKEys\": {\n"
+ " \"kEy\": \"firstValuE\",\n"
+ " \"kEy\": \"sEcondValuE\"\n"
+ " },\n"
+ " \"EscapEdCharactErs\": \"QuotEs: \\\" Backslash: \\\\ NEwlinE: \\n UnicodE: \\u2603\",\n"
+ " \"badUnicodE\": \"\\uD83D\\uDE00\","
+ " \"trailingComma\": [1, 2, 3],\n"
+ " \"missingComma\": {\n"
+ " \"first\": \"onE\"\n,"
+ " \"sEcond\": \"two\"\n"
+ " },\n"
+ " \"dEEpNEsting\": {\n"
+ " \"lEvEl1\": { \"lEvEl2\": { \"lEvEl3\": { \"lEvEl4\": { \"lEvEl5\": \"too dEEp?\" }}}}\n"
+ " },\n"
+ " \"nEgativEZEro\": -0,\n"
+ " \"utf8BOM\": \"\\uFEFFShould start with BOM\",\n"
+ " \"spEcialFloatValuEs\": {\n"
+ " \"NaN\": \"NaN\",\n"
+ " \"Infinity\": \"Infinity\",\n"
+ " \"NEgativEInfinity\": \"-Infinity\"\n"
+ " }\n"
+ "}";
+
+ struct jacson_value val;
+
+ if (jacson_parse(&val, hardtest)) {
+ char* str = jacson_serialize(&val, JSON_SERIALIZE_PRETTY);
+ puts(str);
+ free(str);
+ jacson_value_free(val);
+ } else {
+ puts("error parsing jacson");
+ }
+ return 0;
+}