From ebad5ad23dc6893b0fb75ba04aa961a25a532b5f Mon Sep 17 00:00:00 2001 From: justanothercatgirl Date: Fri, 28 Jun 2024 00:06:40 +0300 Subject: Finished hash_map implementation TODO: fix that one linked list function --- Makefile | 17 +++--- compile_flags.txt | 1 + include/container.h | 144 +++++++++++++++++++++++++++++++++++++----------- include/hashset.h | 0 tests/binary_search.c | 2 +- tests/dynarray_struct.c | 2 +- tests/hmap.c | 105 +++++++++++++++++++++++++++++++++++ tests/linked_list.c | 11 ++-- tests/obscure.c | 2 +- tests/types.c | 3 +- 10 files changed, 240 insertions(+), 47 deletions(-) delete mode 100644 include/hashset.h create mode 100644 tests/hmap.c diff --git a/Makefile b/Makefile index 1e1397a..4824636 100644 --- a/Makefile +++ b/Makefile @@ -2,11 +2,9 @@ CC := gcc CFLAGS := -g -Wall -Wextra -Wpedantic -Werror -Wno-language-extension-token +tests: container types -tests: container - - -container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c tests/binary_search.c tests/types.c +container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c tests/binary_search.c include/container.h $(CC) $(CFLAGS) -o $@ $< -DTT_CHAR ./$@ $(CC) $(CFLAGS) -o $@ $< -DTT_SHORT @@ -21,10 +19,15 @@ container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c tests/binary ./$@ $(CC) $(CFLAGS) -o $@ tests/binary_search.c ./$@ - $(CC) $(CFLAGS) -o $@ tests/types.c - ./$@ - $(CC) $(CFLAGS) -o $@ tests/linked_list.c + # $(CC) $(CFLAGS) -o $@ tests/linked_list.c + # ./$@ + $(CC) $(CFLAGS) -o $@ tests/hmap.c ./$@ rm $@ +types: + $(CC) -o $@ tests/types.c + ./$@ + + rm $@ diff --git a/compile_flags.txt b/compile_flags.txt index 1a9ce62..54d3666 100644 --- a/compile_flags.txt +++ b/compile_flags.txt @@ -1 +1,2 @@ -DTT_INT +-Wall -Wextra -Wpedantic -Werror diff --git a/include/container.h b/include/container.h index 3370be3..78090af 100644 --- a/include/container.h +++ b/include/container.h @@ -41,7 +41,6 @@ typedef unsigned char byte; /// define TODO macro #define TODO (void)(*(volatile char*)0) - /// typedefs for array qsort typedef int(*qsort_cmp_t)(const void*, const void*); #define get_qsort_cmp(type) __qsort_cmps[sizeof(type)] @@ -225,7 +224,12 @@ void ll_remove_back(struct linked_list* list); /// Removes element at index i. if there is no such index, does nothing. /// O(n) char ll_remove_at(struct linked_list *list, size_t i); +/// removes first occurrence of element `elem` from list. +void ll_remove_elem(struct linked_list* list, const void* elem, qsort_cmp_t cmp); /// Removes ALL occurrences of element "elem" in a list +/// WARNING: SOMETHING IS WRONG WITH THIS FUNCTION AND IT SHOULD NOT BE USED EVER +/// UNLESS I FIX IT +/// FOR NOW I WILL ADD TODO MARK THERE /// O(n) void ll_remove_all(struct linked_list* list, const void* elem, qsort_cmp_t cmp); /// splits a linked list into two, the second one starting from element at index `i`. @@ -273,10 +277,14 @@ struct linked_list ll_deep_copy(const struct linked_list* list, memcpy_t cpy); /* ------------------------HASH MAP HEADER-------------------------- */ /* ----------------------------------------------------------------- */ -#define HMAP_BUCKET_MAX_SIZE 8 -#define HMAP_DEFAULT_BUCKETS 32 -/// should return `0` only if first argument is not equal to second, -/// otherwise should return `1` or any other positive integer +#ifndef HMAP_MAX_BUCKET_SIZE + #define HMAP_MAX_BUCKET_SIZE 8 +#endif +#ifndef HMAP_INIT_SIZE + #define HMAP_INIT_SIZE 32 +#endif +/// should return `0` only if first argument is equal to second, +/// otherwise should return `1` or any other integer /// OPERATES ON `struct hmap_pair*` !!!!!!!!!! NOT ON KEYS! typedef int(*hmap_equal_fn)(const void*, const void*); typedef size_t(*hmap_hash_fn)(const void*); @@ -295,24 +303,34 @@ enum hmapi_cmp_res { hmap_lt = -1, hmap_eq = 0, hmap_gt = 1, - hmap_no = 0x80000000, + hmap_no = 0xB19A55, }; struct hash_map_iter { const struct hash_map* map; size_t bucket_pos; struct linked_list_node* current_node; }; +typedef struct hash_map_iter hmap_iter; struct hash_map hmap_create(const size_t key_size, const size_t val_size, hmap_equal_fn eq, hmap_hash_fn hash); -struct hash_map hmap_create_from_buffer(size_t key_size, size_t val_size, hmap_equal_fn eq, hmap_hash_fn hash, struct hmap_pair* buf, size_t buf_len); +/// copies data from pointers +struct hash_map hmap_create_from_buffer(size_t key_size, size_t val_size, hmap_equal_fn eq, hmap_hash_fn hash, const struct hmap_pair* buf, size_t buf_len); void hmap_free(struct hash_map* map); -void hmap_insert(struct hash_map* map, struct hmap_pair element); +/// copy the contents of the pair into hash map (using `memcpy`). +/// The element is left unchanged, the memory pointed to by `element` may be anything, from stack to FFI allocator +void hmap_insert_copy(struct hash_map* map, const struct hmap_pair element); +/// returns 1 if an object was inserted, 0 if it was not. This is done to be able to free uninserted pointer. +char hmap_insert_mallocated(struct hash_map* map, struct hmap_pair element); /// pair is a pointer #define hmap_insert_pair(map, pair) hmap_insert_range(map, pair, 1); -void hmap_insert_range(struct hash_map* map, struct hmap_pair* elements, size_t elements_len); +/// copies data from pointers +void hmap_insert_range(struct hash_map* map, const struct hmap_pair* elements, size_t elements_len); void* hmap_get(struct hash_map* map, void* key); +/// returns 1 if value was updates successfully, otherwise 0 (if key does not exist) +char hmap_update(struct hash_map* map, void* key, void* new_val); void hmap_remove(struct hash_map* map, void* key); /// double the size of a hash map void hmap_rehash(struct hash_map* map); +void hmap_rehash_to_size(struct hash_map* map, size_t new_size); /// initialize iterator `iter`. void hmapi_begin(const struct hash_map* map, struct hash_map_iter* iter); @@ -369,7 +387,7 @@ void *_insert_to_index_dynarray(void *const dynarray, const void *const element, // uncomment in dev mode so that LSP highlights the code -#define CONTAINER_IMPLEMENTATION +/*#define CONTAINER_IMPLEMENTATION*/ #ifdef CONTAINER_IMPLEMENTATION /* ----------------------------------------------------------------- */ @@ -647,7 +665,8 @@ void ll_truncate(struct linked_list* list, const size_t i) { struct linked_list_node *start = ll_at(list, i-1); if (start == NULL) return; struct linked_list freeable = { - .meta = {list->meta.element_size, list->meta.assumed_size - i}, + .meta = {list->meta.element_size, list->meta.assumed_size - i, .free_function = list->meta.free_function, +}, .first = start->next, }; ll_free(&freeable); @@ -786,8 +805,33 @@ char ll_remove_at(struct linked_list *list, size_t i) { --list->meta.assumed_size; return 1; } +void ll_remove_elem(struct linked_list* list, const void* elem, qsort_cmp_t cmp) { + if (list->first == NULL) return; + if (cmp(list->first->data, elem) == 0) { + ll_remove_front(list); + return; + } + struct linked_list_node *fst = list->first, + *snd = fst->next; + while (snd != NULL) { + if (cmp(snd->data, elem) == 0) { + fst->next = ll_free_node(list, snd); + return; + } + } + if (cmp(fst->data, elem) == 0) ll_remove_back(list); +} void ll_remove_all(struct linked_list* list, const void* elem, qsort_cmp_t cmp) { - while (cmp(list->first->data, elem) == 0) ll_remove_front(list); + printf("DON'T USE THIS FUNCTION!!!!!\n"); + TODO; + __builtin_unreachable(); + + if (list->first == NULL) return; + while (1) { + if (list->first == NULL) return; + if (cmp(list->first->data, elem) == 0) ll_remove_front(list); + else break; + } struct linked_list_node *prev = list->first, *current; for (current = prev->next; current != NULL; current = prev->next) { @@ -903,11 +947,11 @@ void __hmap_ll_custom_free(void* data) { struct hmap_pair *pair = data; free(pair->key); free(pair->val); - free(pair); + free(data); } struct hash_map hmap_create(const size_t key_size, const size_t val_size, hmap_equal_fn eq, hmap_hash_fn hash) { struct hash_map ret = { - .buckets = array_new(struct linked_list, HMAP_DEFAULT_BUCKETS), + .buckets = array_new(struct linked_list, HMAP_INIT_SIZE), .key_size = key_size, .val_size = val_size, .eq = eq, @@ -915,19 +959,20 @@ struct hash_map hmap_create(const size_t key_size, const size_t val_size, hmap_e }; const struct linked_list def = { .meta = { - .element_size = key_size+val_size, + .element_size = sizeof(struct hmap_pair), .assumed_size = 0, .free_function = __hmap_ll_custom_free, }, .first = NULL, .last = NULL, }; - for (size_t i = 0; i < HMAP_DEFAULT_BUCKETS; ++i) ret.buckets[i] = def; + for (size_t i = 0; i < HMAP_INIT_SIZE; ++i) ret.buckets[i] = def; return ret; } -struct hash_map hmap_create_from_buffer(size_t key_size, size_t val_size, hmap_equal_fn eq, hmap_hash_fn hash, struct hmap_pair* buf, size_t buf_len) { +struct hash_map hmap_create_from_buffer(size_t key_size, size_t val_size, hmap_equal_fn eq, hmap_hash_fn hash, const struct hmap_pair* buf, size_t buf_len) { struct hash_map ret = hmap_create(key_size, val_size, eq, hash); - for (size_t i = 0; i < buf_len; ++i) ll_append(ret.buckets + (ret.hash(buf[i].key) % array_size(ret.buckets)), buf + i); + hmap_rehash_to_size(&ret, buf_len / HMAP_MAX_BUCKET_SIZE); + for (size_t i = 0; i < buf_len; ++i) hmap_insert_copy(&ret, buf[i]); return ret; } void hmap_free(struct hash_map* map) { @@ -935,42 +980,79 @@ void hmap_free(struct hash_map* map) { array_free(map->buckets); memset(map, 0, sizeof(*map)); } -void hmap_insert(struct hash_map* map, struct hmap_pair element) { +void hmap_insert_copy(struct hash_map* map, const struct hmap_pair element) { size_t index = map->hash(element.key) % array_size(map->buckets); + if (ll_search(map->buckets + index, &element, map->eq)) { + struct hmap_pair* target = ll_find(map->buckets + index, &element, map->eq)->data; + memcpy(target->val, element.val, map->val_size); + return; + } + struct hmap_pair copy = {malloc(map->key_size), malloc(map->val_size)}; + memcpy(copy.key, element.key, map->key_size); + memcpy(copy.val, element.val, map->val_size); + ll_append(map->buckets + index, ©); + if (map->buckets[index].meta.assumed_size > HMAP_MAX_BUCKET_SIZE) hmap_rehash(map); +} +char hmap_insert_mallocated(struct hash_map* map, struct hmap_pair element) { + size_t index = map->hash(element.key) % array_size(map->buckets); + if (ll_search(map->buckets + index, &element, map->eq)) { + struct hmap_pair* target = ll_find(map->buckets + index, &element, map->eq)->data; + free(target->val); + target->val = element.val; + free(element.key); + return 0; + } ll_append(map->buckets + index, &element); - if (map->buckets[index].meta.assumed_size > HMAP_BUCKET_MAX_SIZE) hmap_rehash(map); + if (map->buckets[index].meta.assumed_size > HMAP_MAX_BUCKET_SIZE) hmap_rehash(map); + return 1; } -/// pair is a pointer -void hmap_insert_range(struct hash_map* map, struct hmap_pair* elements, size_t elements_len) { - for (size_t i = 0; i < elements_len; ++i) hmap_insert(map, elements[i]); +void hmap_insert_range(struct hash_map* map, const struct hmap_pair* elements, size_t elements_len) { + for (size_t i = 0; i < elements_len; ++i) hmap_insert_copy(map, elements[i]); } void* hmap_get(struct hash_map* map, void* key) { size_t index = map->hash(key) % array_size(map->buckets); struct linked_list* target_list = map->buckets + index; - struct linked_list_node* node = ll_find(target_list, key, map->eq); - if (node == NULL) { return NULL; } + struct hmap_pair pr = {.key = key, .val = NULL }; + struct linked_list_node* node = ll_find(target_list, &pr, map->eq); + if (node == NULL) return NULL; struct hmap_pair *pair = node->data; return pair->val; } +char hmap_update(struct hash_map* map, void* key, void* new_val) { + size_t index = map->hash(key) % array_size(map->buckets); + struct linked_list* target_list = map->buckets + index; + struct hmap_pair pr = {.key = key, .val = NULL }; + struct linked_list_node* node = ll_find(target_list, &pr, map->eq); + if (node == NULL) return 0; + memcpy(((struct hmap_pair*)node->data)->val, new_val, map->val_size); + return 1; +} void hmap_remove(struct hash_map* map, void* key) { size_t index = map->hash(key) % array_size(map->buckets); struct linked_list* target_list = map->buckets + index; struct hmap_pair tmppr = {.key = key, .val = NULL}; - ll_remove_all(target_list, &tmppr, map->eq); + ll_remove_elem(target_list, &tmppr, map->eq); } -/// double the size of a hash map void hmap_rehash(struct hash_map* map) { - size_t new_size = array_size(map->buckets) * 2; + hmap_rehash_to_size(map, array_size(map->buckets) << 1); +} +/// double the size of a hash map +void hmap_rehash_to_size(struct hash_map* map, size_t new_size) { struct linked_list* new_buckets = array_new(struct linked_list, new_size); - for (size_t i = 0; i < new_size; ++i) - new_buckets[i] = ll_create( /* I know this is ugly */ map->buckets[0].meta.element_size); - + for (size_t i = 0; i < new_size; ++i) { + new_buckets[i] = ll_create(sizeof(struct hmap_pair)); + new_buckets[i].meta.free_function = __hmap_ll_custom_free; + } struct hash_map_iter iter; for (hmapi_begin(map, &iter); !hmapi_end(&iter); hmapi_next(&iter)) { size_t hash_pos = map->hash(hmapi_get_key(&iter)) % new_size; struct hmap_pair hash_data = hmapi_get_data(&iter); ll_append(new_buckets + hash_pos, &hash_data); } + for (size_t i = 0; i < array_size(map->buckets); ++i) { + map->buckets[i].meta.free_function = free; + ll_free(map->buckets + i); + } array_free(map->buckets); map->buckets = new_buckets; } diff --git a/include/hashset.h b/include/hashset.h deleted file mode 100644 index e69de29..0000000 diff --git a/tests/binary_search.c b/tests/binary_search.c index e3a973b..ce35d14 100644 --- a/tests/binary_search.c +++ b/tests/binary_search.c @@ -4,7 +4,7 @@ #define prints(el) \ do {printf("search %i: %s\n", el, (array_binary_search(array, &el, __default_int_cmp)) ? "true" : "false");} while(0) -int main() { +int main(void) { int data[] = {9, 5, 2, 7, 6, 10, 5, 9, 7}; // no 8 there int* array = array_new(int, sizeof(data)/sizeof(data[0])); memcpy(array, data, sizeof(data)); diff --git a/tests/dynarray_struct.c b/tests/dynarray_struct.c index 6cce97c..7675ab4 100644 --- a/tests/dynarray_struct.c +++ b/tests/dynarray_struct.c @@ -27,7 +27,7 @@ int point_abs(const void* a, const void* b) { return absa-absb; } -int main() +int main(void) { struct point *points = array_new(struct point, 4); for (size_t i = 0; i < array_size(points); ++i) points[i] = (struct point){.x = i, .y = 2 * i, .z = 4 * i, .w = 8 * i}; diff --git a/tests/hmap.c b/tests/hmap.c new file mode 100644 index 0000000..920c854 --- /dev/null +++ b/tests/hmap.c @@ -0,0 +1,105 @@ +#define CONTAINER_IMPLEMENTATION +#define HMAP_MAX_BUCKET_SIZE 1 +#define HMAP_INIT_SIZE 1 +#include "../include/container.h" + + +// Necessary for hash map to operate +int int_eqals(const struct hmap_pair* a, const struct hmap_pair* b) { + return *(int*)(a->key) - *(int*)(b->key); +} + +// Just a regular hash algorithm +size_t int_hash(const int* a) { + return *a ^ (*a << 1); +} + +// A macro for fun +#define free_pair(pair) do { free(pair.key); free(pair.val); } while(0) + +// Here is an example of how you might iterate elements in hash map +void print_hmap(struct hash_map* map) { + printf("hash map at %p\n", (void*)map); + struct hash_map_iter iter; + for (hmapi_begin(map, &iter); !hmapi_end(&iter); hmapi_next(&iter)) { + printf("key: %i, value: %i\n", *(int*)hmapi_get_key(&iter), *(int*)hmapi_get_val(&iter)); + } +} +struct hmap_pair mkpr(int key, int val) { + struct hmap_pair ret = {malloc(sizeof(int)), malloc(sizeof(int))}; + *(int*)ret.key = key; + *(int*)ret.val = val; + return ret; +} +void freepr(struct hmap_pair* pr) { + free(pr->key); + free(pr->val); +} + +int main(void) { + // Casting functions to the right type should not break it + struct hmap_pair pairs[] = {mkpr(1, 2), mkpr(2, 3), mkpr(3, 4), mkpr(3, 5), mkpr(3, 6), mkpr(6, 8)}; + struct hash_map map = hmap_create_from_buffer( + sizeof(int), sizeof(int), + (hmap_equal_fn)int_eqals, (hmap_hash_fn)int_hash, + pairs, sizeof(pairs)/sizeof(*pairs)); + + struct hmap_pair w1 = {malloc(sizeof(int)), malloc(sizeof(int))}; + *(int*)(w1.key) = 10; + *(int*)(w1.val) = 15; + struct hmap_pair w2 = {malloc(sizeof(int)), malloc(sizeof(int))}; + *(int*)(w2.key) = 10; + *(int*)(w2.val) = 15; + struct hmap_pair w3 = {malloc(sizeof(int)), malloc(sizeof(int))}; + *(int*)(w3.key) = -69; + *(int*)(w3.val) = 12; + + // Ensure there are no memory leaks + hmap_insert_mallocated(&map, w1); + hmap_insert_mallocated(&map, w2); + hmap_insert_mallocated(&map, w3); + + int key; + int val; + struct hmap_pair p1 = {.key = &key, .val = &val}; + key = 274; + val = 349; + hmap_insert_copy(&map, p1); + key = 1234; + val = 5678; + hmap_insert_copy(&map, p1); + key = 42; + val = 727; + hmap_insert_copy(&map, p1); + hmap_insert_copy(&map, p1); // Second insert does not do anything + + int zero = 0; + hmap_update(&map, &key, &zero); + + int ten = 10; + printf("hmap at %i: %i\n", ten, *(int*)hmap_get(&map, &ten)); + hmap_remove(&map, &ten); + + struct hmap_pair pairs_2[] = {mkpr(100, 200), mkpr(200, 600), mkpr(600, 300), mkpr(300, 420)}; + hmap_insert_range(&map, pairs_2, sizeof(pairs_2)/sizeof(*pairs_2)); + + hmap_iter iter; + void** rmkeys = array_new(void*, 0); + for (hmapi_begin(&map, &iter); !hmapi_end(&iter); hmapi_next(&iter)) { + if (*(int*)hmapi_get_key(&iter) % 100 != 0) + array_push(rmkeys, hmapi_get_key(&iter)); + } + for (size_t i = 0; i < array_size(rmkeys); ++i) + hmap_remove(&map, rmkeys[i]); + array_free(rmkeys); + + print_hmap(&map); + hmap_free(&map); + for (size_t i = 0; i < sizeof(pairs)/sizeof(*pairs); ++i) { + freepr(pairs + i); + } + for (size_t i = 0; i < sizeof(pairs_2)/sizeof(*pairs_2); ++i) { + freepr(pairs_2 + i); + } + return 0; +} diff --git a/tests/linked_list.c b/tests/linked_list.c index 1682375..7068e83 100644 --- a/tests/linked_list.c +++ b/tests/linked_list.c @@ -4,15 +4,15 @@ void print_linked_list(struct linked_list* list) { int i = 0; - printf("linked list at %p, size = %zu\n", list, list->meta.assumed_size); - printf("first=%p, last=%p\n", list->first, list->last); + printf("linked list at %p, size = %zu\n", (void*)list, list->meta.assumed_size); + printf("first=%p, last=%p\n", (void*)list->first, (void*)list->last); for (struct linked_list_node* current = list->first; current != NULL; current = current->next) { - printf("[%i] = {%i, %p}\n", i, *(int*)current->data, current->next); + printf("[%i] = {%i, %p}\n", i, *(int*)current->data, (void*)current->next); ++i; } } -int main() { +int main(void) { int buf[] = {0, 1, 3, 2, 7, 8, 6, 0, 18, 1}; struct linked_list list = ll_create_from_buffer(sizeof(int), buf, sizeof(buf)/sizeof(*buf)); @@ -24,7 +24,8 @@ int main() { ll_insert_front(&list, &one); ll_append(&list, &one); ll_append(&list, &one); - ll_remove_all(&list, &one, __default_int_cmp); + // TODO: fix this function + ll_remove_all(&list, &one, __default_int_cmp); print_linked_list(&list); ll_remove_at(&list, 3); ll_remove_back(&list); diff --git a/tests/obscure.c b/tests/obscure.c index 190ff09..7b6f1fd 100644 --- a/tests/obscure.c +++ b/tests/obscure.c @@ -5,7 +5,7 @@ typedef char* packet_t; -int main() { +int main(void) { packet_t *data = array_new(packet_t, 0); packet_t el_0 = malloc(128); memset(el_0, 42, 128); diff --git a/tests/types.c b/tests/types.c index 4320e8d..94d6cbe 100644 --- a/tests/types.c +++ b/tests/types.c @@ -1,7 +1,8 @@ #include +#define RS_TYPES_USE_128 #include "../include/rstypes.h" -i32 main() { +i32 main(void) { u8 a = 215; i8 b = -127; u16 c = 65535; -- cgit v1.2.3-70-g09d2