diff options
author | justanothercatgirl <sotov2070@gmail.com> | 2024-07-08 07:24:34 +0300 |
---|---|---|
committer | justanothercatgirl <sotov2070@gmail.com> | 2024-07-08 10:36:43 +0300 |
commit | 5fa8a4e0787f23bac8810e41b331d85c540fbe21 (patch) | |
tree | 20dc699fb8575368a7b88828166e08d35d87f4ca | |
parent | 2abcf429d73408a109c4501339fd18850ab83347 (diff) |
added hash set
-rw-r--r-- | Makefile | 6 | ||||
-rw-r--r-- | include/container.h | 231 | ||||
-rw-r--r-- | tests/hashset.c | 53 |
3 files changed, 288 insertions, 2 deletions
@@ -4,7 +4,9 @@ CFLAGS := -g -Wall -Wextra -Wpedantic -Werror -Wno-language-extension-token tests: container types -container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c tests/binary_search.c include/container.h +container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c \ + tests/binary_search.c tests/linked_list.c tests/hashset.c \ + tests/hmap.c include/container.h \ $(CC) $(CFLAGS) -o $@ $< -DTT_CHAR ./$@ $(CC) $(CFLAGS) -o $@ $< -DTT_SHORT @@ -23,6 +25,8 @@ container: tests/dynarray.c tests/dynarray_struct.c tests/obscure.c tests/binary # ./$@ $(CC) $(CFLAGS) -o $@ tests/hmap.c ./$@ + #$(CC) $(CFLAGS) -o $@ tests/hashset.c + #./$@ rm $@ diff --git a/include/container.h b/include/container.h index 5fa1006..b3294c4 100644 --- a/include/container.h +++ b/include/container.h @@ -360,6 +360,79 @@ char hmapi_ne(const struct hash_map_iter *a, const struct hash_map_iter* b); /// 1 if `iter` is an end iterator, 0 otherwise char hmapi_end(const struct hash_map_iter* iter); +/* ----------------------------------------------------------------- */ +/* ------------------------HASH SET HEADER-------------------------- */ +/* ----------------------------------------------------------------- */ + +#ifndef HSET_MAX_BUCKET_SIZE + #define HSET_MAX_BUCKET_SIZE 8 +#endif +#ifndef HSET_INIT_SIZE + #define HSET_INIT_SIZE 32 +#endif +/// should return `0` only if first argument is equal to second, +/// otherwise should return `1` or any other integer +/// unlike hmap, operates on data structures directly +typedef int(*hset_equal_fn)(const void*, const void*); +typedef size_t(*hset_hash_fn)(const void*); +struct hash_set { + struct linked_list* buckets; + hmap_equal_fn eq; + hmap_hash_fn hash; + size_t elem_size; +}; +enum hseti_cmp_res { + hset_lt = -1, + hset_eq = 0, + hset_gt = 1, + hset_no = 0xB19A55, +}; +struct hash_set_iter { + const struct hash_set* set; + size_t bucket_pos; + struct linked_list_node* current_node; +}; +typedef struct hash_set_iter hset_iter; +struct hash_set hset_create(const size_t el_size, hset_equal_fn eq, hset_hash_fn hash); +/// copies data from pointers +struct hash_set hset_create_from_buffer(size_t el_size, hset_equal_fn eq, hset_hash_fn hash, const void* buf, size_t buf_len); +void hset_free(struct hash_set* set); +/// copy the contents of the pair into hash map (using `memcpy`). +/// The element is left unchanged, the memory pointed to by `element` may be anything, from stack to FFI allocator +void hset_insert_copy(struct hash_set* set, const void* element); +/// returns 1 if an object was inserted, 0 if it was not. This is done to be able to free uninserted pointer. +char hset_insert_mallocated(struct hash_set* set, void* element); +/// get a pointer to an element with same hash and modify it directly +void* hset_at(struct hash_set* set, void* element); +/// returns 1 if value was updated successfully, otherwise 0 (if key does not exist) +void hset_remove(struct hash_set* set, void* elem); +/// double the size of a hash set +void hset_rehash(struct hash_set* set); +void hset_rehash_to_size(struct hash_set* set, size_t new_size); + +/// initialize iterator `iter`. +void hseti_begin(const struct hash_set* map, struct hash_set_iter* iter); +/// set iter to next element. If there is no next element, `hash_set_iter.current_node` is set to NULL +void hseti_next(struct hash_set_iter* iter); +/// If iter is pointing beyond hash set (after the last element), NULL is returned +void* hseti_get(const struct hash_set_iter* iter); +/// the function does comparison the same way `qsort_cmp_t` might do it, except this is wrapped in an enum. +/// This is done because 2 iterators might be incomparable (come from different hash maps), in this case `hampi_no` is returned. +enum hseti_cmp_res hseti_cmp(const struct hash_set_iter *a, const struct hash_set_iter* b); +/// 1 if a > b, 0 otherwise. +char hseti_gt(const struct hash_set_iter *a, const struct hash_set_iter* b); +/// 1 if a = b, 0 otherwise +char hseti_eq(const struct hash_set_iter *a, const struct hash_set_iter* b); +/// 1 if a < b, 0 otherwise +char hseti_lt(const struct hash_set_iter *a, const struct hash_set_iter* b); +/// 1 if a >= b, 0 otherwise +char hseti_ge(const struct hash_set_iter *a, const struct hash_set_iter* b); +/// 1 if a <= b, 0 otherwise +char hseti_le(const struct hash_set_iter *a, const struct hash_set_iter* b); +/// 1 if a != b, otherwise 0 +char hseti_ne(const struct hash_set_iter *a, const struct hash_set_iter* b); +/// 1 if `iter` is an end iterator, 0 otherwise +char hseti_end(const struct hash_set_iter* iter); /* ------------------------------------------------------------------------- */ /* ------From now on, the rest of the header is implementation details------ */ @@ -388,7 +461,7 @@ void *_insert_to_index_dynarray(void *const dynarray, const void *const element, void* _array_extend(void* array, void* buffer, size_t len); // uncomment in dev mode so that LSP highlights the code -#define CONTAINER_IMPLEMENTATION +/* #define CONTAINER_IMPLEMENTATION */ #ifdef CONTAINER_IMPLEMENTATION /* ----------------------------------------------------------------- */ @@ -1140,7 +1213,163 @@ char hmapi_end(const struct hash_map_iter* iter) { return iter->current_node == NULL || iter->bucket_pos == SIZE_MAX; } +/* ----------------------------------------------------------------- */ +/* ---------------------HASH SET IMPLEMENTATION--------------------- */ +/* ----------------------------------------------------------------- */ + + +struct hash_set hset_create(const size_t el_size, hset_equal_fn eq, hset_hash_fn hash) { + struct hash_set ret = { + .buckets = array_new(struct linked_list, HMAP_INIT_SIZE), + .elem_size = el_size, + .eq = eq, + .hash = hash, + }; + const struct linked_list def = { + .meta = { + .element_size = el_size, + .assumed_size = 0, + .free_function = &free, + }, + .first = NULL, + .last = NULL, + }; + for (size_t i = 0; i < HSET_INIT_SIZE; ++i) ret.buckets[i] = def; + return ret; +} +struct hash_set hset_create_from_buffer(size_t el_size, hset_equal_fn eq, hset_hash_fn hash, const void* buf, size_t buf_len) { + struct hash_set ret = hset_create(el_size, eq, hash); + hset_rehash_to_size(&ret, buf_len / HSET_MAX_BUCKET_SIZE); + for (size_t i = 0; i < buf_len; ++i) hset_insert_copy(&ret, (byte*)buf + i * el_size); + return ret; +} +void hset_free(struct hash_set* set) { + for (size_t i = 0; i < array_size(set->buckets); ++i) ll_free(set->buckets + i); + array_free(set->buckets); + memset(set, 0, sizeof(*set)); +} +void hset_insert_copy(struct hash_set* set, const void* element) { + size_t index = set->hash(element) % array_size(set->buckets); + if (ll_search(set->buckets + index, element, set->eq)) { + void* target = ll_find(set->buckets + index, element, set->eq)->data; + memcpy(target, element, set->elem_size); + return; + } + ll_append(set->buckets + index, element); + if (set->buckets[index].meta.assumed_size > HSET_MAX_BUCKET_SIZE) hset_rehash(set); +} +char hset_insert_mallocated(struct hash_set* set, void* element) { + size_t index = set->hash(element) % array_size(set->buckets); + if (ll_search(set->buckets + index, element, set->eq)) { + void* target = ll_find(set->buckets + index, element, set->eq)->data; + free(target); + target = element; + return 0; + } + ll_append(set->buckets + index, element); + if (set->buckets[index].meta.assumed_size > HSET_MAX_BUCKET_SIZE) hset_rehash(set); + return 1; + +} +void* hset_at(struct hash_set* set, void* element) { + size_t index = set->hash(element) % array_size(set->buckets); + if (ll_search(set->buckets, element, set->eq)) + return ll_find(set->buckets + index, element, set->eq)->data; + return NULL; +} +void hset_remove(struct hash_set* set, void* elem) { + size_t index = set->hash(elem) % array_size(set->buckets); + struct linked_list* target_list = set->buckets + index; + ll_remove_elem(target_list, elem, set->eq); +} +void hset_rehash(struct hash_set* set) { + hset_rehash_to_size(set, array_size(set->buckets) << 1); +} +void hset_rehash_to_size(struct hash_set* set, size_t new_size) { + struct linked_list* new_buckets = array_new(struct linked_list, new_size); + for (size_t i = 0; i < new_size; ++i) { + new_buckets[i] = ll_create(sizeof(struct hmap_pair)); + new_buckets[i].meta.free_function = __hmap_ll_custom_free; + } + struct hash_set_iter iter; + for (hseti_begin(set, &iter); !hseti_end(&iter); hseti_next(&iter)) { + void* hash_data = hseti_get(&iter); + size_t hash_pos = set->hash(hash_data) % new_size; + ll_append(new_buckets + hash_pos, hash_data); + } + for (size_t i = 0; i < array_size(set->buckets); ++i) { + ll_free(set->buckets + i); + } + array_free(set->buckets); + set->buckets = new_buckets; +} +void hseti_begin(const struct hash_set* set, struct hash_set_iter* iter) { + memset(iter, 0, sizeof(struct hash_map_iter)); + iter->set = set; + struct linked_list* buckets = set->buckets; + for (size_t i = 0; i < array_size(buckets); ++i) { + if (buckets[i].first == NULL) continue; + iter->bucket_pos = i; + iter->current_node = buckets[i].first; + return; + } + iter->bucket_pos = SIZE_MAX; + iter->current_node = NULL; +} +void hseti_next(struct hash_set_iter* iter) { + if (iter->current_node == NULL) return; + if (iter->current_node->next != NULL) { + iter->current_node = iter->current_node->next; + return; + } + const struct linked_list *buckets = iter->set->buckets; + for (size_t i = iter->bucket_pos + 1; i < array_size(buckets); ++i) { + if (buckets[i].first == NULL) continue; + iter->bucket_pos = i; + iter->current_node = buckets[i].first; + return; + } + iter->bucket_pos = SIZE_MAX; + iter->current_node = NULL; +} +void* hseti_get(const struct hash_set_iter* iter) { + if (iter->current_node == NULL) return NULL; + return iter->current_node->data; +} +enum hseti_cmp_res hseti_cmp(const struct hash_set_iter *a, const struct hash_set_iter* b) { + if (a->set != b->set) return hset_no; + if (a->bucket_pos != b->bucket_pos) return (a->bucket_pos > b->bucket_pos) ? hset_gt : hset_lt; + if (a->current_node == b->current_node) return hset_eq; + for (const struct linked_list_node* nd = a->current_node; nd != NULL; nd = nd->next) { + if (nd == b->current_node) return hset_lt; + } + return hset_gt; +} +char hseti_gt(const struct hash_set_iter *a, const struct hash_set_iter* b) { + return hseti_cmp(a, b) == hset_gt; +} +char hseti_eq(const struct hash_set_iter *a, const struct hash_set_iter* b) { + return hseti_cmp(a, b) == hset_eq; +} +char hseti_lt(const struct hash_set_iter *a, const struct hash_set_iter* b) { + return hseti_cmp(a, b) == hset_lt; +} +char hseti_ge(const struct hash_set_iter *a, const struct hash_set_iter* b) { + enum hseti_cmp_res cmp = hseti_cmp(a, b); + return cmp == hset_gt || cmp == hset_eq; +} +char hseti_le(const struct hash_set_iter *a, const struct hash_set_iter* b) { + enum hseti_cmp_res cmp = hseti_cmp(a, b); + return cmp == hset_lt || cmp == hset_eq; +} +char hseti_ne(const struct hash_set_iter *a, const struct hash_set_iter* b) { + enum hseti_cmp_res cmp = hseti_cmp(a, b); + return cmp != hset_eq; +} +char hseti_end(const struct hash_set_iter* iter) { + return iter->current_node == NULL || iter->bucket_pos == SIZE_MAX; +} #endif // CONTAINER_IMPLEMENTATION #endif // JUSTANOTHERCATGIRL_HEADERS_CONTAINER diff --git a/tests/hashset.c b/tests/hashset.c new file mode 100644 index 0000000..d98e317 --- /dev/null +++ b/tests/hashset.c @@ -0,0 +1,53 @@ +#define CONTAINER_IMPLEMENTATION +#include "../include/container.h" +#include <stdio.h> + +struct user { + int id; + const char* name; +}; + +size_t user_hash(const struct user* u) { + return u->id; +} + +int user_eq(const struct user* a, const struct user* b) { + return !(a->id == b->id && strcmp(a->name, b->name) == 0); +} + +struct user user(int id, const char* name) { + return (struct user) {.id = id, .name = name}; +} + +void printuser(struct user* u) { + printf("(struct user) {.id=%i, .name=\"%s\"};\n", u->id, u->name); +} + +int main(void) { + struct hash_set set = hset_create(sizeof(struct user), (hset_equal_fn)user_eq, (hset_hash_fn)user_hash); + struct user u0 = user(0, "Jhon"); + struct user u1 = user(1, "John"); + struct user u2 = user(2, "John"); + struct user uw = user(2, "Jnoh"); + struct user ud = user(2, "John"); + hset_insert_copy(&set, &u0); + hset_insert_copy(&set, &u1); + hset_insert_copy(&set, &u2); + hset_insert_copy(&set, &uw); + hset_insert_copy(&set, &ud); + hset_insert_copy(&set, &ud); + hset_insert_copy(&set, &ud); + hset_insert_copy(&set, &ud); + hset_insert_copy(&set, &ud); + hset_insert_copy(&set, &ud); + hset_insert_copy(&set, &ud); + + hset_iter iter; + for (hseti_begin(&set, &iter); !hseti_end(&iter); hseti_next(&iter)) { + struct user* u = hseti_get(&iter); + printuser(u); + } + + hset_free(&set); + return 0; +} |