//archeus/src/std/hashtable.c
#include "arc/std/hashtable.h"
#include "arc/std/errno.h"
#include <stdlib.h>
#include <stdio.h>
//private node type: one slot of the hashtable's backing array, holding a key/value pair
typedef struct ARC_HashtableNode ARC_HashtableNode;

//NOTE: the field order matters, nodes are built in this file with positional compound literals
struct ARC_HashtableNode {
    //the stored key, a NULL key marks the slot as unused
    void *key;

    //the value paired with the key
    void *value;

    //the hash of the key, kept so the node can be placed again without rehashing
    uint32_t hashvalue;

    //index of the slot that follows this one in its chain, a slot that ends a chain (or is unused) holds its own index
    uint32_t nextIndex;
};
//the hashtable: a resizable array of nodes together with the callbacks applied to its keys and values
struct ARC_Hashtable {
    //number of slots allocated in nodes
    uint32_t currentCapacity;

    //number of slots that currently hold a key
    uint32_t currentSize;

    //the backing array, currentCapacity slots long
    ARC_HashtableNode *nodes;

    //turns a key into a hashvalue, stored by value
    ARC_Hashtable_HashFn hashFn;

    //decides whether two keys are equal, stored by value
    ARC_Hashtable_KeyCompareFn keyCompareFn;

    //heap allocated copy of the cleanup callback, NULL when no callback was supplied
    ARC_Hashtable_DestroyKeyValueFn *destroyKeyValueFn;
};
//default hash function: bitwise CRC-32 (reflected form) over a NUL terminated byte string
//adapted from: https://en.wikipedia.org/wiki/Computation_of_cyclic_redundancy_checks#CRC-32_example
//
//key must point to a NUL terminated string, the terminator itself is not hashed
//returns the finalized (bit inverted) CRC-32 of the bytes in front of the terminator
uint32_t CRC32Fn(void *key){
    //the reflected CRC-32 polynomial is exactly 32 bits wide, an extra hex digit would be truncated by the
    //32 bit accumulator and silently turn this into a different (non CRC-32) function
    const uint32_t polynomial = 0xEDB88320u;

    //read the key as raw unsigned bytes so the result does not depend on the signedness of char
    const uint8_t *bytes = (const uint8_t *)key;

    uint32_t hashvalue = 0xffffffffu;
    for(uint32_t i = 0; bytes[i] != 0; i++){
        uint8_t value = bytes[i];

        //feed the byte in one bit at a time, least significant bit first
        for(uint8_t j = 0; j < 8; j++){
            uint8_t flag = (uint8_t)((value ^ hashvalue) & 1);

            hashvalue >>= 1;
            if(flag){
                hashvalue ^= polynomial;
            }

            value >>= 1;
        }
    }

    //the final inversion is part of the CRC-32 definition
    return ~hashvalue;
}
//private function that will just check compare void pointers directly used as default key compare
ARC_Bool ARC_Hashtable_DefaultKeyCompareFn(void *key1, void *key2){
return (ARC_Bool)(key1 == key2);
2022-10-27 15:16:54 -06:00
}
//creates an empty hashtable with room for a single node and stores it in *hashtable
//
//hashtable         receives the new table, set to NULL when an allocation fails
//hashFn            pointer to the hash callback to use, NULL selects CRC32Fn
//keyCompareFn      pointer to the key compare callback to use, NULL selects a plain pointer comparison
//destroyKeyValueFn pointer to a cleanup callback for key/value pairs, NULL means pairs are never cleaned up,
//                  the callback is copied so the pointer does not need to outlive this call
//
//on allocation failure arc_errno is set to ARC_ERRNO_NULL and nothing is leaked
void ARC_Hashtable_Create(ARC_Hashtable **hashtable, ARC_Hashtable_HashFn *hashFn, ARC_Hashtable_KeyCompareFn *keyCompareFn, ARC_Hashtable_DestroyKeyValueFn *destroyKeyValueFn){
    //make sure the caller never sees a half built table
    *hashtable = NULL;

    ARC_Hashtable *table = malloc(sizeof(*table));
    if(table == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Create(hashtable, hashFn, keyCompareFn, destroyKeyValueFn), could not allocate the hashtable");
        return;
    }

    //set current capacity and size to start
    table->currentCapacity = 1;
    table->currentSize     = 0;

    //reserve enough memory for one node
    table->nodes = malloc(sizeof(*(table->nodes)));
    if(table->nodes == NULL){
        free(table);
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Create(hashtable, hashFn, keyCompareFn, destroyKeyValueFn), could not allocate the hashtable's nodes");
        return;
    }

    //the first and only node starts out unused and ends its own chain
    table->nodes[0] = (ARC_HashtableNode){ NULL, NULL, 0, 0 };

    //default to CRC32, then override if hashFn exists
    table->hashFn = CRC32Fn;
    if(hashFn != NULL){
        table->hashFn = *hashFn;
    }

    //default to comparing pointers, then override if keyCompareFn exists
    table->keyCompareFn = ARC_Hashtable_DefaultKeyCompareFn;
    if(keyCompareFn != NULL){
        table->keyCompareFn = *keyCompareFn;
    }

    //default to NULL, then create and copy destroyKeyValueFn if it exists
    table->destroyKeyValueFn = NULL;
    if(destroyKeyValueFn != NULL){
        table->destroyKeyValueFn = malloc(sizeof(*(table->destroyKeyValueFn)));
        if(table->destroyKeyValueFn == NULL){
            free(table->nodes);
            free(table);
            arc_errno = ARC_ERRNO_NULL;
            ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Create(hashtable, hashFn, keyCompareFn, destroyKeyValueFn), could not allocate the copy of destroyKeyValueFn");
            return;
        }

        *(table->destroyKeyValueFn) = *destroyKeyValueFn;
    }

    *hashtable = table;
}
//destroys a hashtable: every stored pair is cleaned up first, then all memory owned by the table is released
//hashtable must not be used after this call
void ARC_Hashtable_Destroy(ARC_Hashtable *hashtable){
    //clearing runs the cleanup callback (when one exists) and leaves a fresh one node array behind
    ARC_Hashtable_Clear(hashtable);

    //release the copied callback, free does nothing when it was never set
    free(hashtable->destroyKeyValueFn);

    //release the (now empty) node array and finally the table itself
    free(hashtable->nodes);
    free(hashtable);
}
//private function that stores a node in the first unused slot found after the end of its chain
//
//nodes    the array to store into
//capacity number of slots in nodes, must not be 0 as it is used as a modulus
//node     the node to store, its nextIndex is ignored and overwritten
//
//NOTE: when no slot has a NULL key the search stops after one full lap and the slot it started from is overwritten,
//      the callers in this file only call this while an unused slot exists
void ARC_HashtableNode_SetNearestNodeToArray(ARC_HashtableNode *nodes, uint32_t capacity, ARC_HashtableNode node){
    //begin at the slot the hashvalue maps to and follow the links to the slot that ends the chain
    uint32_t tail = node.hashvalue % capacity;
    while(nodes[tail].nextIndex != tail){
        tail = nodes[tail].nextIndex;
    }

    //walk forward from the tail, wrapping at the end of the array, until an unused slot shows up
    uint32_t slot = tail;
    for(;;){
        if(nodes[slot].key == NULL){
            break;
        }

        slot++;
        if(slot >= capacity){
            slot = 0;
        }

        //a full lap without finding an unused slot, stop searching
        if(slot == tail){
            break;
        }
    }

    //link the old tail to the chosen slot
    nodes[tail].nextIndex = slot;

    //store the node and make it the new end of the chain
    node.nextIndex = slot;
    nodes[slot]    = node;
}
//adds a key/value pair to the hashtable, growing the node array first when every slot is in use
//
//hashtable the table to add to
//key       the key to store, must not be NULL (a NULL key marks an unused slot)
//value     the value to pair with the key
//
//sets arc_errno to ARC_ERRNO_OVERFLOW when the table already holds the maximum number of pairs, and to
//ARC_ERRNO_NULL when key is NULL or the table could not be grown, in all of those cases nothing is added
//NOTE: the key is not checked against keys that are already stored
void ARC_Hashtable_Add(ARC_Hashtable *hashtable, void *key, void *value){
    //check to see if the current size is the same as a max uint32_t and if so it will overflow so throw an error
    if(hashtable->currentSize == ~((uint32_t)0)){
        arc_errno = ARC_ERRNO_OVERFLOW;
        ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Add(hashtable, key, value), hashtable at max capacity tried adding another value");
        return;
    }

    //check to make sure key is not NULL
    if(key == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Add(hashtable, key, value), NULL was passed in for the key, this function cannot handle that");
        return;
    }

    //check if we are at the max of the current capacity
    if(hashtable->currentSize == hashtable->currentCapacity){
        uint32_t oldCapacity = hashtable->currentCapacity;
        ARC_HashtableNode *oldNodes = hashtable->nodes;

        //grow to double the capacity plus one (1, 3, 7, 15, ...), setting the low bit also guarantees the new
        //capacity can never be 0, which would break both the allocation and the modulo used to place nodes
        uint32_t newCapacity = (oldCapacity << 1) | 1;

        //build the bigger array on the side so the table stays untouched if the allocation fails
        ARC_HashtableNode *newNodes = malloc(sizeof(*newNodes) * newCapacity);
        if(newNodes == NULL){
            arc_errno = ARC_ERRNO_NULL;
            ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Add(hashtable, key, value), could not allocate memory to grow the hashtable, the key was not added");
            return;
        }

        //every new slot starts unused and ends its own chain
        for(uint32_t index = 0; index < newCapacity; index++){
            newNodes[index] = (ARC_HashtableNode){ NULL, NULL, 0, index };
        }

        //place the old nodes into the new array, their chains are rebuilt for the new capacity
        for(uint32_t index = 0; index < oldCapacity; index++){
            //the old array is full when we get here, the check only guards against placing an unused slot
            if(oldNodes[index].key == NULL){
                continue;
            }

            ARC_HashtableNode_SetNearestNodeToArray(newNodes, newCapacity, oldNodes[index]);
        }

        hashtable->nodes           = newNodes;
        hashtable->currentCapacity = newCapacity;

        //free the old array
        free(oldNodes);
    }

    //get the hashvalue
    uint32_t hashvalue = hashtable->hashFn(key);

    //store the new node (its nextIndex is filled in when it is placed) and count it
    ARC_HashtableNode_SetNearestNodeToArray(hashtable->nodes, hashtable->currentCapacity, (ARC_HashtableNode){ key, value, hashvalue, 0 });
    hashtable->currentSize++;
}
//private function that removes the node at index by pulling the nodes that follow it in its chain back by one link
//
//nodes         the array holding the node
//capacity      number of slots in nodes
//index         the slot to remove
//previousIndex the slot that links to index, or index itself when nothing links to it
void ARC_Hashtable_UnsetNodeAtIndexFromArray(ARC_HashtableNode *nodes, uint32_t capacity, uint32_t index, uint32_t previousIndex){
    //a node that ends its chain only needs to be detached from the slot in front of it and emptied
    if(nodes[index].nextIndex == index && nodes[previousIndex].nextIndex == index){
        nodes[previousIndex].nextIndex = previousIndex;
        nodes[index] = (ARC_HashtableNode){ NULL, NULL, 0, index };
        return;
    }

    //pull each following node back into the slot in front of it until the end of the chain is reached
    for(uint32_t successor = nodes[index].nextIndex; successor != index; successor = nodes[index].nextIndex){
        //move the following node back
        nodes[index] = nodes[successor];

        //common case: the moved node did not land on the slot its hashvalue maps to
        if(nodes[successor].hashvalue % capacity != index){
            //the copy carried the successor's own link, point it at the successor's slot instead
            nodes[index].nextIndex = successor;

            //step one link further down the chain
            previousIndex = index;
            index = successor;
            continue;
        }

        //the moved node now sits exactly where its hashvalue maps to
        //NOTE: there is no elegant way of handling this yet, so the rest of the chain is removed and added again

        //cut the chain in front of the moved node and make the moved node end its own chain
        nodes[previousIndex].nextIndex = previousIndex;
        nodes[index].nextIndex = index;

        //start at the slot the moved node came from, its copy is only needed for its link
        uint32_t cursor = successor;
        ARC_HashtableNode pending = nodes[cursor];
        nodes[cursor] = (ARC_HashtableNode){ NULL, NULL, 0, cursor };

        //empty every remaining slot of the chain and add its node back to the array
        while(pending.nextIndex != cursor){
            cursor  = pending.nextIndex;
            pending = nodes[cursor];

            nodes[cursor] = (ARC_HashtableNode){ NULL, NULL, 0, cursor };

            ARC_HashtableNode_SetNearestNodeToArray(nodes, capacity, pending);
        }

        return;
    }

    //previousIndex is the last node that was moved, so it now ends the chain
    nodes[previousIndex].nextIndex = previousIndex;

    //the slot the last node was moved out of becomes unused
    nodes[index] = (ARC_HashtableNode){ NULL, NULL, 0, index };
}
//removes the pair stored under key, running the cleanup callback on it when one exists
//once the size has dropped to half the capacity (rounded down) the node array is shrunk to that size
//
//hashtable the table to remove from
//key       the key to remove, must not be NULL
//
//sets arc_errno to ARC_ERRNO_NULL when key is NULL and to ARC_ERRNO_DATA when the key is not stored
void ARC_Hashtable_Remove(ARC_Hashtable *hashtable, void *key){
    //check to make sure key is not NULL, the default hash function reads through the key
    if(key == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Remove(hashtable, key), NULL was passed in for the key, this function cannot handle that");
        return;
    }

    //start at the slot the hashvalue maps to, previousIndex trails one link behind index
    uint32_t index = hashtable->hashFn(key) % hashtable->currentCapacity;
    uint32_t previousIndex = index;

    //follow the chain checking for a match
    ARC_Bool nodeFound = ARC_False;
    while(hashtable->nodes[index].nextIndex != index){
        if(hashtable->keyCompareFn(hashtable->nodes[index].key, key) == ARC_True){
            nodeFound = ARC_True;
            break;
        }

        previousIndex = index;
        index = hashtable->nodes[index].nextIndex;
    }

    //the loop stops before comparing the slot that ends the chain, so check it here (it may also be unused)
    if(nodeFound == ARC_False && hashtable->nodes[index].key != NULL && hashtable->keyCompareFn(hashtable->nodes[index].key, key) == ARC_True){
        nodeFound = ARC_True;
    }

    //error if the node was not found
    if(nodeFound == ARC_False){
        arc_errno = ARC_ERRNO_DATA;
        ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Remove(hashtable, key), key was not found in hashtable, could not remove");
        return;
    }

    //get the found node
    ARC_HashtableNode node = hashtable->nodes[index];

    //let the cleanup callback release the stored pair when a callback exists
    if(hashtable->destroyKeyValueFn != NULL){
        (*(hashtable->destroyKeyValueFn))(node.key, node.value);
    }

    //move all next items back
    ARC_Hashtable_UnsetNodeAtIndexFromArray(hashtable->nodes, hashtable->currentCapacity, index, previousIndex);

    //we have removed the item so we can decrease the current size
    hashtable->currentSize--;

    //only shrink when the size has dropped to exactly half the capacity, and never below a capacity of one
    if(hashtable->currentSize != hashtable->currentCapacity >> 1 || hashtable->currentCapacity == 1){
        return;
    }

    uint32_t oldCapacity = hashtable->currentCapacity;
    uint32_t newCapacity = oldCapacity >> 1;
    ARC_HashtableNode *oldNodes = hashtable->nodes;

    //build the smaller array on the side, shrinking only saves memory so when the allocation fails
    //the removal still counts as done and the table simply keeps its larger array
    ARC_HashtableNode *newNodes = malloc(sizeof(*newNodes) * newCapacity);
    if(newNodes == NULL){
        return;
    }

    //every new slot starts unused and ends its own chain
    for(uint32_t slot = 0; slot < newCapacity; slot++){
        newNodes[slot] = (ARC_HashtableNode){ NULL, NULL, 0, slot };
    }

    //place the old nodes into the new array, unused slots do not need to be copied
    for(uint32_t slot = 0; slot < oldCapacity; slot++){
        if(oldNodes[slot].key == NULL){
            continue;
        }

        ARC_HashtableNode_SetNearestNodeToArray(newNodes, newCapacity, oldNodes[slot]);
    }

    hashtable->nodes           = newNodes;
    hashtable->currentCapacity = newCapacity;

    //free the old array
    free(oldNodes);
}
//private callback function to delete all the key value pairs in the hashtable
void ARC_Hashtable_DestroyKeyValueIteratorFn(void *key, void *value, void *userData){
ARC_Hashtable *hashtable = (ARC_Hashtable *)userData;
(*(hashtable->destroyKeyValueFn))(key, value);
}
//removes every pair from the hashtable and resets it to a single unused node
//the cleanup callback, when one exists, is run on every stored pair first
void ARC_Hashtable_Clear(ARC_Hashtable *hashtable){
    //if the destroyKeyValueFn exists, run iterations to clear the table
    if(hashtable->destroyKeyValueFn != NULL){
        ARC_Hashtable_RunIteration(hashtable, ARC_Hashtable_DestroyKeyValueIteratorFn, hashtable);
    }

    //shrink the node array down to one node in place, when realloc fails the existing (larger) array
    //is left untouched and is still big enough, so the table stays usable either way
    ARC_HashtableNode *shrunkNodes = realloc(hashtable->nodes, sizeof(*shrunkNodes));
    if(shrunkNodes != NULL){
        hashtable->nodes = shrunkNodes;
    }

    //set current capacity and size to start
    hashtable->currentCapacity = 1;
    hashtable->currentSize     = 0;

    //reset the whole node, not just its key: adding and looking up follow nextIndex, so it has to
    //point back at slot 0 or the next call would walk off the end of the array
    hashtable->nodes[0] = (ARC_HashtableNode){ NULL, NULL, 0, 0 };
}
//looks up the value stored under key
//
//hashtable the table to search
//key       the key to look for, must not be NULL
//
//returns the stored value, or NULL when the key is not stored
//a NULL key also returns NULL and sets arc_errno to ARC_ERRNO_NULL
void *ARC_Hashtable_Get(ARC_Hashtable *hashtable, void *key){
    //a NULL key can never be stored, so refuse it up front
    if(key == NULL){
        arc_errno = ARC_ERRNO_NULL;
        ARC_DEBUG_LOG_ERROR("ARC_Hashtable_Get(hashtable, key), NULL was passed in for the key, this function cannot handle that");
        return NULL;
    }

    //begin at the slot the hashvalue maps to and follow the chain from there
    uint32_t slot = hashtable->hashFn(key) % hashtable->currentCapacity;
    for(;;){
        ARC_HashtableNode current = hashtable->nodes[slot];

        //a slot that links to itself ends the chain, it may be unused so its key has to be checked first
        if(current.nextIndex == slot){
            if(current.key != NULL && hashtable->keyCompareFn(current.key, key) == ARC_True){
                return current.value;
            }

            //the whole chain was searched without a match
            return NULL;
        }

        //a slot in the middle of the chain, return its value on a match
        if(hashtable->keyCompareFn(current.key, key) == ARC_True){
            return current.value;
        }

        //move on to the next slot of the chain
        slot = current.nextIndex;
    }
}
//calls iteratorFn once for every stored pair, visiting the slots in array order
//
//hashtable  the table to iterate
//iteratorFn callback that receives the key, the value and userData of each pair
//userData   passed through to iteratorFn untouched
void ARC_Hashtable_RunIteration(ARC_Hashtable *hashtable, ARC_Hashtable_IteratorFn iteratorFn, void *userData){
    uint32_t slot = 0;
    while(slot < hashtable->currentCapacity){
        //take a copy of the slot before the callback runs
        ARC_HashtableNode current = hashtable->nodes[slot];
        slot++;

        //only slots that hold a key are handed to the callback
        if(current.key != NULL){
            iteratorFn(current.key, current.value, userData);
        }
    }
}