From 4466182539196d2a2379481f08f0b0cacdf8ca75 Mon Sep 17 00:00:00 2001 From: Kamil Kaminski Date: Sat, 27 Nov 2010 15:06:25 -0600 Subject: Initial commit --- Makefile | 24 +++++++ hashfuncs.c | 163 +++++++++++++++++++++++++++++++++++++++++++ hashfuncs.h | 23 ++++++ hashtbl.c | 228 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ hashtbl.h | 31 +++++++++ 5 files changed, 469 insertions(+) create mode 100644 Makefile create mode 100644 hashfuncs.c create mode 100644 hashfuncs.h create mode 100644 hashtbl.c create mode 100644 hashtbl.h diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3e20c8c --- /dev/null +++ b/Makefile @@ -0,0 +1,24 @@ +PROG = hashtbl +OBJS = $(PROG).o hashfuncs.o +CC = gcc +DBGFLAGS = -g -O0 +ifdef DEBUG + CFLAGS = $(DBGFLAGS) -Wall -std=c99 +else + CFLAGS = -Wall -std=c99 -O2 -march=native -mtune=native +endif +LDFLAGS = -lm + +$(PROG): $(OBJS) + $(CC) $(LDFLAGS) $(OBJS) -o $(PROG) + +$(PROG).o: $(PROG).c hashtbl.h hashfuncs.h + $(CC) -c $(CFLAGS) $(PROG).c + +hashfuncs.o: hashfuncs.c hashfuncs.h + $(CC) -c $(CFLAGS) hashfuncs.c + +.PHONY: clean + +clean: + rm -f *.o ./$(PROG) diff --git a/hashfuncs.c b/hashfuncs.c new file mode 100644 index 0000000..01a1be1 --- /dev/null +++ b/hashfuncs.c @@ -0,0 +1,163 @@ +/* hashfuncs.c + * + * Hash Functions + * + * notes: aphash may not be correct + * + */ + +#include "hashfuncs.h" + +/* does not mod by the size of the hash table, simple and clever, + * this allows seamless retrieval of collided elements */ +unsigned int defaulthash(const char *key) +{ + unsigned int hash = 0; + + for ( ; *key; key++) + hash += (unsigned char) *key; + + return hash; +} + +unsigned int rshash(const char *key) +{ + unsigned int b = 378551; + unsigned int a = 63689; + unsigned int hash = 0; + + for ( ; *key; key++) + { + hash = hash * a + (*key); + a = a * b; + } + + return hash; +} + +unsigned int jshash(const char *key) +{ + unsigned int hash = 1315423911; + + for ( ; *key; key++) + hash ^= ((hash << 5) + ((unsigned char) (*key)) + (hash >> 2)); + + return hash; +} + +unsigned int pjwhash(const char *key) +{ + const unsigned int BitsInUnsignedInt = (unsigned int) (sizeof(unsigned int) * 8); + const unsigned int ThreeQuarters = (unsigned int) ((BitsInUnsignedInt * 3) / 4); + const unsigned int OneEighth = (unsigned int) (BitsInUnsignedInt / 8); + const unsigned int HighBits = (unsigned int) (0xFFFFFFFF) << (BitsInUnsignedInt - OneEighth); + unsigned int hash = 0; + unsigned int test = 0; + + for ( ; *key; key++) + { + hash = (hash << OneEighth) + (*key); + if ((test = hash & HighBits) != 0) + hash = (( hash ^ (test >> ThreeQuarters)) & (~HighBits)); + } + + return hash; +} + +unsigned int elfhash(const char *key) +{ + unsigned int hash = 0; + unsigned int x = 0; + + for ( ; *key; key++) + { + hash = (hash << 4) + (*key); + + if ((x = hash & 0xF0000000L) != 0) + hash ^= (x >> 24); + + hash &= ~x; + } + + return hash; +} + +unsigned int bkdrhash(const char *key) +{ + unsigned int seed = 131; /* 31 131 1313 13131 131313 etc.. */ + unsigned int hash = 0; + + for ( ; *key; key++) + hash = (hash * seed) + (*key); + + return hash; +} + +unsigned int sdbmhash(const char *key) +{ + unsigned int hash = 0; + + for ( ; *key; key++) + hash = (*key) + (hash << 6) + (hash << 16) - hash; + + return hash; +} + +unsigned int djbhash(const char *key) +{ + unsigned int hash = 5381; + + for ( ; *key; key++) + hash = ((hash << 5) + hash) + (*key); + + return hash; +} + +unsigned int dekhash(const char *key) +{ + unsigned int hash = strlen(key); + + for ( ; *key; key++) + hash = ((hash << 5) ^ (hash >> 27)) ^ (*key); + + return hash; +} + +unsigned int bphash(const char *key) +{ + unsigned int hash = 0; + + for ( ; *key; key++) + hash = hash << 7 ^ (*key); + + return hash; +} + +unsigned int fnvhash(const char *key) +{ + const unsigned int fnv_prime = 0x811C9DC5; + unsigned int hash = 0; + + for ( ; *key; key++) + { + hash *= fnv_prime; + hash ^= (*key); + } + + return hash; +} + +unsigned int aphash(const char *key) +{ + unsigned int hash = 0xAAAAAAAA; + unsigned int i = strlen(key); + + for ( ; *key; key++) + { + hash ^= ((i & 1) == 0) ? ( (hash << 7) ^ (*key) * (hash >> 3)) : + (~((hash << 11) + ((*key) ^ (hash >> 5)))); + } + + return hash; +} + diff --git a/hashfuncs.h b/hashfuncs.h new file mode 100644 index 0000000..f344810 --- /dev/null +++ b/hashfuncs.h @@ -0,0 +1,23 @@ +#ifndef _HASHFUNCS_H_ +#define _HASHFUNCS_H_ + +#include +#include + +/* typedef unsigned int (*hash_func)(const char *); */ + +unsigned int defaulthash(const char *); +unsigned int rshash(const char *); +unsigned int jshash(const char *); +unsigned int pjwhash(const char *); +unsigned int elfhash(const char *); +unsigned int bkdrhash(const char *); +unsigned int sdbmhash(const char *); +unsigned int djbhash(const char *); +unsigned int dekhash(const char *); +unsigned int bphash(const char *); +unsigned int fnvhash(const char *); +unsigned int aphash(const char *); + +#endif + diff --git a/hashtbl.c b/hashtbl.c new file mode 100644 index 0000000..f26a8dd --- /dev/null +++ b/hashtbl.c @@ -0,0 +1,228 @@ +/* hashtbl.c + * + * Hash Table + * + */ + +#include "hashtbl.h" +#include "hashfuncs.h" + +char *strdup(const char *s) +{ + char *b; + if (!(b = malloc(strlen(s) + 1))) + return NULL; + strcpy(b, s); + + return b; +} + +HASHTBL *hashtbl_create(size_t size, unsigned int (*hashfunc)(const char *)) +{ + HASHTBL *hashtbl; + + if (!(hashtbl = malloc(sizeof(HASHTBL)))) + return NULL; + + /* pointer array of nodes */ + if (!(hashtbl->nodes = calloc(size, sizeof(struct hashnode_s*)))) + { + free(hashtbl); + return NULL; + } + + hashtbl->size = size; + + if (hashfunc) + hashtbl->hashfunc = hashfunc; + else + hashtbl->hashfunc = aphash; + + return hashtbl; +} + +void hashtbl_destroy(HASHTBL *hashtbl) +{ + size_t n; + struct hashnode_s *node, *oldnode; + + for (n = 0; nsize; ++n) { + node = hashtbl->nodes[n]; + while (node) { + free(node->key); + oldnode = node; + node = node->next; + free(oldnode); + } + } + free(hashtbl->nodes); + free(hashtbl); +} + +int hashtbl_insert(HASHTBL *hashtbl, const char *key, void *data) +{ + struct hashnode_s *head; + size_t hash = hashtbl->hashfunc(key) % hashtbl->size; + +/* fprintf(stderr, "hashtbl_insert() key = %s, hash = %d, data = %s\n", key, hash, (char*)data); */ + + head = hashtbl->nodes[hash]; + while (head) + { + if (!strcmp(head->key, key)) + { + /* update the data in currently existing node */ + head->data = data; + return 0; + } + + head = head->next; + } + + /* if we get here, it means that this is the first node at particular index + * or we are appending new node to the end of the linked list */ + if (!(head = malloc(sizeof(struct hashnode_s)))) + return -1; + + if (!(head->key = strdup(key))) + { + free(head); + return -1; + } + + head->data = data; + head->next = hashtbl->nodes[hash]; + + /* update head ptr */ + hashtbl->nodes[hash] = head; + + return 0; +} + +int hashtbl_remove(HASHTBL *hashtbl, const char *key) +{ + struct hashnode_s *node, *prevnode = NULL; + size_t hash = hashtbl->hashfunc(key) % hashtbl->size; + + node = hashtbl->nodes[hash]; + while (node) + { + if (!strcmp(node->key, key)) + { + free(node->key); + if (prevnode) + prevnode->next = node->next; + else + hashtbl->nodes[hash] = node->next; + + free(node); + + return 0; + } + + prevnode = node; + node = node->next; + } + + return -1; +} + + +void *hashtbl_get(HASHTBL *hashtbl, const char *key) +{ + struct hashnode_s *head; + size_t hash = hashtbl->hashfunc(key) % hashtbl->size; + +/* fprintf(stderr, "hashtbl_get() key = %s, hash = %d\n", key, hash);*/ + + head = hashtbl->nodes[hash]; + while (head) + { + /* return data only when keys match */ + if (!strcmp(head->key, key)) + return head->data; + + head = head->next; + } + + return NULL; +} + +int hashtbl_resize(HASHTBL *hashtbl, size_t size) +{ + HASHTBL newtbl; + size_t n; + struct hashnode_s *node,*next; + + newtbl.size = size; + newtbl.hashfunc = hashtbl->hashfunc; + + if (!(newtbl.nodes = calloc(size, sizeof(struct hashnode_s*)))) return -1; + + for (n = 0; nsize; ++n) { + for (node = hashtbl->nodes[n]; node; node = next) { + next = node->next; + hashtbl_insert(&newtbl, node->key, node->data); + hashtbl_remove(hashtbl, node->key); + + } + } + + free(hashtbl->nodes); + hashtbl->size = newtbl.size; + hashtbl->nodes = newtbl.nodes; + + return 0; +} + +int main(void) +{ + char *key = "abcdefghijklmnopqrstuvwxyz1234567890"; + + (void) printf("Hash Functions\n"); + (void) printf("Key: %s\n\n", key); + (void) printf("ASCII default hash function value: %u\n", defaulthash(key)); + (void) printf("RS hash function value: %u\n", rshash(key)); + (void) printf("JS hash function value: %u\n", jshash(key)); + (void) printf("PJW hash function value: %u\n", pjwhash(key)); + (void) printf("ELF hash function value: %u\n", elfhash(key)); + (void) printf("BKDR hash function value: %u\n", bkdrhash(key)); + (void) printf("SDBM hash function value: %u\n", sdbmhash(key)); + (void) printf("DJB hash function value: %u\n", djbhash(key)); + (void) printf("DEK hash function value: %u\n", dekhash(key)); + (void) printf("BP hash function value: %u\n", bphash(key)); + (void) printf("FNV hash function value: %u\n", fnvhash(key)); + (void) printf("AP hash function value: %u\n", aphash(key)); + (void) puts(""); + + HASHTBL *hashtbl; + char *italy, *spain; + + hashtbl = hashtbl_create(16, NULL); + if (hashtbl == NULL) + { + fprintf(stderr, "error: hashtbl_create() failed\n"); + exit(-1); + } + + hashtbl_insert(hashtbl, "France", "Paris"); + hashtbl_insert(hashtbl, "England", "London"); + hashtbl_insert(hashtbl, "Sweden", "Stockholm"); + hashtbl_insert(hashtbl, "Germany", "Berlin"); + hashtbl_insert(hashtbl, "Norway", "Oslo"); + hashtbl_insert(hashtbl, "Italy", "Rome"); + hashtbl_insert(hashtbl, "Spain", "Madrid"); + hashtbl_insert(hashtbl, "Estonia", "Tallinn"); + hashtbl_insert(hashtbl, "Netherlands", "Amsterdam"); + hashtbl_insert(hashtbl, "Ireland", "Dublin"); + + italy = hashtbl_get(hashtbl, "Italy"); + (void) printf("Italy: %s\n", italy ? italy : ""); + spain = hashtbl_get(hashtbl, "Spain"); + (void) printf("Spain: %s\n", spain ? spain : ""); + + hashtbl_destroy(hashtbl); + + return 0; +} + diff --git a/hashtbl.h b/hashtbl.h new file mode 100644 index 0000000..70428f8 --- /dev/null +++ b/hashtbl.h @@ -0,0 +1,31 @@ +#ifndef HASHTBL_H_ +#define HASHTBL_H_ + +#include +#include +#include + +typedef struct hashtbl +{ + size_t size; + struct hashnode_s **nodes; + unsigned int (*hashfunc)(const char *); +} HASHTBL; + +struct hashnode_s { + char *key; + void *data; + struct hashnode_s *next; +}; + +/* function prototypes */ +char *strdup(const char *); +HASHTBL *hashtbl_create(size_t, unsigned int (*hashfunc)(const char *)); +void hashtbl_destroy(HASHTBL *); +int hashtbl_insert(HASHTBL *, const char *, void *); +int hashtbl_remove(HASHTBL *, const char *); +void *hashtbl_get(HASHTBL *, const char *); +int hashtbl_resize(HASHTBL *, size_t); + +#endif + -- cgit v1.2.3