Define a proper hash table library

Define a proper hash table library, instead of the current ad hoc stuff used for both labels and macros. This only implements the actual library; it is not yet used. We use a CRC64 as a prehash. This is almost certainly overkill, although it is rather efficient (except, arguably, the table lookup) on 64-bit platforms, and not all that bad on 32-bit platforms. All we really need is a function which produces two independent 32-bit results which are used as the primary and secondary hash respectively. Either way, the prehash function is easily replacable if/when we have a quicker alternative.
author: H. Peter Anvin <hpa@zytor.com> 2007-09-13 23:34:21 -0700
committer: H. Peter Anvin <hpa@zytor.com> 2007-09-14 09:24:38 -0700
commit: cde08292d65072c6a3fec5ee0f0a304d5bdf301c (patch)
tree: 23579b5d3540b982d42374a3a19d4ebb98773e68 /hashtbl.c
parent: 9ab60dabcca3d7c9fe9a0bb6e7e01f9295d95532 (diff)
download: nasm-cde08292d65072c6a3fec5ee0f0a304d5bdf301c.tar.gz
nasm-cde08292d65072c6a3fec5ee0f0a304d5bdf301c.tar.bz2
nasm-cde08292d65072c6a3fec5ee0f0a304d5bdf301c.zip
1 files changed, 128 insertions, 0 deletions
diff --git a/hashtbl.c b/hashtbl.c
new file mode 100644
index 0000000..2a38973
--- /dev/null
+++ b/hashtbl.c
@@ -0,0 +1,128 @@
+/*
+ * hashtbl.c
+ *
+ * Efficient dictionary hash table class.
+ */
+
+#include <inttypes.h>
+#include <string.h>
+#include "nasm.h"
+#include "hashtbl.h"
+
+#define HASH_INITIAL_SIZE	64
+#define HASH_MAX_LOAD		2 /* Higher = more memory-efficient, slower */
+
+static struct hash_tbl_node *alloc_table(size_t newsize)
+{
+    size_t bytes = newsize*sizeof(struct hash_tbl_node);
+    struct hash_tbl_node *newtbl = nasm_malloc(bytes);
+
+    memset(newtbl, 0, bytes);
+
+    return newtbl;
+}
+
+struct hash_table *hash_init(void)
+{
+    struct hash_table *head = nasm_malloc(sizeof(struct hash_table));
+
+    head->table    = alloc_table(HASH_INITIAL_SIZE);
+    head->load     = 0;
+    head->size     = HASH_INITIAL_SIZE;
+    head->max_load = HASH_INITIAL_SIZE*(HASH_MAX_LOAD-1)/HASH_MAX_LOAD;
+
+    return head;
+}
+
+/*
+ * Find an entry in a hash table.
+ *
+ * On failure, if "insert" is non-NULL, store data in that structure
+ * which can be used to insert that node using hash_add().
+ *
+ * WARNING: this data is only valid until the very next call of
+ * hash_add(); it cannot be "saved" to a later date.
+ */
+void *hash_find(struct hash_table *head, const char *key,
+		struct hash_insert *insert)
+{
+    struct hash_tbl_node *np;
+    uint64_t hash = crc64(key);
+    struct hash_tbl_node *tbl = head->table;
+    size_t mask = head->size-1;
+    size_t pos  = hash & mask;
+    size_t inc  = ((hash >> 32) & mask) | 1;	/* Always odd */
+
+    while ((np = &tbl[pos])->key) {
+	if (hash == np->hash && !strcmp(key, np->key))
+	    return np->data;
+	pos = (pos+inc) & mask;
+    }
+
+    /* Not found.  Store info for insert if requested. */
+    if (insert) {
+	insert->head  = head;
+	insert->hash  = hash;
+	insert->where = np;
+    }
+    return NULL;
+}
+
+/*
+ * Insert node.
+ */
+void hash_add(struct hash_insert *insert, const char *key, void *data)
+{
+    struct hash_table *head  = insert->head;
+    struct hash_tbl_node *np = insert->where;
+
+    /* Insert node.  We can always do this, even if we need to
+       rebalance immediately after. */
+    np->hash = insert->hash;
+    np->key  = key;
+    np->data = data;
+
+    if (++head->load > head->max_load) {
+	/* Need to expand the table */
+	size_t newsize = head->size << 1;
+	struct hash_tbl_node *newtbl = alloc_table(newsize);
+	size_t mask = newsize-1;
+
+	if (head->table) {
+	    struct hash_tbl_node *op;
+	    size_t i;
+
+	    /* Rebalance all the entries */
+	    for (i = 0, op = head->table; i < head->size; i++, op++) {
+		if (op->key) {
+		    size_t pos = op->hash & mask;
+		    size_t inc = ((op->hash >> 32) & mask) | 1;
+		    
+		    while ((np = &newtbl[pos])->data)
+			pos = (pos+inc) & mask;
+
+		    *np = *op;
+		}
+	    }
+	    nasm_free(head->table);
+	}
+
+	head->table    = newtbl;
+	head->size     = newsize;
+	head->max_load = newsize*(HASH_MAX_LOAD-1)/HASH_MAX_LOAD;
+    }
+}
+
+void hash_free(struct hash_table *head, void (*free_func)(char *, void *))
+{
+    size_t i;
+    struct hash_tbl_node *op;
+
+    for (i = 0, op = head->table; i < head->size; i++, op++) {
+	if (op->key)
+	    free_func((char *)op->key, op->data);
+    }
+
+    nasm_free(head->table);
+    nasm_free(head);
+}
author	H. Peter Anvin <hpa@zytor.com>	2007-09-13 23:34:21 -0700
committer	H. Peter Anvin <hpa@zytor.com>	2007-09-14 09:24:38 -0700
commit	cde08292d65072c6a3fec5ee0f0a304d5bdf301c (patch)
tree	23579b5d3540b982d42374a3a19d4ebb98773e68 /hashtbl.c
parent	9ab60dabcca3d7c9fe9a0bb6e7e01f9295d95532 (diff)
download	nasm-cde08292d65072c6a3fec5ee0f0a304d5bdf301c.tar.gz nasm-cde08292d65072c6a3fec5ee0f0a304d5bdf301c.tar.bz2 nasm-cde08292d65072c6a3fec5ee0f0a304d5bdf301c.zip