Use pool id's for hash table key, lookup strings from pool as needed

- The pool itself can address its contents by id alone, storing pointers to the strings only hurts as reallocation moving the data blob requires rehashing the whole thing needlessly. - We now store just the key id in the hash buckets, and lookup the actual string for comparison from the pool. This avoids the need to rehash on realloc and saves memory too, and this is one of the biggest reasons for wanting a separate hash implementation for the string pool. Incidentally, this is how libsolv does it too. - Individual bucket allocation becomes rather wasteful now: a bucket stores a single integer, and a single pointer to the next bucket, a pointer which can be twice the size of the key data it holds. Further tuning and cleaning up after the marriage of these two datatypes left after the honeymoon is over...
author: Panu Matilainen <pmatilai@redhat.com> 2012-09-17 15:52:59 +0300
committer: Panu Matilainen <pmatilai@redhat.com> 2012-09-17 15:52:59 +0300
commit: 1abd80f9c2a6d746bdc12d708210b69680d7e1ca (patch)
tree: c83c3ab58d852496c1b0647f68eb61bcb8f7e551 /rpmio
parent: 7cb0a71a115047fe94d7ab1bb42a3c9556796ee3 (diff)
download: librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.tar.gz
librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.tar.bz2
librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.zip
1 files changed, 19 insertions, 23 deletions
diff --git a/rpmio/rpmstrpool.c b/rpmio/rpmstrpool.c
index a00688ce6..c0fb4bcf4 100644
--- a/rpmio/rpmstrpool.c
+++ b/rpmio/rpmstrpool.c
@@ -16,8 +16,7 @@ typedef struct poolHashBucket_s * poolHashBucket;
 
 struct poolHashBucket_s {
     poolHashBucket next;
-    const char * key;
-    rpmsid data;
+    rpmsid keyid;
 };
 
 struct poolHash_s {
@@ -50,15 +49,17 @@ static poolHash poolHashCreate(int numBuckets)
     return ht;
 }
 
-static void poolHashResize(poolHash ht, int numBuckets)
+static void poolHashResize(rpmstrPool pool, int numBuckets)
 {
+    poolHash ht = pool->hash;
     poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets));
 
     for (int i=0; i<ht->numBuckets; i++) {
 	poolHashBucket b = ht->buckets[i];
 	poolHashBucket nextB;
 	while (b != NULL) {
-	    unsigned int hash = rstrhash(b->key) % numBuckets;
+	    unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, b->keyid));
+	    unsigned int hash = keyHash % numBuckets;
 	    nextB = b->next;
 	    b->next = buckets[hash];
 	    buckets[hash] = b;
@@ -70,8 +71,9 @@ static void poolHashResize(poolHash ht, int numBuckets)
     ht->numBuckets = numBuckets;
 }
 
-static void poolHashAddHEntry(poolHash ht, const char * key, unsigned int keyHash, rpmsid data)
+static void poolHashAddHEntry(rpmstrPool pool, const char * key, unsigned int keyHash, rpmsid keyid)
 {
+    poolHash ht = pool->hash;
     unsigned int hash = keyHash % ht->numBuckets;
     poolHashBucket b = ht->buckets[hash];
 
@@ -79,27 +81,26 @@ static void poolHashAddHEntry(poolHash ht, const char * key, unsigned int keyHas
 	ht->bucketCount += 1;
     }
 
-    while (b && strcmp(b->key, key)) {
+    while (b && strcmp(rpmstrPoolStr(pool, b->keyid), key)) {
 	b = b->next;
     }
 
     if (b == NULL) {
 	ht->keyCount += 1;
 	b = xmalloc(sizeof(*b));
-	b->key = key;
-	b->data = data;
+	b->keyid = keyid;
 	b->next = ht->buckets[hash];
 	ht->buckets[hash] = b;
 
 	if (ht->keyCount > ht->numBuckets) {
-	    poolHashResize(ht, ht->numBuckets * 2);
+	    poolHashResize(pool, ht->numBuckets * 2);
 	}
     }
 }
 
-static void poolHashAddEntry(poolHash ht, const char * key, rpmsid data)
+static void poolHashAddEntry(rpmstrPool pool, const char * key, rpmsid keyid)
 {
-    poolHashAddHEntry(ht, key, rstrhash(key), data);
+    poolHashAddHEntry(pool, key, rstrhash(key), keyid);
 }
 
 static void poolHashEmpty( poolHash ht)
@@ -135,15 +136,16 @@ static poolHash poolHashFree(poolHash ht)
     return NULL;
 }
 
-static rpmsid poolHashGetHEntry(poolHash ht, const char * key, unsigned int keyHash)
+static rpmsid poolHashGetHEntry(rpmstrPool pool, const char * key, unsigned int keyHash)
 {
+    poolHash ht = pool->hash;
     unsigned int hash = keyHash % ht->numBuckets;
     poolHashBucket b = ht->buckets[hash];
 
-    while (b && strcmp(b->key, key))
+    while (b && strcmp(rpmstrPoolStr(pool, b->keyid), key))
 	b = b->next;
 
-    return (b == NULL) ? 0 : b->data;
+    return (b == NULL) ? 0 : b->keyid;
 }
 
 static void poolHashPrintStats(poolHash ht)
@@ -183,7 +185,7 @@ static void rpmstrPoolRehash(rpmstrPool pool)
 
     pool->hash = poolHashCreate(sizehint);
     for (int i = 1; i < pool->offs_size; i++)
-	poolHashAddEntry(pool->hash, rpmstrPoolStr(pool, i), i);
+	poolHashAddEntry(pool, rpmstrPoolStr(pool, i), i);
 }
 
 rpmstrPool rpmstrPoolCreate(void)
@@ -253,7 +255,6 @@ static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigne
     size_t ssize = slen + 1;
 
     if (ssize > pool->data_alloced - pool->data_size) {
-	const char * prev_data = pool->data;
 	size_t need = pool->data_size + ssize;
 	size_t alloced = pool->data_alloced;
 
@@ -262,11 +263,6 @@ static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigne
 
 	pool->data = xrealloc(pool->data, alloced);
 	pool->data_alloced = alloced;
-
-	/* ouch, need to rehash the whole lot if key addresses change */
-	if (pool->offs_size > 0 && pool->data != prev_data) {
-	    rpmstrPoolRehash(pool);
-	}
     }
 
     pool->offs_size += 1;
@@ -280,7 +276,7 @@ static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigne
     pool->offs[pool->offs_size] = pool->data_size;
     pool->data_size += ssize;
 
-    poolHashAddHEntry(pool->hash, t, hash, pool->offs_size);
+    poolHashAddHEntry(pool, t, hash, pool->offs_size);
 
     return pool->offs_size;
 }
@@ -291,7 +287,7 @@ rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create)
 
     if (pool && pool->hash && s) {
 	unsigned int hash = rstrhash(s);
-	sid = poolHashGetHEntry(pool->hash, s, hash);
+	sid = poolHashGetHEntry(pool, s, hash);
 	if (sid == 0 && create && !pool->frozen) {
 	    sid = rpmstrPoolPut(pool, s, slen, hash);
 	}
author	Panu Matilainen <pmatilai@redhat.com>	2012-09-17 15:52:59 +0300
committer	Panu Matilainen <pmatilai@redhat.com>	2012-09-17 15:52:59 +0300
commit	1abd80f9c2a6d746bdc12d708210b69680d7e1ca (patch)
tree	c83c3ab58d852496c1b0647f68eb61bcb8f7e551 /rpmio
parent	7cb0a71a115047fe94d7ab1bb42a3c9556796ee3 (diff)
download	librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.tar.gz librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.tar.bz2 librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.zip