diff options
author | Panu Matilainen <pmatilai@redhat.com> | 2012-09-17 15:52:59 +0300 |
---|---|---|
committer | Panu Matilainen <pmatilai@redhat.com> | 2012-09-17 15:52:59 +0300 |
commit | 1abd80f9c2a6d746bdc12d708210b69680d7e1ca (patch) | |
tree | c83c3ab58d852496c1b0647f68eb61bcb8f7e551 | |
parent | 7cb0a71a115047fe94d7ab1bb42a3c9556796ee3 (diff) | |
download | librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.tar.gz librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.tar.bz2 librpm-tizen-1abd80f9c2a6d746bdc12d708210b69680d7e1ca.zip |
Use pool id's for hash table key, lookup strings from pool as needed
- The pool itself can address its contents by id alone, storing
pointers to the strings only hurts as reallocation moving the
data blob requires rehashing the whole thing needlessly.
- We now store just the key id in the hash buckets, and lookup the
actual string for comparison from the pool. This avoids the
need to rehash on realloc and saves memory too, and this is one of
the biggest reasons for wanting a separate hash implementation for
the string pool. Incidentally, this is how libsolv does it too.
- Individual bucket allocation becomes rather wasteful now: a bucket
stores a single integer, and a single pointer to the next bucket,
a pointer which can be twice the size of the key data it holds.
Further tuning and cleaning up after the marriage of these two
datatypes left after the honeymoon is over...
-rw-r--r-- | rpmio/rpmstrpool.c | 42 |
1 files changed, 19 insertions, 23 deletions
diff --git a/rpmio/rpmstrpool.c b/rpmio/rpmstrpool.c index a00688ce6..c0fb4bcf4 100644 --- a/rpmio/rpmstrpool.c +++ b/rpmio/rpmstrpool.c @@ -16,8 +16,7 @@ typedef struct poolHashBucket_s * poolHashBucket; struct poolHashBucket_s { poolHashBucket next; - const char * key; - rpmsid data; + rpmsid keyid; }; struct poolHash_s { @@ -50,15 +49,17 @@ static poolHash poolHashCreate(int numBuckets) return ht; } -static void poolHashResize(poolHash ht, int numBuckets) +static void poolHashResize(rpmstrPool pool, int numBuckets) { + poolHash ht = pool->hash; poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets)); for (int i=0; i<ht->numBuckets; i++) { poolHashBucket b = ht->buckets[i]; poolHashBucket nextB; while (b != NULL) { - unsigned int hash = rstrhash(b->key) % numBuckets; + unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, b->keyid)); + unsigned int hash = keyHash % numBuckets; nextB = b->next; b->next = buckets[hash]; buckets[hash] = b; @@ -70,8 +71,9 @@ static void poolHashResize(poolHash ht, int numBuckets) ht->numBuckets = numBuckets; } -static void poolHashAddHEntry(poolHash ht, const char * key, unsigned int keyHash, rpmsid data) +static void poolHashAddHEntry(rpmstrPool pool, const char * key, unsigned int keyHash, rpmsid keyid) { + poolHash ht = pool->hash; unsigned int hash = keyHash % ht->numBuckets; poolHashBucket b = ht->buckets[hash]; @@ -79,27 +81,26 @@ static void poolHashAddHEntry(poolHash ht, const char * key, unsigned int keyHas ht->bucketCount += 1; } - while (b && strcmp(b->key, key)) { + while (b && strcmp(rpmstrPoolStr(pool, b->keyid), key)) { b = b->next; } if (b == NULL) { ht->keyCount += 1; b = xmalloc(sizeof(*b)); - b->key = key; - b->data = data; + b->keyid = keyid; b->next = ht->buckets[hash]; ht->buckets[hash] = b; if (ht->keyCount > ht->numBuckets) { - poolHashResize(ht, ht->numBuckets * 2); + poolHashResize(pool, ht->numBuckets * 2); } } } -static void poolHashAddEntry(poolHash ht, const char * key, rpmsid data) +static void poolHashAddEntry(rpmstrPool pool, const char * key, rpmsid keyid) { - poolHashAddHEntry(ht, key, rstrhash(key), data); + poolHashAddHEntry(pool, key, rstrhash(key), keyid); } static void poolHashEmpty( poolHash ht) @@ -135,15 +136,16 @@ static poolHash poolHashFree(poolHash ht) return NULL; } -static rpmsid poolHashGetHEntry(poolHash ht, const char * key, unsigned int keyHash) +static rpmsid poolHashGetHEntry(rpmstrPool pool, const char * key, unsigned int keyHash) { + poolHash ht = pool->hash; unsigned int hash = keyHash % ht->numBuckets; poolHashBucket b = ht->buckets[hash]; - while (b && strcmp(b->key, key)) + while (b && strcmp(rpmstrPoolStr(pool, b->keyid), key)) b = b->next; - return (b == NULL) ? 0 : b->data; + return (b == NULL) ? 0 : b->keyid; } static void poolHashPrintStats(poolHash ht) @@ -183,7 +185,7 @@ static void rpmstrPoolRehash(rpmstrPool pool) pool->hash = poolHashCreate(sizehint); for (int i = 1; i < pool->offs_size; i++) - poolHashAddEntry(pool->hash, rpmstrPoolStr(pool, i), i); + poolHashAddEntry(pool, rpmstrPoolStr(pool, i), i); } rpmstrPool rpmstrPoolCreate(void) @@ -253,7 +255,6 @@ static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigne size_t ssize = slen + 1; if (ssize > pool->data_alloced - pool->data_size) { - const char * prev_data = pool->data; size_t need = pool->data_size + ssize; size_t alloced = pool->data_alloced; @@ -262,11 +263,6 @@ static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigne pool->data = xrealloc(pool->data, alloced); pool->data_alloced = alloced; - - /* ouch, need to rehash the whole lot if key addresses change */ - if (pool->offs_size > 0 && pool->data != prev_data) { - rpmstrPoolRehash(pool); - } } pool->offs_size += 1; @@ -280,7 +276,7 @@ static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigne pool->offs[pool->offs_size] = pool->data_size; pool->data_size += ssize; - poolHashAddHEntry(pool->hash, t, hash, pool->offs_size); + poolHashAddHEntry(pool, t, hash, pool->offs_size); return pool->offs_size; } @@ -291,7 +287,7 @@ rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create) if (pool && pool->hash && s) { unsigned int hash = rstrhash(s); - sid = poolHashGetHEntry(pool->hash, s, hash); + sid = poolHashGetHEntry(pool, s, hash); if (sid == 0 && create && !pool->frozen) { sid = rpmstrPoolPut(pool, s, slen, hash); } |