#include "system.h" #include #include #include #include #include "debug.h" #define STRDATA_CHUNKS 1024 #define STRDATA_CHUNK 65536 #define STROFFS_CHUNK 2048 /* XXX this is ridiculously small... */ #define STRHASH_INITSIZE 1024 static int pool_debug = 0; typedef struct poolHash_s * poolHash; typedef struct poolHashBucket_s poolHashBucket; struct poolHashBucket_s { rpmsid keyid; }; struct poolHash_s { int numBuckets; poolHashBucket * buckets; int keyCount; }; struct rpmstrPool_s { char ** offs; /* pointers into data area */ rpmsid offs_size; /* largest offset index */; rpmsid offs_alloced; /* offsets allocation size */ char ** chunks; /* memory chunks for storing the strings */ size_t chunks_size; /* current chunk */ size_t chunks_allocated; /* allocated size of the chunks array */ size_t chunk_allocated; /* size of the current chunk */ poolHash hash; /* string -> sid hash table */ int frozen; /* are new id additions allowed? */ int nrefs; /* refcount */ }; /* calculate hash and string length on at once */ static inline unsigned int rstrlenhash(const char * str, size_t * len) { /* Jenkins One-at-a-time hash */ unsigned int hash = 0xe4721b68; const char * s = str; while (*s != '\0') { hash += *s; hash += (hash << 10); hash ^= (hash >> 6); s++; } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); if (len) *len = (s - str); return hash; } static inline unsigned int rstrnlenhash(const char * str, size_t n, size_t * len) { /* Jenkins One-at-a-time hash */ unsigned int hash = 0xe4721b68; const char * s = str; while (*s != '\0' && n-- > 0) { hash += *s; hash += (hash << 10); hash ^= (hash >> 6); s++; } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); if (len) *len = (s - str); return hash; } static inline unsigned int rstrnhash(const char * string, size_t n) { return rstrnlenhash(string, n, NULL); } unsigned int rstrhash(const char * string) { return rstrlenhash(string, NULL); } static inline unsigned int hashbucket(unsigned int hash, unsigned int number) { return hash + number*number; } static poolHash poolHashCreate(int numBuckets) { poolHash ht; ht = xmalloc(sizeof(*ht)); ht->numBuckets = numBuckets; ht->buckets = xcalloc(numBuckets, sizeof(*ht->buckets)); ht->keyCount = 0; return ht; } static void poolHashResize(rpmstrPool pool, int numBuckets) { poolHash ht = pool->hash; poolHashBucket * buckets = xcalloc(numBuckets, sizeof(*ht->buckets)); for (int i=0; inumBuckets; i++) { if (!ht->buckets[i].keyid) continue; unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid)); for (unsigned int j=0;;j++) { unsigned int hash = hashbucket(keyHash, j) % numBuckets; if (!buckets[hash].keyid) { buckets[hash].keyid = ht->buckets[i].keyid; break; } } } free((void *)(ht->buckets)); ht->buckets = buckets; ht->numBuckets = numBuckets; } static void poolHashAddHEntry(rpmstrPool pool, const char * key, unsigned int keyHash, rpmsid keyid) { poolHash ht = pool->hash; /* keep load factor inbetween 0.25 and 0.5 */ if (2*(ht->keyCount) > ht->numBuckets) { poolHashResize(pool, ht->numBuckets * 2); } for (unsigned int i=0;;i++) { unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; if (!ht->buckets[hash].keyid) { ht->buckets[hash].keyid = keyid; ht->keyCount++; break; } else if (!strcmp(rpmstrPoolStr(pool, ht->buckets[hash].keyid), key)) { return; } } } static void poolHashAddEntry(rpmstrPool pool, const char * key, rpmsid keyid) { poolHashAddHEntry(pool, key, rstrhash(key), keyid); } static void poolHashEmpty( poolHash ht) { unsigned int i; if (ht->keyCount == 0) return; for (i = 0; i < ht->numBuckets; i++) { ht->buckets[i].keyid = 0; } ht->keyCount = 0; } static poolHash poolHashFree(poolHash ht) { if (ht==NULL) return ht; poolHashEmpty(ht); ht->buckets = _free(ht->buckets); ht = _free(ht); return NULL; } static void poolHashPrintStats(rpmstrPool pool) { poolHash ht = pool->hash; int i; int collisions = 0; int maxcollisions = 0; for (i=0; inumBuckets; i++) { unsigned int keyHash = rstrhash(rpmstrPoolStr(pool, ht->buckets[i].keyid)); for (unsigned int j=0;;j++) { unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; if (hash==i) { collisions += j; maxcollisions = maxcollisions > j ? maxcollisions : j; break; } } } fprintf(stderr, "Hashsize: %i\n", ht->numBuckets); fprintf(stderr, "Keys: %i\n", ht->keyCount); fprintf(stderr, "Collisions: %i\n", collisions); fprintf(stderr, "Max collisions: %i\n", maxcollisions); } static void rpmstrPoolRehash(rpmstrPool pool) { int sizehint; if (pool->offs_size < STRHASH_INITSIZE) sizehint = STRHASH_INITSIZE; else sizehint = pool->offs_size * 2; if (pool->hash) pool->hash = poolHashFree(pool->hash); pool->hash = poolHashCreate(sizehint); for (int i = 1; i < pool->offs_size; i++) poolHashAddEntry(pool, rpmstrPoolStr(pool, i), i); } rpmstrPool rpmstrPoolCreate(void) { rpmstrPool pool = xcalloc(1, sizeof(*pool)); pool->offs_alloced = STROFFS_CHUNK; pool->offs = xcalloc(pool->offs_alloced, sizeof(*pool->offs)); pool->chunks_allocated = STRDATA_CHUNKS; pool->chunks = xcalloc(pool->chunks_allocated, sizeof(*pool->chunks)); pool->chunks_size = 1; pool->chunk_allocated = STRDATA_CHUNK; pool->offs[1] = xcalloc(1, pool->chunk_allocated); pool->chunks[pool->chunks_size] = pool->offs[1]; rpmstrPoolRehash(pool); pool->nrefs = 1; return pool; } rpmstrPool rpmstrPoolFree(rpmstrPool pool) { if (pool) { if (pool->nrefs > 1) { pool->nrefs--; } else { if (pool_debug) poolHashPrintStats(pool); poolHashFree(pool->hash); free(pool->offs); for (int i=1;i<=pool->chunks_size;i++) { pool->chunks[i] = _free(pool->chunks[i]); } free(pool->chunks); free(pool); } } return NULL; } rpmstrPool rpmstrPoolLink(rpmstrPool pool) { if (pool) pool->nrefs++; return pool; } void rpmstrPoolFreeze(rpmstrPool pool, int keephash) { if (pool && !pool->frozen) { if (!keephash) { pool->hash = poolHashFree(pool->hash); } pool->offs_alloced = pool->offs_size + 2; /* space for end marker */ pool->offs = xrealloc(pool->offs, pool->offs_alloced * sizeof(*pool->offs)); pool->frozen = 1; } } void rpmstrPoolUnfreeze(rpmstrPool pool) { if (pool) { if (pool->hash == NULL) { rpmstrPoolRehash(pool); } pool->frozen = 0; } } static rpmsid rpmstrPoolPut(rpmstrPool pool, const char *s, size_t slen, unsigned int hash) { char *t = NULL; size_t ssize = slen + 1; size_t chunk_used; pool->offs_size += 1; /* need one extra for end of string */ /* and one extra to mark the end of the chunk */ if (pool->offs_alloced <= pool->offs_size + 2) { pool->offs_alloced += STROFFS_CHUNK; pool->offs = xrealloc(pool->offs, pool->offs_alloced * sizeof(*pool->offs)); } chunk_used = pool->offs[pool->offs_size] - pool->chunks[pool->chunks_size]; if (ssize + 1 > pool->chunk_allocated - chunk_used) { /* check size of ->chunks */ pool->chunks_size += 1; if (pool->chunks_size >= pool->chunks_allocated) { pool->chunks_allocated += pool->chunks_allocated; pool->chunks = xrealloc(pool->chunks, pool->chunks_allocated * sizeof(*pool->chunks)); } /* Check if string is bigger than chunks */ if (ssize > pool->chunk_allocated) { pool->chunk_allocated = 2 * ssize; } /* Dummy entry for end of last string*/ pool->offs_size += 1; pool->offs[pool->offs_size] = xcalloc(1, pool->chunk_allocated); pool->chunks[pool->chunks_size] = pool->offs[pool->offs_size]; } t = memcpy(pool->offs[pool->offs_size], s, slen); t[slen] = '\0'; pool->offs[pool->offs_size+1] = t + ssize; poolHashAddHEntry(pool, t, hash, pool->offs_size); return pool->offs_size; } static rpmsid rpmstrPoolGet(rpmstrPool pool, const char * key, size_t keylen, unsigned int keyHash) { poolHash ht = pool->hash; const char * s; for (unsigned int i=0;; i++) { unsigned int hash = hashbucket(keyHash, i) % ht->numBuckets; if (!ht->buckets[hash].keyid) { return 0; } s = rpmstrPoolStr(pool, ht->buckets[hash].keyid); /* pool string could be longer than keylen, require exact matche */ if (strncmp(s, key, keylen) == 0 && s[keylen] == '\0') return ht->buckets[hash].keyid; } } static inline rpmsid strn2id(rpmstrPool pool, const char *s, size_t slen, unsigned int hash, int create) { rpmsid sid = 0; if (pool && pool->hash) { sid = rpmstrPoolGet(pool, s, slen, hash); if (sid == 0 && create && !pool->frozen) sid = rpmstrPoolPut(pool, s, slen, hash); } return sid; } rpmsid rpmstrPoolIdn(rpmstrPool pool, const char *s, size_t slen, int create) { rpmsid sid = 0; if (s != NULL) { unsigned int hash = rstrnhash(s, slen); sid = strn2id(pool, s, slen, hash, create); } return sid; } rpmsid rpmstrPoolId(rpmstrPool pool, const char *s, int create) { rpmsid sid = 0; if (s != NULL) { size_t slen; unsigned int hash = rstrlenhash(s, &slen); sid = strn2id(pool, s, slen, hash, create); } return sid; } const char * rpmstrPoolStr(rpmstrPool pool, rpmsid sid) { const char *s = NULL; if (pool && sid > 0 && sid <= pool->offs_size) s = pool->offs[sid]; return s; } size_t rpmstrPoolStrlen(rpmstrPool pool, rpmsid sid) { size_t slen = 0; if (pool && sid <= pool->offs_size) { slen = pool->offs[sid+1] - pool->offs[sid] - 1; } return slen; } int rpmstrPoolStreq(rpmstrPool poolA, rpmsid sidA, rpmstrPool poolB, rpmsid sidB) { if (poolA == poolB) return (sidA == sidB); else return rstreq(rpmstrPoolStr(poolA, sidA), rpmstrPoolStr(poolB, sidB)); } rpmsid rpmstrPoolNumStr(rpmstrPool pool) { return (pool != NULL) ? pool->offs_size : 0; }