diff options
Diffstat (limited to 'db/mp/mp_region.c')
-rw-r--r-- | db/mp/mp_region.c | 302 |
1 files changed, 152 insertions, 150 deletions
diff --git a/db/mp/mp_region.c b/db/mp/mp_region.c index 3c7ee6a4b..a02683f21 100644 --- a/db/mp/mp_region.c +++ b/db/mp/mp_region.c @@ -1,29 +1,20 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2004 - * Sleepycat Software. All rights reserved. + * Copyright (c) 1996-2006 + * Oracle Corporation. All rights reserved. * - * $Id: mp_region.c,v 11.68 2004/10/15 16:59:43 bostic Exp $ + * $Id: mp_region.c,v 12.21 2006/08/24 14:46:15 bostic Exp $ */ #include "db_config.h" -#ifndef NO_SYSTEM_INCLUDES -#include <sys/types.h> - -#include <string.h> -#endif - #include "db_int.h" -#include "dbinc/db_shash.h" #include "dbinc/mp.h" static int __memp_init __P((DB_ENV *, DB_MPOOL *, u_int, u_int32_t)); -static void __memp_init_config __P((DB_ENV *, MPOOL *)); -#ifdef HAVE_MUTEX_SYSTEM_RESOURCES -static size_t __memp_region_maint __P((REGINFO *)); -#endif +static int __memp_init_config __P((DB_ENV *, MPOOL *)); +static void __memp_region_size __P((DB_ENV *, roff_t *, u_int32_t *)); /* * __memp_open -- @@ -43,20 +34,8 @@ __memp_open(dbenv) u_int32_t htab_buckets, *regids; int ret; - /* Figure out how big each cache region is. */ - reg_size = (dbenv->mp_gbytes / dbenv->mp_ncache) * GIGABYTE; - reg_size += ((dbenv->mp_gbytes % - dbenv->mp_ncache) * GIGABYTE) / dbenv->mp_ncache; - reg_size += dbenv->mp_bytes / dbenv->mp_ncache; - - /* - * Figure out how many hash buckets each region will have. Assume we - * want to keep the hash chains with under 10 pages on each chain. We - * don't know the pagesize in advance, and it may differ for different - * files. Use a pagesize of 1K for the calculation -- we walk these - * chains a lot, they must be kept short. - */ - htab_buckets = __db_tablesize((u_int32_t)(reg_size / (1 * 1024)) / 10); + /* Calculate the region size and hash bucket count. */ + __memp_region_size(dbenv, &reg_size, &htab_buckets); /* Create and initialize the DB_MPOOL structure. 
*/ if ((ret = __os_calloc(dbenv, 1, sizeof(*dbmp), &dbmp)) != 0) @@ -116,14 +95,9 @@ __memp_open(dbenv) if ((ret = __memp_init(dbenv, dbmp, i, htab_buckets)) != 0) goto err; - R_UNLOCK(dbenv, &dbmp->reginfo[i]); regids[i] = dbmp->reginfo[i].id; } - - __memp_init_config(dbenv, mp); - - R_UNLOCK(dbenv, dbmp->reginfo); } else { /* * Determine how many regions there are going to be, allocate @@ -140,21 +114,6 @@ __memp_open(dbenv) dbmp->reginfo[i].id = INVALID_REGION_ID; dbmp->reginfo[0] = reginfo; - __memp_init_config(dbenv, mp); - - /* - * We have to unlock the primary mpool region before we attempt - * to join the additional mpool regions. If we don't, we can - * deadlock. The scenario is that we hold the primary mpool - * region lock. We then try to attach to an additional mpool - * region, which requires the acquisition/release of the main - * region lock (to search the list of regions). If another - * thread of control already holds the main region lock and is - * waiting on our primary mpool region lock, we'll deadlock. - * See [#4696] for more information. - */ - R_UNLOCK(dbenv, dbmp->reginfo); - /* Join remaining regions. */ regids = R_ADDR(dbmp->reginfo, mp->regids); for (i = 1; i < dbmp->nreg; ++i) { @@ -165,7 +124,6 @@ __memp_open(dbenv) if ((ret = __db_r_attach( dbenv, &dbmp->reginfo[i], 0)) != 0) goto err; - R_UNLOCK(dbenv, &dbmp->reginfo[i]); } } @@ -175,28 +133,28 @@ __memp_open(dbenv) R_ADDR(&dbmp->reginfo[i], dbmp->reginfo[i].rp->primary); /* If the region is threaded, allocate a mutex to lock the handles. 
*/ - if (F_ISSET(dbenv, DB_ENV_THREAD) && - (ret = __db_mutex_setup(dbenv, dbmp->reginfo, &dbmp->mutexp, - MUTEX_ALLOC | MUTEX_THREAD)) != 0) + if ((ret = __mutex_alloc(dbenv, + MTX_MPOOL_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbmp->mutex)) != 0) goto err; dbenv->mp_handle = dbmp; - return (0); -err: if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) { - if (F_ISSET(dbmp->reginfo, REGION_CREATE)) - ret = __db_panic(dbenv, ret); + /* A process joining the region may reset the mpool configuration. */ + if ((ret = __memp_init_config(dbenv, mp)) != 0) + return (ret); - R_UNLOCK(dbenv, dbmp->reginfo); + return (0); +err: dbenv->mp_handle = NULL; + if (dbmp->reginfo != NULL && dbmp->reginfo[0].addr != NULL) { for (i = 0; i < dbmp->nreg; ++i) if (dbmp->reginfo[i].id != INVALID_REGION_ID) (void)__db_r_detach( dbenv, &dbmp->reginfo[i], 0); __os_free(dbenv, dbmp->reginfo); } - if (dbmp->mutexp != NULL) - __db_mutex_free(dbenv, dbmp->reginfo, dbmp->mutexp); + + (void)__mutex_free(dbenv, &dbmp->mutex); __os_free(dbenv, dbmp); return (ret); } @@ -212,37 +170,26 @@ __memp_init(dbenv, dbmp, reginfo_off, htab_buckets) u_int reginfo_off; u_int32_t htab_buckets; { - DB_MPOOL_HASH *htab; + DB_MPOOL_HASH *htab, *hp; MPOOL *mp; REGINFO *reginfo; -#ifdef HAVE_MUTEX_SYSTEM_RESOURCES - size_t maint_size; -#endif u_int32_t i; int ret; void *p; reginfo = &dbmp->reginfo[reginfo_off]; - if ((ret = __db_shalloc(reginfo, - sizeof(MPOOL), MUTEX_ALIGN, &reginfo->primary)) != 0) + if ((ret = __db_shalloc( + reginfo, sizeof(MPOOL), 0, &reginfo->primary)) != 0) goto mem_err; reginfo->rp->primary = R_OFFSET(reginfo, reginfo->primary); mp = reginfo->primary; memset(mp, 0, sizeof(*mp)); -#ifdef HAVE_MUTEX_SYSTEM_RESOURCES - maint_size = __memp_region_maint(reginfo); - /* Allocate room for the maintenance info and initialize it. 
*/ - if ((ret = __db_shalloc(reginfo, - sizeof(REGMAINT) + maint_size, 0, &p)) != 0) - goto mem_err; - __db_maintinit(reginfo, p, maint_size); - mp->maint_off = R_OFFSET(reginfo, p); -#endif + if ((ret = + __mutex_alloc(dbenv, MTX_MPOOL_REGION, 0, &mp->mtx_region)) != 0) + return (ret); if (reginfo_off == 0) { - SH_TAILQ_INIT(&mp->mpfq); - ZERO_LSN(mp->lsn); mp->nreg = dbmp->nreg; @@ -250,22 +197,45 @@ __memp_init(dbenv, dbmp, reginfo_off, htab_buckets) dbmp->nreg * sizeof(u_int32_t), 0, &p)) != 0) goto mem_err; mp->regids = R_OFFSET(dbmp->reginfo, p); + + /* Allocate file table space and initialize it. */ + if ((ret = __db_shalloc(reginfo, + MPOOL_FILE_BUCKETS * sizeof(DB_MPOOL_HASH), 0, &htab)) != 0) + goto mem_err; + mp->ftab = R_OFFSET(reginfo, htab); + for (i = 0; i < MPOOL_FILE_BUCKETS; i++) { + if ((ret = __mutex_alloc(dbenv, + MTX_MPOOL_FILE_BUCKET, 0, &htab[i].mtx_hash)) != 0) + return (ret); + SH_TAILQ_INIT(&htab[i].hash_bucket); + htab[i].hash_page_dirty = htab[i].hash_priority = 0; + } + } /* Allocate hash table space and initialize it. 
*/ if ((ret = __db_shalloc(reginfo, - htab_buckets * sizeof(DB_MPOOL_HASH), MUTEX_ALIGN, &htab)) != 0) + htab_buckets * sizeof(DB_MPOOL_HASH), 0, &htab)) != 0) goto mem_err; mp->htab = R_OFFSET(reginfo, htab); for (i = 0; i < htab_buckets; i++) { - if ((ret = __db_mutex_setup(dbenv, - reginfo, &htab[i].hash_mutex, MUTEX_NO_RLOCK)) != 0) + hp = &htab[i]; + if ((ret = __mutex_alloc(dbenv, + MTX_MPOOL_HASH_BUCKET, 0, &hp->mtx_hash)) != 0) + return (ret); + if ((ret = __mutex_alloc(dbenv, + MTX_MPOOL_IO, DB_MUTEX_SELF_BLOCK, &hp->mtx_io)) != 0) return (ret); - SH_TAILQ_INIT(&htab[i].hash_bucket); - htab[i].hash_page_dirty = htab[i].hash_priority = 0; + SH_TAILQ_INIT(&hp->hash_bucket); + hp->hash_page_dirty = hp->hash_priority = hp->hash_io_wait = 0; + hp->flags = 0; + ZERO_LSN(hp->old_reader); } mp->htab_buckets = mp->stat.st_hash_buckets = htab_buckets; + SH_TAILQ_INIT(&mp->free_frozen); + SH_TAILQ_INIT(&mp->alloc_frozen); + /* * Only the environment creator knows the total cache size, fill in * those statistics now. @@ -274,20 +244,81 @@ __memp_init(dbenv, dbmp, reginfo_off, htab_buckets) mp->stat.st_bytes = dbenv->mp_bytes; return (0); -mem_err:__db_err(dbenv, "Unable to allocate memory for mpool region"); +mem_err:__db_errx(dbenv, "Unable to allocate memory for mpool region"); return (ret); } /* + * __memp_region_size -- + * Size the region and figure out how many hash buckets we'll have. + */ +static void +__memp_region_size(dbenv, reg_sizep, htab_bucketsp) + DB_ENV *dbenv; + roff_t *reg_sizep; + u_int32_t *htab_bucketsp; +{ + roff_t reg_size; + + /* + * Figure out how big each cache region is. Cast an operand to roff_t + * so we do 64-bit arithmetic as appropriate. + */ + reg_size = ((roff_t)GIGABYTE / dbenv->mp_ncache) * dbenv->mp_gbytes; + reg_size += dbenv->mp_bytes / dbenv->mp_ncache; + *reg_sizep = reg_size; + + /* + * Figure out how many hash buckets each region will have. Assume we + * want to keep the hash chains with under 10 pages on each chain. 
We + * don't know the pagesize in advance, and it may differ for different + * files. Use a pagesize of 1K for the calculation -- we walk these + * chains a lot, they must be kept short. + * + * XXX + * Cache sizes larger than 10TB would cause 32-bit wrapping in the + * calculation of the number of hash buckets. This probably isn't + * something we need to worry about right now, but is checked when the + * cache size is set. + */ + *htab_bucketsp = __db_tablesize((u_int32_t)(reg_size / (10 * 1024))); +} + +/* + * __memp_region_mutex_count -- + * Return the number of mutexes the mpool region will need. + * + * PUBLIC: u_int32_t __memp_region_mutex_count __P((DB_ENV *)); + */ +u_int32_t +__memp_region_mutex_count(dbenv) + DB_ENV *dbenv; +{ + roff_t reg_size; + u_int32_t htab_buckets; + + __memp_region_size(dbenv, &reg_size, &htab_buckets); + + /* + * We need a couple of mutexes for the region itself, one for each + * file handle (MPOOLFILE) the application allocates, one for each + * of the MPOOL_FILE_BUCKETS, and each cache has two mutexes per + * hash bucket. + */ + return (dbenv->mp_ncache * htab_buckets * 2 + 50 + MPOOL_FILE_BUCKETS); +} + +/* * __memp_init_config -- * Initialize shared configuration information. */ -static void +static int __memp_init_config(dbenv, mp) DB_ENV *dbenv; MPOOL *mp; { - /* A process joining the region may reset the mpool configuration. 
*/ + MPOOL_SYSTEM_LOCK(dbenv); + if (dbenv->mp_mmapsize != 0) mp->mp_mmapsize = dbenv->mp_mmapsize; if (dbenv->mp_maxopenfd != 0) @@ -296,6 +327,10 @@ __memp_init_config(dbenv, mp) mp->mp_maxwrite = dbenv->mp_maxwrite; if (dbenv->mp_maxwrite_sleep != 0) mp->mp_maxwrite_sleep = dbenv->mp_maxwrite_sleep; + + MPOOL_SYSTEM_UNLOCK(dbenv); + + return (0); } /* @@ -309,6 +344,7 @@ __memp_dbenv_refresh(dbenv) DB_ENV *dbenv; { BH *bhp; + BH_FROZEN_ALLOC *frozen_alloc; DB_MPOOL *dbmp; DB_MPOOLFILE *dbmfp; DB_MPOOL_HASH *hp; @@ -333,11 +369,31 @@ __memp_dbenv_refresh(dbenv) reginfo = &dbmp->reginfo[i]; mp = reginfo->primary; for (hp = R_ADDR(reginfo, mp->htab), bucket = 0; - bucket < mp->htab_buckets; ++hp, ++bucket) + bucket < mp->htab_buckets; ++hp, ++bucket) { while ((bhp = SH_TAILQ_FIRST( &hp->hash_bucket, __bh)) != NULL) - __memp_bhfree(dbmp, hp, bhp, - BH_FREE_FREEMEM | BH_FREE_UNLOCKED); + if (F_ISSET(bhp, BH_FROZEN)) + SH_TAILQ_REMOVE( + &hp->hash_bucket, bhp, + hq, __bh); + else if ((t_ret = __memp_bhfree( + dbmp, hp, bhp, + BH_FREE_FREEMEM | + BH_FREE_UNLOCKED)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free( + dbenv, &hp->mtx_hash)) != 0 && ret == 0) + ret = t_ret; + if ((t_ret = __mutex_free( + dbenv, &hp->mtx_io)) != 0 && ret == 0) + ret = t_ret; + } + while ((frozen_alloc = SH_TAILQ_FIRST( + &mp->alloc_frozen, __bh_frozen_a)) != NULL) { + SH_TAILQ_REMOVE(&mp->alloc_frozen, frozen_alloc, + links, __bh_frozen_a); + __db_shalloc_free(reginfo, frozen_alloc); + } } /* Discard DB_MPOOLFILEs. */ @@ -346,26 +402,31 @@ __memp_dbenv_refresh(dbenv) ret = t_ret; /* Discard DB_MPREGs. */ + if (dbmp->pg_inout != NULL) + __os_free(dbenv, dbmp->pg_inout); while ((mpreg = LIST_FIRST(&dbmp->dbregq)) != NULL) { LIST_REMOVE(mpreg, q); __os_free(dbenv, mpreg); } /* Discard the DB_MPOOL thread mutex. 
*/ - if (dbmp->mutexp != NULL) - __db_mutex_free(dbenv, dbmp->reginfo, dbmp->mutexp); + if ((t_ret = __mutex_free(dbenv, &dbmp->mutex)) != 0 && ret == 0) + ret = t_ret; if (F_ISSET(dbenv, DB_ENV_PRIVATE)) { /* Discard REGION IDs. */ reginfo = &dbmp->reginfo[0]; mp = dbmp->reginfo[0].primary; - __db_shalloc_free(reginfo, R_ADDR(reginfo, mp->regids)); + __memp_free(reginfo, NULL, R_ADDR(reginfo, mp->regids)); + + /* Discard the File table. */ + __memp_free(reginfo, NULL, R_ADDR(reginfo, mp->ftab)); /* Discard Hash tables. */ for (i = 0; i < dbmp->nreg; ++i) { reginfo = &dbmp->reginfo[i]; mp = reginfo->primary; - __db_shalloc_free(reginfo, R_ADDR(reginfo, mp->htab)); + __memp_free(reginfo, NULL, R_ADDR(reginfo, mp->htab)); } } @@ -383,62 +444,3 @@ __memp_dbenv_refresh(dbenv) dbenv->mp_handle = NULL; return (ret); } - -#ifdef HAVE_MUTEX_SYSTEM_RESOURCES -/* - * __memp_region_maint -- - * Return the amount of space needed for region maintenance info. - * - */ -static size_t -__memp_region_maint(infop) - REGINFO *infop; -{ - size_t s; - int numlocks; - - /* - * For mutex maintenance we need one mutex per possible page. - * Compute the maximum number of pages this cache can have. - * Also add in an mpool mutex and mutexes for all dbenv and db - * handles. - */ - numlocks = ((infop->rp->size / DB_MIN_PGSIZE) + 1); - numlocks += DB_MAX_HANDLES; - s = sizeof(roff_t) * numlocks; - return (s); -} -#endif - -/* - * __memp_region_destroy - * Destroy any region maintenance info. - * - * PUBLIC: void __memp_region_destroy __P((DB_ENV *, REGINFO *)); - */ -void -__memp_region_destroy(dbenv, infop) - DB_ENV *dbenv; - REGINFO *infop; -{ - /* - * This routine is called in two cases: when discarding the mutexes - * from a previous Berkeley DB run, during recovery, and two, when - * discarding the mutexes as we shut down the database environment. 
- * In the latter case, we also need to discard shared memory segments, - * this is the last time we use them, and the last region-specific - * call we make. - */ -#ifdef HAVE_MUTEX_SYSTEM_RESOURCES - MPOOL *mp; - - mp = R_ADDR(infop, infop->rp->primary); - - /* Destroy mutexes. */ - __db_shlocks_destroy(infop, R_ADDR(infop, mp->maint_off)); - if (infop->primary != NULL && F_ISSET(dbenv, DB_ENV_PRIVATE)) - __db_shalloc_free(infop, R_ADDR(infop, mp->maint_off)); -#endif - if (infop->primary != NULL && F_ISSET(dbenv, DB_ENV_PRIVATE)) - __db_shalloc_free(infop, infop->primary); -} |