diff options
Diffstat (limited to 'db/mp')
-rw-r--r-- | db/mp/mp_bh.c | 479 | ||||
-rw-r--r-- | db/mp/mp_fopen.c | 845 |
2 files changed, 696 insertions, 628 deletions
diff --git a/db/mp/mp_bh.c b/db/mp/mp_bh.c index 5c438b202..24f14ab1d 100644 --- a/db/mp/mp_bh.c +++ b/db/mp/mp_bh.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2001 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "Id: mp_bh.c,v 11.45 2001/07/26 19:53:31 bostic Exp "; +static const char revid[] = "Id: mp_bh.c,v 11.68 2002/05/03 15:21:16 bostic Exp "; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -18,43 +18,41 @@ static const char revid[] = "Id: mp_bh.c,v 11.45 2001/07/26 19:53:31 bostic Exp #endif #include "db_int.h" -#include "db_shash.h" -#include "mp.h" -#include "log.h" -#include "db_page.h" +#include "dbinc/db_shash.h" +#include "dbinc/mp.h" +#include "dbinc/log.h" +#include "dbinc/db_page.h" +static int __memp_pgwrite + __P((DB_MPOOL *, DB_MPOOLFILE *, DB_MPOOL_HASH *, BH *)); static int __memp_upgrade __P((DB_MPOOL *, DB_MPOOLFILE *, MPOOLFILE *)); /* * __memp_bhwrite -- - * Write the page associated with a given bucket header. + * Write the page associated with a given buffer header. * - * PUBLIC: int __memp_bhwrite - * PUBLIC: __P((DB_MPOOL *, MPOOLFILE *, BH *, int, int *, int *)); + * PUBLIC: int __memp_bhwrite __P((DB_MPOOL *, + * PUBLIC: DB_MPOOL_HASH *, MPOOLFILE *, BH *, int)); */ int -__memp_bhwrite(dbmp, mfp, bhp, open_extents, restartp, wrotep) +__memp_bhwrite(dbmp, hp, mfp, bhp, open_extents) DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; MPOOLFILE *mfp; BH *bhp; - int open_extents, *restartp, *wrotep; + int open_extents; { DB_ENV *dbenv; DB_MPOOLFILE *dbmfp; DB_MPREG *mpreg; - int incremented, ret; + int local_open, incremented, ret; dbenv = dbmp->dbenv; - - if (restartp != NULL) - *restartp = 0; - if (wrotep != NULL) - *wrotep = 0; - incremented = 0; + local_open = incremented = 0; /* - * If the file has been removed or is a closed temporary file, Jump - * right ahead and pretend that we've found the file we want-- the + * If the file has been removed or is a closed temporary file, jump + * right ahead and pretend that we've found the file we want -- the * page-write function knows how to handle the fact that we don't have * (or need!) any real file descriptor information. */ @@ -74,25 +72,35 @@ __memp_bhwrite(dbmp, mfp, bhp, open_extents, restartp, wrotep) dbmfp != NULL; dbmfp = TAILQ_NEXT(dbmfp, q)) if (dbmfp->mfp == mfp) { if (F_ISSET(dbmfp, MP_READONLY) && - __memp_upgrade(dbmp, dbmfp, mfp)) { + !F_ISSET(dbmfp, MP_UPGRADE) && + (F_ISSET(dbmfp, MP_UPGRADE_FAIL) || + __memp_upgrade(dbmp, dbmfp, mfp))) { MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); - return (0); + return (EPERM); } /* * Increment the reference count -- see the comment in - * memp_fclose(). + * __memp_fclose_int(). */ ++dbmfp->ref; incremented = 1; break; } MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); + if (dbmfp != NULL) goto found; /* * !!! + * It's the caller's choice if we're going to open extent files. + */ + if (!open_extents && F_ISSET(mfp, MP_EXTENT)) + return (EPERM); + + /* + * !!! * Don't try to attach to temporary files. There are two problems in * trying to do that. First, if we have different privileges than the * process that "owns" the temporary file, we might create the backing @@ -107,15 +115,12 @@ __memp_bhwrite(dbmp, mfp, bhp, open_extents, restartp, wrotep) * with resource starvation, and the memp_trickle thread couldn't do * anything about it. That's a pretty unlikely scenario, though. * - * Note that we should never get here when the temporary file - * in question has already been closed in another process, in which - * case it should be marked MP_DEADFILE. + * Note we should never get here when the temporary file in question + * has already been closed in another process, in which case it should + * be marked MP_DEADFILE. */ - if (F_ISSET(mfp, MP_TEMP) - || (F_ISSET(mfp, MP_EXTENT) && !open_extents)) { - DB_ASSERT(!F_ISSET(mfp, MP_DEADFILE)); - return (0); - } + if (F_ISSET(mfp, MP_TEMP)) + return (EPERM); /* * It's not a page from a file we've opened. If the file requires @@ -131,7 +136,7 @@ __memp_bhwrite(dbmp, mfp, bhp, open_extents, restartp, wrotep) break; MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); if (mpreg == NULL) - return (0); + return (EPERM); } /* @@ -142,25 +147,24 @@ __memp_bhwrite(dbmp, mfp, bhp, open_extents, restartp, wrotep) * There's no negative cache, so we may repeatedly try and open files * that we have previously tried (and failed) to open. */ - if (dbenv->memp_fcreate(dbenv, &dbmfp, 0) != 0) - return (0); - if (__memp_fopen_int(dbmfp, mfp, + if ((ret = dbenv->memp_fcreate(dbenv, &dbmfp, 0)) != 0) + return (ret); + if ((ret = __memp_fopen_int(dbmfp, mfp, R_ADDR(dbmp->reginfo, mfp->path_off), - 0, 0, mfp->stat.st_pagesize, 0) != 0) { + 0, 0, mfp->stat.st_pagesize)) != 0) { (void)dbmfp->close(dbmfp, 0); - return (0); + return (ret); } - F_SET(dbmfp, MP_FLUSH); - if (F_ISSET(mfp, MP_EXTENT)) - dbmp->extents = 1; + local_open = 1; -found: ret = __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep); +found: ret = __memp_pgwrite(dbmp, dbmfp, hp, bhp) == 0 ? 0 : 1; - if (incremented) { - MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp); + MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp); + if (incremented) --dbmfp->ref; - MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); - } + else if (local_open) + F_SET(dbmfp, MP_FLUSH); + MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); return (ret); } @@ -169,11 +173,12 @@ found: ret = __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep); * __memp_pgread -- * Read a page from a file. * - * PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, BH *, int)); + * PUBLIC: int __memp_pgread __P((DB_MPOOLFILE *, DB_MUTEX *, BH *, int)); */ int -__memp_pgread(dbmfp, bhp, can_create) +__memp_pgread(dbmfp, mutexp, bhp, can_create) DB_MPOOLFILE *dbmfp; + DB_MUTEX *mutexp; BH *bhp; int can_create; { @@ -181,19 +186,21 @@ __memp_pgread(dbmfp, bhp, can_create) DB_ENV *dbenv; DB_MPOOL *dbmp; MPOOLFILE *mfp; - size_t len, pagesize; - size_t nr; - int created, ret; + size_t len, nr, pagesize; + int ret; dbmp = dbmfp->dbmp; dbenv = dbmp->dbenv; mfp = dbmfp->mfp; pagesize = mfp->stat.st_pagesize; - created = 0; + /* We should never be called with a dirty or a locked buffer. */ + DB_ASSERT(!F_ISSET(bhp, BH_DIRTY | BH_DIRTY_CREATE | BH_LOCKED)); + + /* Lock the buffer and swap the hash bucket lock for the buffer lock. */ F_SET(bhp, BH_LOCKED | BH_TRASH); - MUTEX_LOCK(dbenv, &bhp->mutex, dbenv->lockfhp); - R_UNLOCK(dbenv, dbmp->reginfo); + MUTEX_LOCK(dbenv, &bhp->mutex); + MUTEX_UNLOCK(dbenv, mutexp); /* * Temporary files may not yet have been created. We don't create @@ -208,125 +215,101 @@ __memp_pgread(dbmfp, bhp, can_create) db_io.buf = bhp->buf; /* - * The page may not exist; if it doesn't, nr may well be 0, + * The page may not exist; if it doesn't, nr may well be 0, * but we expect the underlying OS calls not to return an * error code in this case. */ if ((ret = __os_io(dbenv, &db_io, DB_IO_READ, &nr)) != 0) goto err; - } else - ret = 0; + } if (nr < pagesize) { - if (can_create) - created = 1; - else { - /* - * Don't output error messages for short reads. In - * particular, DB recovery processing may request pages - * that have never been written to disk or for which - * only some part have been written to disk, in which - * case we won't find the page. The caller must know - * how to handle the error. - */ + /* + * Don't output error messages for short reads. In particular, + * DB recovery processing may request pages never written to + * disk or for which only some part have been written to disk, + * in which case we won't find the page. The caller must know + * how to handle the error. + */ + if (can_create == 0) { ret = DB_PAGE_NOTFOUND; goto err; } + + /* Clear any bytes that need to be cleared. */ + len = mfp->clear_len == 0 ? pagesize : mfp->clear_len; + memset(bhp->buf, 0, len); + +#if defined(DIAGNOSTIC) || defined(UMRW) /* - * Clear any bytes that need to be cleared -- if we did a short - * read, we assume that a page was not completely written and - * clear even the bytes that we read. This is so our caller - * isn't surprised (for example, if the first sector only of a - * DB page was written, the LSN will indicate that the page was - * updated, but the page contents will be wrong). Support for - * page checksums might make this unnecessary in the future -- - * I would prefer not to discard data potentially written by - * the application, under any circumstances. - * * If we're running in diagnostic mode, corrupt any bytes on * the page that are unknown quantities for the caller. */ - len = mfp->clear_len == 0 ? pagesize : mfp->clear_len; - memset(bhp->buf, 0, len); -#if defined(DIAGNOSTIC) || defined(UMRW) if (len < pagesize) memset(bhp->buf + len, CLEAR_BYTE, pagesize - len); #endif - } + ++mfp->stat.st_page_create; + } else + ++mfp->stat.st_page_in; /* Call any pgin function. */ ret = mfp->ftype == 0 ? 0 : __memp_pg(dbmfp, bhp, 1); - /* Unlock the buffer and reacquire the region lock. */ + /* Unlock the buffer and reacquire the hash bucket lock. */ err: MUTEX_UNLOCK(dbenv, &bhp->mutex); - R_LOCK(dbenv, dbmp->reginfo); + MUTEX_LOCK(dbenv, mutexp); /* * If no errors occurred, the data is now valid, clear the BH_TRASH * flag; regardless, clear the lock bit and let other threads proceed. */ F_CLR(bhp, BH_LOCKED); - if (ret == 0) { + if (ret == 0) F_CLR(bhp, BH_TRASH); - /* Update the statistics. */ - if (created) - ++mfp->stat.st_page_create; - else - ++mfp->stat.st_page_in; - } - return (ret); } /* * __memp_pgwrite -- * Write a page to a file. - * - * PUBLIC: int __memp_pgwrite - * PUBLIC: __P((DB_MPOOL *, DB_MPOOLFILE *, BH *, int *, int *)); */ -int -__memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep) +static int +__memp_pgwrite(dbmp, dbmfp, hp, bhp) DB_MPOOL *dbmp; DB_MPOOLFILE *dbmfp; + DB_MPOOL_HASH *hp; BH *bhp; - int *restartp, *wrotep; { DB_ENV *dbenv; DB_IO db_io; DB_LSN lsn; - MPOOL *c_mp, *mp; + MPOOL *mp; MPOOLFILE *mfp; size_t nw; - int callpgin, dosync, ret, syncfail; - const char *fail; + int callpgin, ret; dbenv = dbmp->dbenv; mp = dbmp->reginfo[0].primary; mfp = dbmfp == NULL ? NULL : dbmfp->mfp; + callpgin = ret = 0; - if (restartp != NULL) - *restartp = 0; - if (wrotep != NULL) - *wrotep = 0; - callpgin = 0; - - /* We should never be called with a clean or a locked buffer. */ + /* + * We should never be called with a clean or trash buffer. + * The sync code does call us with already locked buffers. + */ DB_ASSERT(F_ISSET(bhp, BH_DIRTY)); - DB_ASSERT(!F_ISSET(bhp, BH_LOCKED)); + DB_ASSERT(!F_ISSET(bhp, BH_TRASH)); /* - * Lock the buffer, set the I/O in progress flag, and discard the - * region lock. + * If we have not already traded the hash bucket lock for the buffer + * lock, do so now. */ - MUTEX_LOCK(dbenv, &bhp->mutex, dbenv->lockfhp); - F_SET(bhp, BH_LOCKED); - R_UNLOCK(dbenv, dbmp->reginfo); - - /* Tell the caller that the region lock was discarded. */ - if (restartp != NULL) - *restartp = 1; + if (!F_ISSET(bhp, BH_LOCKED)) { + F_SET(bhp, BH_LOCKED); + MUTEX_LOCK(dbenv, &bhp->mutex); + MUTEX_UNLOCK(dbenv, &hp->hash_mutex); + } /* * It's possible that the underlying file doesn't exist, either @@ -342,13 +325,9 @@ __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep) /* * If the page is in a file for which we have LSN information, we have - * to ensure the appropriate log records are on disk. If the page is - * being written as part of a sync operation, the flush has been done - * already, unless it was modified by the application *after* the sync - * was scheduled. + * to ensure the appropriate log records are on disk. */ - if (LOGGING_ON(dbenv) && !IS_RECOVERING(dbenv) && mfp->lsn_off != -1 && - (!F_ISSET(bhp, BH_SYNC) || F_ISSET(bhp, BH_SYNC_LOGFLSH))) { + if (LOGGING_ON(dbenv) && mfp->lsn_off != -1) { memcpy(&lsn, bhp->buf + mfp->lsn_off, sizeof(DB_LSN)); if ((ret = dbenv->log_flush(dbenv, &lsn)) != 0) goto err; @@ -361,7 +340,7 @@ __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep) * !!! * One special case. There is a single field on the meta-data page, * the last-page-number-in-the-file field, for which we do not log - * changes. So, if the page was original created in a database that + * changes. If the page was originally created in a database that * didn't have logging turned on, we can see a page marked dirty but * for which no corresponding log record has been written. However, * the only way that a page can be created for which there isn't a @@ -394,9 +373,7 @@ __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep) * that the contents of the buffer will need to be passed through pgin * before they are reused. */ - if (mfp->ftype == 0) - ret = 0; - else { + if (mfp->ftype != 0) { callpgin = 1; if ((ret = __memp_pg(dbmfp, bhp, 0)) != 0) goto err; @@ -405,17 +382,16 @@ __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep) /* Temporary files may not yet have been created. */ if (!F_ISSET(dbmfp->fhp, DB_FH_VALID)) { MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp); - if (!F_ISSET(dbmfp->fhp, DB_FH_VALID) && - ((ret = __db_appname(dbenv, DB_APP_TMP, NULL, NULL, - DB_OSO_CREATE | DB_OSO_EXCL | DB_OSO_TEMP, - dbmfp->fhp, NULL)) != 0 || - !F_ISSET(dbmfp->fhp, DB_FH_VALID))) { - MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); + ret = F_ISSET(dbmfp->fhp, DB_FH_VALID) ? 0 : + __db_appname(dbenv, DB_APP_TMP, NULL, + F_ISSET(dbenv, DB_ENV_DIRECT_DB) ? DB_OSO_DIRECT : 0, + dbmfp->fhp, NULL); + MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); + if (ret != 0) { __db_err(dbenv, "unable to create temporary backing file"); goto err; } - MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); } /* Write the page. */ @@ -425,104 +401,44 @@ __memp_pgwrite(dbmp, dbmfp, bhp, restartp, wrotep) db_io.pgno = bhp->pgno; db_io.buf = bhp->buf; if ((ret = __os_io(dbenv, &db_io, DB_IO_WRITE, &nw)) != 0) { - fail = "write"; - goto syserr; - } - if (nw != mfp->stat.st_pagesize) { - ret = EIO; - fail = "write"; - goto syserr; + __db_err(dbenv, "%s: write failed for page %lu", + __memp_fn(dbmfp), (u_long)bhp->pgno); + goto err; } + ++mfp->stat.st_page_out; +err: file_dead: /* * !!! * Once we pass this point, dbmfp and mfp may be NULL, we may not have * a valid file reference. * - * Unlock the buffer and reacquire the region lock. + * Unlock the buffer and reacquire the hash lock. */ MUTEX_UNLOCK(dbenv, &bhp->mutex); - R_LOCK(dbenv, dbmp->reginfo); + MUTEX_LOCK(dbenv, &hp->hash_mutex); /* - * Clean up the flags based on a successful write. - * * If we rewrote the page, it will need processing by the pgin * routine before reuse. */ if (callpgin) F_SET(bhp, BH_CALLPGIN); - F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE | BH_LOCKED); /* - * If we write a buffer for which a checkpoint is waiting, update - * the count of pending buffers (both in the mpool as a whole and - * for this file). If the count for this file goes to zero, set a - * flag so we flush the writes. + * Update the hash bucket statistics, reset the flags. + * If we were successful, the page is no longer dirty. */ - dosync = 0; - if (F_ISSET(bhp, BH_SYNC)) { - F_CLR(bhp, BH_SYNC | BH_SYNC_LOGFLSH); - - --mp->lsn_cnt; - if (mfp != NULL) - dosync = --mfp->lsn_cnt == 0 ? 1 : 0; - } - - /* Update the page clean/dirty statistics. */ - c_mp = BH_TO_CACHE(dbmp, bhp); - ++c_mp->stat.st_page_clean; - DB_ASSERT(c_mp->stat.st_page_dirty != 0); - --c_mp->stat.st_page_dirty; - - /* Update I/O statistics. */ - if (mfp != NULL) - ++mfp->stat.st_page_out; + if (ret == 0) { + DB_ASSERT(hp->hash_page_dirty != 0); + --hp->hash_page_dirty; - /* - * Do the sync after everything else has been updated, so any incoming - * checkpoint doesn't see inconsistent information. - * - * XXX: - * Don't lock the region around the sync, fsync(2) has no atomicity - * issues. - * - * XXX: - * We ignore errors from the sync -- it makes no sense to return an - * error to the calling process, so set a flag causing the checkpoint - * to be retried later. There is a possibility, of course, that a - * subsequent checkpoint was started and that we're going to force it - * to fail. That should be unlikely, and fixing it would be difficult. - */ - if (dosync) { - R_UNLOCK(dbenv, dbmp->reginfo); - syncfail = __os_fsync(dbenv, dbmfp->fhp) != 0; - R_LOCK(dbenv, dbmp->reginfo); - if (syncfail) - F_SET(mp, MP_LSN_RETRY); + F_CLR(bhp, BH_DIRTY | BH_DIRTY_CREATE); } - if (wrotep != NULL) - *wrotep = 1; - - return (0); - -syserr: __db_err(dbenv, "%s: %s failed for page %lu", - __memp_fn(dbmfp), fail, (u_long)bhp->pgno); - -err: /* Unlock the buffer and reacquire the region lock. */ - MUTEX_UNLOCK(dbenv, &bhp->mutex); - R_LOCK(dbenv, dbmp->reginfo); - - /* - * Clean up the flags based on a failure. - * - * The page remains dirty but we remove our lock. If we rewrote the - * page, it will need processing by the pgin routine before reuse. - */ - if (callpgin) - F_SET(bhp, BH_CALLPGIN); + /* Regardless, clear any sync wait-for count and remove our lock. */ + bhp->ref_sync = 0; F_CLR(bhp, BH_LOCKED); return (ret); @@ -541,15 +457,17 @@ __memp_pg(dbmfp, bhp, is_pgin) int is_pgin; { DBT dbt, *dbtp; + DB_ENV *dbenv; DB_MPOOL *dbmp; DB_MPREG *mpreg; MPOOLFILE *mfp; int ftype, ret; dbmp = dbmfp->dbmp; + dbenv = dbmp->dbenv; mfp = dbmfp->mfp; - MUTEX_THREAD_LOCK(dbmp->dbenv, dbmp->mutexp); + MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp); ftype = mfp->ftype; for (mpreg = LIST_FIRST(&dbmp->dbregq); @@ -563,28 +481,28 @@ __memp_pg(dbmfp, bhp, is_pgin) dbt.data = R_ADDR(dbmp->reginfo, mfp->pgcookie_off); dbtp = &dbt; } - MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp); + MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); if (is_pgin) { if (mpreg->pgin != NULL && - (ret = mpreg->pgin(dbmp->dbenv, + (ret = mpreg->pgin(dbenv, bhp->pgno, bhp->buf, dbtp)) != 0) goto err; } else if (mpreg->pgout != NULL && - (ret = mpreg->pgout(dbmp->dbenv, + (ret = mpreg->pgout(dbenv, bhp->pgno, bhp->buf, dbtp)) != 0) goto err; break; } if (mpreg == NULL) - MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp); + MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); return (0); -err: MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp); - __db_err(dbmp->dbenv, "%s: %s failed for page %lu", +err: MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); + __db_err(dbenv, "%s: %s failed for page %lu", __memp_fn(dbmfp), is_pgin ? "pgin" : "pgout", (u_long)bhp->pgno); return (ret); } @@ -593,56 +511,78 @@ err: MUTEX_THREAD_UNLOCK(dbmp->dbenv, dbmp->mutexp); * __memp_bhfree -- * Free a bucket header and its referenced data. * - * PUBLIC: void __memp_bhfree __P((DB_MPOOL *, BH *, int)); + * PUBLIC: void __memp_bhfree __P((DB_MPOOL *, DB_MPOOL_HASH *, BH *, int)); */ void -__memp_bhfree(dbmp, bhp, free_mem) +__memp_bhfree(dbmp, hp, bhp, free_mem) DB_MPOOL *dbmp; + DB_MPOOL_HASH *hp; BH *bhp; int free_mem; { - DB_HASHTAB *dbht; + DB_ENV *dbenv; MPOOL *c_mp, *mp; MPOOLFILE *mfp; - int n_bucket, n_cache; + u_int32_t n_cache; + /* + * Assumes the hash bucket is locked and the MPOOL is not. + */ + dbenv = dbmp->dbenv; mp = dbmp->reginfo[0].primary; - c_mp = BH_TO_CACHE(dbmp, bhp); - n_cache = NCACHE(mp, bhp->pgno); - n_bucket = NBUCKET(c_mp, bhp->mf_offset, bhp->pgno); - dbht = R_ADDR(&dbmp->reginfo[n_cache], c_mp->htab); + n_cache = NCACHE(mp, bhp->mf_offset, bhp->pgno); - /* Delete the buffer header from the hash bucket queue. */ - SH_TAILQ_REMOVE(&dbht[n_bucket], bhp, hq, __bh); + /* + * Delete the buffer header from the hash bucket queue and reset + * the hash bucket's priority, if necessary. + */ + SH_TAILQ_REMOVE(&hp->hash_bucket, bhp, hq, __bh); + if (bhp->priority == hp->hash_priority) + hp->hash_priority = + SH_TAILQ_FIRST(&hp->hash_bucket, __bh) == NULL ? + 0 : SH_TAILQ_FIRST(&hp->hash_bucket, __bh)->priority; - /* Delete the buffer header from the LRU queue. */ - SH_TAILQ_REMOVE(&c_mp->bhq, bhp, q, __bh); + /* + * Discard the hash bucket's mutex, it's no longer needed, and + * we don't want to be holding it when acquiring other locks. + */ + MUTEX_UNLOCK(dbenv, &hp->hash_mutex); - /* Clear the mutex this buffer recorded */ - __db_shlocks_clear(&bhp->mutex, &dbmp->reginfo[n_cache], - (REGMAINT *)R_ADDR(&dbmp->reginfo[n_cache], mp->maint_off)); /* * Find the underlying MPOOLFILE and decrement its reference count. * If this is its last reference, remove it. */ mfp = R_ADDR(dbmp->reginfo, bhp->mf_offset); + MUTEX_LOCK(dbenv, &mfp->mutex); if (--mfp->block_cnt == 0 && mfp->mpf_cnt == 0) __memp_mf_discard(dbmp, mfp); + else + MUTEX_UNLOCK(dbenv, &mfp->mutex); - DB_ASSERT(c_mp->stat.st_page_clean != 0); - --c_mp->stat.st_page_clean; + R_LOCK(dbenv, &dbmp->reginfo[n_cache]); + + /* + * Clear the mutex this buffer recorded; requires the region lock + * be held. + */ + __db_shlocks_clear(&bhp->mutex, &dbmp->reginfo[n_cache], + (REGMAINT *)R_ADDR(&dbmp->reginfo[n_cache], mp->maint_off)); /* - * If we're not reusing it immediately, free the buffer header + * If we're not reusing the buffer immediately, free the buffer header * and data for real. */ - if (free_mem) + if (free_mem) { __db_shalloc_free(dbmp->reginfo[n_cache].addr, bhp); + c_mp = dbmp->reginfo[n_cache].primary; + c_mp->stat.st_pages--; + } + R_UNLOCK(dbenv, &dbmp->reginfo[n_cache]); } /* * __memp_upgrade -- - * Upgrade a file descriptor from readonly to readwrite. + * Upgrade a file descriptor from read-only to read-write. */ static int __memp_upgrade(dbmp, dbmfp, mfp) @@ -650,41 +590,58 @@ __memp_upgrade(dbmp, dbmfp, mfp) DB_MPOOLFILE *dbmfp; MPOOLFILE *mfp; { - DB_FH fh; + DB_ENV *dbenv; + DB_FH *fhp, *tfhp; int ret; char *rpath; - /* - * !!! - * We expect the handle to already be locked. - */ - - /* Check to see if we've already upgraded. */ - if (F_ISSET(dbmfp, MP_UPGRADE)) - return (0); - - /* Check to see if we've already failed. */ - if (F_ISSET(dbmfp, MP_UPGRADE_FAIL)) - return (1); + dbenv = dbmp->dbenv; + fhp = NULL; + rpath = NULL; /* * Calculate the real name for this file and try to open it read/write. * We know we have a valid pathname for the file because it's the only * way we could have gotten a file descriptor of any kind. */ - if ((ret = __db_appname(dbmp->dbenv, DB_APP_DATA, - NULL, R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) != 0) - return (ret); - if (__os_open(dbmp->dbenv, rpath, 0, 0, &fh) != 0) { + if ((ret = __os_calloc(dbenv, 1, sizeof(DB_FH), &fhp)) != 0) + goto err; + + if ((ret = __db_appname(dbenv, DB_APP_DATA, + R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) != 0) + goto err; + + if (__os_open(dbenv, rpath, + F_ISSET(mfp, MP_DIRECT) ? DB_OSO_DIRECT : 0, 0, fhp) != 0) { F_SET(dbmfp, MP_UPGRADE_FAIL); - ret = 1; - } else { - /* Swap the descriptors and set the upgrade flag. */ - (void)__os_closehandle(dbmfp->fhp); - *dbmfp->fhp = fh; - F_SET(dbmfp, MP_UPGRADE); - ret = 0; + goto err; } - __os_freestr(dbmp->dbenv, rpath); + + /* + * Swap the descriptors and set the upgrade flag. + * + * XXX + * There is a race here. If another process schedules a read using the + * existing file descriptor and is swapped out before making the system + * call, this code could theoretically close the file descriptor out + * from under it. While it's very unlikely, this code should still be + * rewritten. + */ + tfhp = dbmfp->fhp; + dbmfp->fhp = fhp; + fhp = tfhp; + + (void)__os_closehandle(dbenv, fhp); + F_SET(dbmfp, MP_UPGRADE); + + ret = 0; + if (0) { +err: ret = 1; + } + if (fhp != NULL) + __os_free(dbenv, fhp); + if (rpath != NULL) + __os_free(dbenv, rpath); + return (ret); } diff --git a/db/mp/mp_fopen.c b/db/mp/mp_fopen.c index bb3937e10..7209bf066 100644 --- a/db/mp/mp_fopen.c +++ b/db/mp/mp_fopen.c @@ -1,13 +1,13 @@ /*- * See the file LICENSE for redistribution information. * - * Copyright (c) 1996-2001 + * Copyright (c) 1996-2002 * Sleepycat Software. All rights reserved. */ #include "db_config.h" #ifndef lint -static const char revid[] = "Id: mp_fopen.c,v 11.60 2001/10/04 21:26:56 bostic Exp "; +static const char revid[] = "Id: mp_fopen.c,v 11.88 2002/07/01 15:05:30 bostic Exp "; #endif /* not lint */ #ifndef NO_SYSTEM_INCLUDES @@ -17,14 +17,13 @@ static const char revid[] = "Id: mp_fopen.c,v 11.60 2001/10/04 21:26:56 bostic E #endif #include "db_int.h" -#include "db_shash.h" -#include "mp.h" +#include "dbinc/db_shash.h" +#include "dbinc/mp.h" static int __memp_fclose __P((DB_MPOOLFILE *, u_int32_t)); static int __memp_fopen __P((DB_MPOOLFILE *, const char *, u_int32_t, int, size_t)); -static int __memp_mf_open __P((DB_MPOOLFILE *, - const char *, size_t, db_pgno_t, u_int32_t, MPOOLFILE **)); +static void __memp_get_fileid __P((DB_MPOOLFILE *, u_int8_t *)); static void __memp_last_pgno __P((DB_MPOOLFILE *, db_pgno_t *)); static void __memp_refcnt __P((DB_MPOOLFILE *, db_pgno_t *)); static int __memp_set_clear_len __P((DB_MPOOLFILE *, u_int32_t)); @@ -32,21 +31,9 @@ static int __memp_set_fileid __P((DB_MPOOLFILE *, u_int8_t *)); static int __memp_set_ftype __P((DB_MPOOLFILE *, int)); static int __memp_set_lsn_offset __P((DB_MPOOLFILE *, int32_t)); static int __memp_set_pgcookie __P((DB_MPOOLFILE *, DBT *)); +static int __memp_set_priority __P((DB_MPOOLFILE *, DB_CACHE_PRIORITY)); static void __memp_set_unlink __P((DB_MPOOLFILE *, int)); -/* - * MEMP_FREMOVE -- - * Discard an MPOOLFILE and any buffers it references: update the flags - * so we never try to write buffers associated with the file, nor can we - * find it when looking for files to join. In addition, clear the ftype - * field, there's no reason to post-process pages, they can be discarded - * by any thread. - */ -#define MEMP_FREMOVE(mfp) { \ - mfp->ftype = 0; \ - F_SET(mfp, MP_DEADFILE); \ -} - /* Initialization methods cannot be called after open is called. */ #define MPF_ILLEGAL_AFTER_OPEN(dbmfp, name) \ if (F_ISSET(dbmfp, MP_OPEN_CALLED)) \ @@ -81,32 +68,23 @@ __memp_fcreate(dbenv, retp, flags) /* Allocate and initialize the per-process structure. */ if ((ret = __os_calloc(dbenv, 1, sizeof(DB_MPOOLFILE), &dbmfp)) != 0) return (ret); - if ((ret = __os_calloc(dbenv, 1, sizeof(DB_FH), &dbmfp->fhp)) != 0) { - __os_free(dbenv, dbmfp, sizeof(DB_MPOOLFILE)); - return (ret); - } + if ((ret = __os_calloc(dbenv, 1, sizeof(DB_FH), &dbmfp->fhp)) != 0) + goto err; /* Allocate and initialize a mutex if necessary. */ - if (F_ISSET(dbenv, DB_ENV_THREAD)) { - if ((ret = __db_mutex_alloc( - dbenv, dbmp->reginfo, 0, &dbmfp->mutexp)) != 0) - return (ret); - - if ((ret = __db_shmutex_init(dbenv, dbmfp->mutexp, 0, - MUTEX_THREAD, dbmp->reginfo, - (REGMAINT *)R_ADDR(dbmp->reginfo, - ((MPOOL *)dbmp->reginfo->primary)->maint_off))) != 0) { - __db_mutex_free(dbenv, dbmp->reginfo, dbmfp->mutexp); - return (ret); - } - } + if (F_ISSET(dbenv, DB_ENV_THREAD) && + (ret = __db_mutex_setup(dbenv, dbmp->reginfo, &dbmfp->mutexp, + MUTEX_ALLOC | MUTEX_THREAD)) != 0) + goto err; dbmfp->ref = 1; dbmfp->lsn_offset = -1; dbmfp->dbmp = dbmp; + dbmfp->mfp = INVALID_ROFF; dbmfp->close = __memp_fclose; dbmfp->get = __memp_fget; + dbmfp->get_fileid = __memp_get_fileid; dbmfp->last_pgno = __memp_last_pgno; dbmfp->open = __memp_fopen; dbmfp->put = __memp_fput; @@ -117,16 +95,19 @@ __memp_fcreate(dbenv, retp, flags) dbmfp->set_ftype = __memp_set_ftype; dbmfp->set_lsn_offset = __memp_set_lsn_offset; dbmfp->set_pgcookie = __memp_set_pgcookie; + dbmfp->set_priority = __memp_set_priority; dbmfp->set_unlink = __memp_set_unlink; dbmfp->sync = __memp_fsync; - /* Add the file to the environment's list of files. */ - MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp); - TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); - MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); - *retp = dbmfp; return (0); + +err: if (dbmfp != NULL) { + if (dbmfp->fhp != NULL) + (void)__os_free(dbenv, dbmfp->fhp); + (void)__os_free(dbenv, dbmfp); + } + return (ret); } /* @@ -168,6 +149,10 @@ __memp_set_ftype(dbmfp, ftype) DB_MPOOLFILE *dbmfp; int ftype; { + DB_ENV *dbenv; + + dbenv = dbmfp->dbmp->dbenv; + MPF_ILLEGAL_AFTER_OPEN(dbmfp, "set_ftype"); dbmfp->ftype = ftype; @@ -205,6 +190,40 @@ __memp_set_pgcookie(dbmfp, pgcookie) } /* + * __memp_set_priority -- + * Set the cache priority for pages from this file. + */ +static int +__memp_set_priority(dbmfp, priority) + DB_MPOOLFILE *dbmfp; + DB_CACHE_PRIORITY priority; +{ + switch (priority) { + case DB_PRIORITY_VERY_LOW: + dbmfp->mfp->priority = MPOOL_PRI_VERY_LOW; + break; + case DB_PRIORITY_LOW: + dbmfp->mfp->priority = MPOOL_PRI_LOW; + break; + case DB_PRIORITY_DEFAULT: + dbmfp->mfp->priority = MPOOL_PRI_DEFAULT; + break; + case DB_PRIORITY_HIGH: + dbmfp->mfp->priority = MPOOL_PRI_HIGH; + break; + case DB_PRIORITY_VERY_HIGH: + dbmfp->mfp->priority = MPOOL_PRI_VERY_HIGH; + break; + default: + __db_err(dbmfp->dbmp->dbenv, + "Unknown priority value: %d", priority); + return (EINVAL); + } + + return (0); +} + +/* * __memp_fopen -- * Open a backing file for the memory pool. */ @@ -227,7 +246,7 @@ __memp_fopen(dbmfp, path, flags, mode, pagesize) /* Validate arguments. */ if ((ret = __db_fchk(dbenv, "memp_fopen", flags, - DB_CREATE | DB_EXTENT | + DB_CREATE | DB_DIRECT | DB_EXTENT | DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE)) != 0) return (ret); @@ -242,26 +261,18 @@ __memp_fopen(dbmfp, path, flags, mode, pagesize) } if (dbmfp->clear_len > pagesize) { __db_err(dbenv, - "memp_fopen: clear length larger than page size."); + "memp_fopen: clear length larger than page size"); return (EINVAL); } /* Read-only checks, and local flag. */ - if (LF_ISSET(DB_RDONLY)) { - if (path == NULL) { - __db_err(dbenv, - "memp_fopen: temporary files can't be readonly"); - return (EINVAL); - } - F_SET(dbmfp, MP_READONLY); + if (LF_ISSET(DB_RDONLY) && path == NULL) { + __db_err(dbenv, + "memp_fopen: temporary files can't be readonly"); + return (EINVAL); } - if ((ret = __memp_fopen_int( - dbmfp, NULL, path, flags, mode, pagesize, 1)) != 0) - return (ret); - - F_SET(dbmfp, MP_OPEN_CALLED); - return (0); + return (__memp_fopen_int(dbmfp, NULL, path, flags, mode, pagesize)); } /* @@ -269,51 +280,199 @@ __memp_fopen(dbmfp, path, flags, mode, pagesize) * Open a backing file for the memory pool; internal version. * * PUBLIC: int __memp_fopen_int __P((DB_MPOOLFILE *, - * PUBLIC: MPOOLFILE *, const char *, u_int32_t, int, size_t, int)); + * PUBLIC: MPOOLFILE *, const char *, u_int32_t, int, size_t)); */ int -__memp_fopen_int(dbmfp, mfp, path, flags, mode, pagesize, needlock) +__memp_fopen_int(dbmfp, mfp, path, flags, mode, pagesize) DB_MPOOLFILE *dbmfp; MPOOLFILE *mfp; const char *path; u_int32_t flags; - int mode, needlock; + int mode; size_t pagesize; { DB_ENV *dbenv; DB_MPOOL *dbmp; + MPOOL *mp; db_pgno_t last_pgno; size_t maxmap; u_int32_t mbytes, bytes, oflags; - int ret; + int mfp_alloc, ret; u_int8_t idbuf[DB_FILE_ID_LEN]; char *rpath; + void *p; dbmp = dbmfp->dbmp; dbenv = dbmp->dbenv; - ret = 0; + mp = dbmp->reginfo[0].primary; + mfp_alloc = ret = 0; rpath = NULL; + /* + * Set the page size so os_open can decide whether to turn buffering + * off if the DB_DIRECT_DB flag is set. + */ + dbmfp->fhp->pagesize = (u_int32_t)pagesize; + + /* + * If it's a temporary file, delay the open until we actually need + * to write the file, and we know we can't join any existing files. + */ if (path == NULL) - last_pgno = 0; - else { - /* Get the real name for this file and open it. */ - if ((ret = __db_appname(dbenv, - DB_APP_DATA, NULL, path, 0, NULL, &rpath)) != 0) + goto alloc; + + /* + * Get the real name for this file and open it. If it's a Queue extent + * file, it may not exist, and that's OK. + */ + oflags = 0; + if (LF_ISSET(DB_CREATE)) + oflags |= DB_OSO_CREATE; + if (LF_ISSET(DB_DIRECT)) + oflags |= DB_OSO_DIRECT; + if (LF_ISSET(DB_RDONLY)) { + F_SET(dbmfp, MP_READONLY); + oflags |= DB_OSO_RDONLY; + } + if ((ret = + __db_appname(dbenv, DB_APP_DATA, path, 0, NULL, &rpath)) != 0) + goto err; + if ((ret = __os_open(dbenv, rpath, oflags, mode, dbmfp->fhp)) != 0) { + if (!LF_ISSET(DB_EXTENT)) + __db_err(dbenv, "%s: %s", rpath, db_strerror(ret)); + goto err; + } + + /* + * Get the file id if we weren't given one. Generated file id's + * don't use timestamps, otherwise there'd be no chance of any + * other process joining the party. + */ + if (dbmfp->fileid == NULL) { + if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0) goto err; - oflags = 0; - if (LF_ISSET(DB_CREATE)) - oflags |= DB_OSO_CREATE; - if (LF_ISSET(DB_RDONLY)) - oflags |= DB_OSO_RDONLY; - if ((ret = - __os_open(dbenv, rpath, oflags, mode, dbmfp->fhp)) != 0) { - if (!LF_ISSET(DB_EXTENT)) - __db_err(dbenv, - "%s: %s", rpath, db_strerror(ret)); + dbmfp->fileid = idbuf; + } + + /* + * If our caller knows what mfp we're using, increment the ref count, + * no need to search. + * + * We don't need to acquire a lock other than the mfp itself, because + * we know there's another reference and it's not going away. + */ + if (mfp != NULL) { + MUTEX_LOCK(dbenv, &mfp->mutex); + ++mfp->mpf_cnt; + MUTEX_UNLOCK(dbenv, &mfp->mutex); + goto check_map; + } + + /* + * If not creating a temporary file, walk the list of MPOOLFILE's, + * looking for a matching file. Files backed by temporary files + * or previously removed files can't match. + * + * DB_TRUNCATE support. + * + * The fileID is a filesystem unique number (e.g., a UNIX dev/inode + * pair) plus a timestamp. If files are removed and created in less + * than a second, the fileID can be repeated. The problem with + * repetition happens when the file that previously had the fileID + * value still has pages in the pool, since we don't want to use them + * to satisfy requests for the new file. + * + * Because the DB_TRUNCATE flag reuses the dev/inode pair, repeated + * opens with that flag set guarantees matching fileIDs when the + * machine can open a file and then re-open with truncate within a + * second. For this reason, we pass that flag down, and, if we find + * a matching entry, we ensure that it's never found again, and we + * create a new entry for the current request. + */ + R_LOCK(dbenv, dbmp->reginfo); + for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile); + mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { + /* Skip dead files and temporary files. */ + if (F_ISSET(mfp, MP_DEADFILE | MP_TEMP)) + continue; + + /* Skip non-matching files. */ + if (memcmp(dbmfp->fileid, R_ADDR(dbmp->reginfo, + mfp->fileid_off), DB_FILE_ID_LEN) != 0) + continue; + + /* + * If the file is being truncated, remove it from the system + * and create a new entry. + * + * !!! + * We should be able to set mfp to NULL and break out of the + * loop, but I like the idea of checking all the entries. + */ + if (LF_ISSET(DB_TRUNCATE)) { + MUTEX_LOCK(dbenv, &mfp->mutex); + MPOOLFILE_IGNORE(mfp); + MUTEX_UNLOCK(dbenv, &mfp->mutex); + continue; + } + + /* + * Some things about a file cannot be changed: the clear length, + * page size, or lSN location. + * + * The file type can change if the application's pre- and post- + * processing needs change. For example, an application that + * created a hash subdatabase in a database that was previously + * all btree. + * + * XXX + * We do not check to see if the pgcookie information changed, + * or update it if it is, this might be a bug. + */ + if (dbmfp->clear_len != mfp->clear_len || + pagesize != mfp->stat.st_pagesize || + dbmfp->lsn_offset != mfp->lsn_off) { + __db_err(dbenv, + "%s: clear length, page size or LSN location changed", + path); + R_UNLOCK(dbenv, dbmp->reginfo); + ret = EINVAL; goto err; } + if (dbmfp->ftype != 0) + mfp->ftype = dbmfp->ftype; + + MUTEX_LOCK(dbenv, &mfp->mutex); + ++mfp->mpf_cnt; + MUTEX_UNLOCK(dbenv, &mfp->mutex); + break; + } + R_UNLOCK(dbenv, dbmp->reginfo); + + if (mfp != NULL) + goto check_map; + +alloc: /* Allocate and initialize a new MPOOLFILE. */ + if ((ret = __memp_alloc( + dbmp, dbmp->reginfo, NULL, sizeof(MPOOLFILE), NULL, &mfp)) != 0) + goto err; + mfp_alloc = 1; + memset(mfp, 0, sizeof(MPOOLFILE)); + mfp->mpf_cnt = 1; + mfp->ftype = dbmfp->ftype; + mfp->stat.st_pagesize = pagesize; + mfp->lsn_off = dbmfp->lsn_offset; + mfp->clear_len = dbmfp->clear_len; + + if (LF_ISSET(DB_DIRECT)) + F_SET(mfp, MP_DIRECT); + if (LF_ISSET(DB_EXTENT)) + F_SET(mfp, MP_EXTENT); + + if (path == NULL) + F_SET(mfp, MP_TEMP); + else { /* * Don't permit files that aren't a multiple of the pagesize, * and find the number of the last page in the file, all the @@ -327,79 +486,84 @@ __memp_fopen_int(dbmfp, mfp, path, flags, mode, pagesize, needlock) * environments where an off_t is 32-bits, but still run where * offsets are 64-bits, and they pay us a lot of money. */ - if ((ret = __os_ioinfo(dbenv, rpath, - dbmfp->fhp, &mbytes, &bytes, NULL)) != 0) { + if ((ret = __os_ioinfo( + dbenv, rpath, dbmfp->fhp, &mbytes, &bytes, NULL)) != 0) { __db_err(dbenv, "%s: %s", rpath, db_strerror(ret)); goto err; } /* - * If we're doing a verify, we might have to cope with - * a truncated file; if the file size is not a multiple - * of the page size, round down to a page -- we'll - * take care of the partial page outside the memp system. + * During verify or recovery, we might have to cope with a + * truncated file; if the file size is not a multiple of the + * page size, round down to a page, we'll take care of the + * partial page outside the mpool system. */ if (bytes % pagesize != 0) { if (LF_ISSET(DB_ODDFILESIZE)) - /* - * During verify or recovery, we might have - * to cope with a truncated file; round down, - * we'll worry about the partial page outside - * the memp system. - */ - bytes -= (bytes % pagesize); + bytes -= (u_int32_t)(bytes % pagesize); else { __db_err(dbenv, - "%s: file size not a multiple of the pagesize", - rpath); + "%s: file size not a multiple of the pagesize", rpath); ret = EINVAL; goto err; } } - last_pgno = mbytes * (MEGABYTE / pagesize); - last_pgno += bytes / pagesize; - - /* Correction: page numbers are zero-based, not 1-based. */ + /* + * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a + * page get, we have to increment the last page in the file. + * Figure it out and save it away. + * + * Note correction: page numbers are zero-based, not 1-based. + */ + last_pgno = (db_pgno_t)(mbytes * (MEGABYTE / pagesize)); + last_pgno += (db_pgno_t)(bytes / pagesize); if (last_pgno != 0) --last_pgno; + mfp->orig_last_pgno = mfp->last_pgno = last_pgno; - /* - * Get the file id if we weren't given one. Generated file id's - * don't use timestamps, otherwise there'd be no chance of any - * other process joining the party. - */ - if (dbmfp->fileid == NULL) { - if ((ret = __os_fileid(dbenv, rpath, 0, idbuf)) != 0) - goto err; - dbmfp->fileid = idbuf; - } + /* Copy the file path into shared memory. */ + if ((ret = __memp_alloc(dbmp, dbmp->reginfo, + NULL, strlen(path) + 1, &mfp->path_off, &p)) != 0) + goto err; + memcpy(p, path, strlen(path) + 1); + + /* Copy the file identification string into shared memory. */ + if ((ret = __memp_alloc(dbmp, dbmp->reginfo, + NULL, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) + goto err; + memcpy(p, dbmfp->fileid, DB_FILE_ID_LEN); + } + + /* Copy the page cookie into shared memory. */ + if (dbmfp->pgcookie == NULL || dbmfp->pgcookie->size == 0) { + mfp->pgcookie_len = 0; + mfp->pgcookie_off = 0; + } else { + if ((ret = __memp_alloc(dbmp, dbmp->reginfo, + NULL, dbmfp->pgcookie->size, &mfp->pgcookie_off, &p)) != 0) + goto err; + memcpy(p, dbmfp->pgcookie->data, dbmfp->pgcookie->size); + mfp->pgcookie_len = dbmfp->pgcookie->size; } /* - * If we weren't provided an underlying shared object to join with, - * find/allocate the shared file objects. Also allocate space for - * for the per-process thread lock. + * Prepend the MPOOLFILE to the list of MPOOLFILE's. */ - if (needlock) - R_LOCK(dbenv, dbmp->reginfo); - if (mfp == NULL) - ret = __memp_mf_open( - dbmfp, path, pagesize, last_pgno, flags, &mfp); - else { - ++mfp->mpf_cnt; - ret = 0; - } - dbmfp->mfp = mfp; - if (needlock) - R_UNLOCK(dbenv, dbmp->reginfo); + R_LOCK(dbenv, dbmp->reginfo); + ret = __db_mutex_setup(dbenv, dbmp->reginfo, &mfp->mutex, + MUTEX_NO_RLOCK); + if (ret == 0) + SH_TAILQ_INSERT_HEAD(&mp->mpfq, mfp, q, __mpoolfile); + R_UNLOCK(dbenv, dbmp->reginfo); if (ret != 0) goto err; +check_map: /* * If a file: - * + is read-only * + isn't temporary + * + is read-only * + doesn't require any pgin/pgout support * + the DB_NOMMAP flag wasn't set (in either the file open or * the environment in which it was opened) @@ -411,7 +575,6 @@ __memp_fopen_int(dbmfp, mfp, path, flags, mode, pagesize, needlock) * NFS mounted partition, and we can fail in buffer I/O just as easily * as here. * - * XXX * We'd like to test to see if the file is too big to mmap. Since we * don't know what size or type off_t's or size_t's are, or the largest * unsigned integral type is, or what random insanity the local C @@ -420,10 +583,10 @@ __memp_fopen_int(dbmfp, mfp, path, flags, mode, pagesize, needlock) */ #define DB_MAXMMAPSIZE (10 * 1024 * 1024) /* 10 Mb. */ if (F_ISSET(mfp, MP_CAN_MMAP)) { - if (!F_ISSET(dbmfp, MP_READONLY)) - F_CLR(mfp, MP_CAN_MMAP); if (path == NULL) F_CLR(mfp, MP_CAN_MMAP); + if (!F_ISSET(dbmfp, MP_READONLY)) + F_CLR(mfp, MP_CAN_MMAP); if (dbmfp->ftype != 0) F_CLR(mfp, MP_CAN_MMAP); if (LF_ISSET(DB_NOMMAP) || F_ISSET(dbenv, DB_ENV_NOMMAP)) @@ -433,179 +596,72 @@ __memp_fopen_int(dbmfp, mfp, path, flags, mode, pagesize, needlock) if (mbytes > maxmap / MEGABYTE || (mbytes == maxmap / MEGABYTE && bytes >= maxmap % MEGABYTE)) F_CLR(mfp, MP_CAN_MMAP); - } - dbmfp->addr = NULL; - if (F_ISSET(mfp, MP_CAN_MMAP)) { - dbmfp->len = (size_t)mbytes * MEGABYTE + bytes; - if (__os_mapfile(dbenv, rpath, - dbmfp->fhp, dbmfp->len, 1, &dbmfp->addr) != 0) { - dbmfp->addr = NULL; - F_CLR(mfp, MP_CAN_MMAP); + + dbmfp->addr = NULL; + if (F_ISSET(mfp, MP_CAN_MMAP)) { + dbmfp->len = (size_t)mbytes * MEGABYTE + bytes; + if (__os_mapfile(dbenv, rpath, + dbmfp->fhp, dbmfp->len, 1, &dbmfp->addr) != 0) { + dbmfp->addr = NULL; + F_CLR(mfp, MP_CAN_MMAP); + } } } - if (rpath != NULL) - __os_freestr(dbenv, rpath); - return (0); + dbmfp->mfp = mfp; + + F_SET(dbmfp, MP_OPEN_CALLED); -err: if (rpath != NULL) - __os_freestr(dbenv, rpath); - if (F_ISSET(dbmfp->fhp, DB_FH_VALID)) - (void)__os_closehandle(dbmfp->fhp); + /* Add the file to the process' list of DB_MPOOLFILEs. */ + MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp); + TAILQ_INSERT_TAIL(&dbmp->dbmfq, dbmfp, q); + MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); + + if (0) { +err: if (F_ISSET(dbmfp->fhp, DB_FH_VALID)) + (void)__os_closehandle(dbenv, dbmfp->fhp); + + if (mfp_alloc) { + R_LOCK(dbenv, dbmp->reginfo); + if (mfp->path_off != 0) + __db_shalloc_free(dbmp->reginfo[0].addr, + R_ADDR(dbmp->reginfo, mfp->path_off)); + if (mfp->fileid_off != 0) + __db_shalloc_free(dbmp->reginfo[0].addr, + R_ADDR(dbmp->reginfo, mfp->fileid_off)); + __db_shalloc_free(dbmp->reginfo[0].addr, mfp); + R_UNLOCK(dbenv, dbmp->reginfo); + } + + } + if (rpath != NULL) + __os_free(dbenv, rpath); return (ret); } /* - * __memp_mf_open -- - * Open an MPOOLFILE. + * __memp_get_fileid -- + * Return the file ID. + * + * XXX + * Undocumented interface: DB private. */ -static int -__memp_mf_open(dbmfp, path, pagesize, last_pgno, flags, retp) +static void +__memp_get_fileid(dbmfp, fidp) DB_MPOOLFILE *dbmfp; - const char *path; - size_t pagesize; - db_pgno_t last_pgno; - u_int32_t flags; - MPOOLFILE **retp; + u_int8_t *fidp; { - DB_MPOOL *dbmp; - MPOOL *mp; - MPOOLFILE *mfp; - int ret; - void *p; - -#define ISTEMPORARY (path == NULL) - - dbmp = dbmfp->dbmp; - /* - * If not creating a temporary file, walk the list of MPOOLFILE's, - * looking for a matching file. Files backed by temporary files - * or previously removed files can't match. - * - * DB_TRUNCATE support. + * No lock needed -- we're using the handle, it had better not + * be going away. * - * The fileID is a filesystem unique number (e.g., a UNIX dev/inode - * pair) plus a timestamp. If files are removed and created in less - * than a second, the fileID can be repeated. The problem with - * repetition happens when the file that previously had the fileID - * value still has pages in the pool, since we don't want to use them - * to satisfy requests for the new file. - * - * Because the DB_TRUNCATE flag reuses the dev/inode pair, repeated - * opens with that flag set guarantees matching fileIDs when the - * machine can open a file and then re-open with truncate within a - * second. For this reason, we pass that flag down, and, if we find - * a matching entry, we ensure that it's never found again, and we - * create a new entry for the current request. + * !!! + * Get the fileID out of the region, not out of the DB_MPOOLFILE + * structure because the DB_MPOOLFILE reference is possibly short + * lived, and isn't to be trusted. */ - if (!ISTEMPORARY) { - mp = dbmp->reginfo[0].primary; - for (mfp = SH_TAILQ_FIRST(&mp->mpfq, __mpoolfile); - mfp != NULL; mfp = SH_TAILQ_NEXT(mfp, q, __mpoolfile)) { - if (F_ISSET(mfp, MP_DEADFILE | MP_TEMP)) - continue; - if (memcmp(dbmfp->fileid, R_ADDR(dbmp->reginfo, - mfp->fileid_off), DB_FILE_ID_LEN) == 0) { - if (LF_ISSET(DB_TRUNCATE)) { - MEMP_FREMOVE(mfp); - continue; - } - if (dbmfp->clear_len != mfp->clear_len || - pagesize != mfp->stat.st_pagesize) { - __db_err(dbmp->dbenv, - "%s: page size or clear length changed", - path); - return (EINVAL); - } - - /* - * It's possible that our needs for pre- and - * post-processing are changing. For example, - * an application created a hash subdatabase - * in a database that was previously all btree. - */ - if (dbmfp->ftype != 0) - mfp->ftype = dbmfp->ftype; - - ++mfp->mpf_cnt; - - *retp = mfp; - return (0); - } - } - } - - /* Allocate a new MPOOLFILE. */ - if ((ret = __memp_alloc( - dbmp, dbmp->reginfo, NULL, sizeof(MPOOLFILE), NULL, &mfp)) != 0) - goto mem_err; - *retp = mfp; - - /* Initialize the structure. */ - memset(mfp, 0, sizeof(MPOOLFILE)); - mfp->mpf_cnt = 1; - mfp->ftype = dbmfp->ftype; - mfp->lsn_off = dbmfp->lsn_offset; - mfp->clear_len = dbmfp->clear_len; - - /* - * If the user specifies DB_MPOOL_LAST or DB_MPOOL_NEW on a memp_fget, - * we have to know the last page in the file. Figure it out and save - * it away. - */ - mfp->stat.st_pagesize = pagesize; - mfp->orig_last_pgno = mfp->last_pgno = last_pgno; - - if (ISTEMPORARY) - F_SET(mfp, MP_TEMP); - else { - /* Copy the file path into shared memory. */ - if ((ret = __memp_alloc(dbmp, dbmp->reginfo, - NULL, strlen(path) + 1, &mfp->path_off, &p)) != 0) - goto err; - memcpy(p, path, strlen(path) + 1); - - /* Copy the file identification string into shared memory. */ - if ((ret = __memp_alloc(dbmp, dbmp->reginfo, - NULL, DB_FILE_ID_LEN, &mfp->fileid_off, &p)) != 0) - goto err; - memcpy(p, dbmfp->fileid, DB_FILE_ID_LEN); - - F_SET(mfp, MP_CAN_MMAP); - if (LF_ISSET(DB_EXTENT)) - F_SET(mfp, MP_EXTENT); - } - - /* Copy the page cookie into shared memory. */ - if (dbmfp->pgcookie == NULL || dbmfp->pgcookie->size == 0) { - mfp->pgcookie_len = 0; - mfp->pgcookie_off = 0; - } else { - if ((ret = __memp_alloc(dbmp, dbmp->reginfo, - NULL, dbmfp->pgcookie->size, &mfp->pgcookie_off, &p)) != 0) - goto err; - memcpy(p, dbmfp->pgcookie->data, dbmfp->pgcookie->size); - mfp->pgcookie_len = dbmfp->pgcookie->size; - } - - /* Prepend the MPOOLFILE to the list of MPOOLFILE's. */ - mp = dbmp->reginfo[0].primary; - SH_TAILQ_INSERT_HEAD(&mp->mpfq, mfp, q, __mpoolfile); - - if (0) { -err: if (mfp->path_off != 0) - __db_shalloc_free(dbmp->reginfo[0].addr, - R_ADDR(dbmp->reginfo, mfp->path_off)); - if (mfp->fileid_off != 0) - __db_shalloc_free(dbmp->reginfo[0].addr, - R_ADDR(dbmp->reginfo, mfp->fileid_off)); - if (mfp != NULL) - __db_shalloc_free(dbmp->reginfo[0].addr, mfp); -mem_err: __db_err(dbmp->dbenv, - "Unable to allocate memory for mpool file"); - } - return (ret); + memcpy(fidp, R_ADDR( + dbmfp->dbmp->reginfo, dbmfp->mfp->fileid_off), DB_FILE_ID_LEN); } /* @@ -644,14 +700,12 @@ __memp_refcnt(dbmfp, cntp) db_pgno_t *cntp; { DB_ENV *dbenv; - DB_MPOOL *dbmp; - dbmp = dbmfp->dbmp; - dbenv = dbmp->dbenv; + dbenv = dbmfp->dbmp->dbenv; - R_LOCK(dbenv, dbmp->reginfo); + MUTEX_LOCK(dbenv, &dbmfp->mfp->mutex); *cntp = dbmfp->mfp->mpf_cnt; - R_UNLOCK(dbenv, dbmp->reginfo); + MUTEX_UNLOCK(dbenv, &dbmfp->mfp->mutex); } /* @@ -666,26 +720,16 @@ __memp_set_unlink(dbmpf, set) DB_MPOOLFILE *dbmpf; int set; { - DB_MPOOL *dbmp; + DB_ENV *dbenv; - dbmp = dbmpf->dbmp; + dbenv = dbmpf->dbmp->dbenv; - if (set) { - R_LOCK(dbmp->dbenv, dbmp->reginfo); + MUTEX_LOCK(dbenv, &dbmpf->mfp->mutex); + if (set) F_SET(dbmpf->mfp, MP_UNLINK); - R_UNLOCK(dbmp->dbenv, dbmp->reginfo); - } else { - /* - * This bit is protected in the queue code because the metapage - * is locked, so we can avoid getting the region lock. If this - * gets used from other than the queue code, we cannot. - */ - if (F_ISSET(dbmpf->mfp, MP_UNLINK)) { - R_LOCK(dbmp->dbenv, dbmp->reginfo); - F_CLR(dbmpf->mfp, MP_UNLINK); - R_UNLOCK(dbmp->dbenv, dbmp->reginfo); - } - } + else + F_CLR(dbmpf->mfp, MP_UNLINK); + MUTEX_UNLOCK(dbenv, &dbmpf->mfp->mutex); } /* @@ -698,7 +742,7 @@ __memp_fclose(dbmfp, flags) u_int32_t flags; { DB_ENV *dbenv; - int ret; + int ret, t_ret; dbenv = dbmfp->dbmp->dbenv; @@ -708,79 +752,83 @@ __memp_fclose(dbmfp, flags) * XXX * DB_MPOOL_DISCARD: Undocumented flag: DB private. */ - if (flags != 0 && (ret = __db_fchk(dbenv, - "DB_MPOOLFILE->close", flags, DB_MPOOL_DISCARD)) != 0) - return (ret); + ret = __db_fchk(dbenv, "DB_MPOOLFILE->close", flags, DB_MPOOL_DISCARD); + + if ((t_ret = __memp_fclose_int(dbmfp, flags)) != 0 && ret == 0) + ret = t_ret; - return (__memp_fclose_int(dbmfp, flags, 1)); + return (ret); } /* * __memp_fclose_int -- * Internal version of __memp_fclose. * - * PUBLIC: int __memp_fclose_int __P((DB_MPOOLFILE *, u_int32_t, int)); + * PUBLIC: int __memp_fclose_int __P((DB_MPOOLFILE *, u_int32_t)); */ int -__memp_fclose_int(dbmfp, flags, needlock) +__memp_fclose_int(dbmfp, flags) DB_MPOOLFILE *dbmfp; u_int32_t flags; - int needlock; { DB_ENV *dbenv; DB_MPOOL *dbmp; MPOOLFILE *mfp; char *rpath; - int ret, t_ret; + int deleted, ret, t_ret; dbmp = dbmfp->dbmp; dbenv = dbmp->dbenv; ret = 0; /* - * Remove the DB_MPOOLFILE from the queue. This has to happen before - * we perform any action that can fail, otherwise __memp_close may - * loop infinitely when calling us to discard all of the DB_MPOOLFILEs. + * We have to reference count DB_MPOOLFILE structures as other threads + * in the process may be using them. Here's the problem: + * + * Thread A opens a database. + * Thread B uses thread A's DB_MPOOLFILE to write a buffer + * in order to free up memory in the mpool cache. + * Thread A closes the database while thread B is using the + * DB_MPOOLFILE structure. + * + * By opening all databases before creating any threads, and closing + * the databases after all the threads have exited, applications get + * better performance and avoid the problem path entirely. + * + * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer is a + * short-term lock, even in worst case, since we better be the only + * thread of control using the DB_MPOOLFILE structure to read pages + * *into* the cache. Wait until we're the only reference holder and + * remove the DB_MPOOLFILE structure from the list, so nobody else can + * find it. We do this, rather than have the last reference holder + * (whoever that might be) discard the DB_MPOOLFILE structure, because + * we'd rather write error messages to the application in the close + * routine, not in the checkpoint/sync routine. + * + * !!! + * It's possible the DB_MPOOLFILE was never added to the DB_MPOOLFILE + * file list, check the DB_OPEN_CALLED flag to be sure. */ - for (;;) { + for (deleted = 0;;) { MUTEX_THREAD_LOCK(dbenv, dbmp->mutexp); - - /* - * We have to reference count DB_MPOOLFILE structures as other - * threads may be using them. The problem only happens if the - * application makes a bad design choice. Here's the path: - * - * Thread A opens a database. - * Thread B uses thread A's DB_MPOOLFILE to write a buffer - * in order to free up memory in the mpool cache. - * Thread A closes the database while thread B is using the - * DB_MPOOLFILE structure. - * - * By opening all databases before creating the threads, and - * closing them after the threads have exited, applications - * get better performance and avoid the problem path entirely. - * - * Regardless, holding the DB_MPOOLFILE to flush a dirty buffer - * is a short-term lock, even in worst case, since we better be - * the only thread of control using the DB_MPOOLFILE structure - * to read pages *into* the cache. Wait until we're the only - * reference holder and remove the DB_MPOOLFILE structure from - * the list, so nobody else can even find it. - */ if (dbmfp->ref == 1) { - TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q); - break; + if (F_ISSET(dbmfp, MP_OPEN_CALLED)) + TAILQ_REMOVE(&dbmp->dbmfq, dbmfp, q); + deleted = 1; } MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); - (void)__os_sleep(dbenv, 1, 0); + if (deleted) + break; + __os_sleep(dbenv, 1, 0); } - MUTEX_THREAD_UNLOCK(dbenv, dbmp->mutexp); /* Complain if pinned blocks never returned. */ - if (dbmfp->pinref != 0) + if (dbmfp->pinref != 0) { __db_err(dbenv, "%s: close: %lu blocks left pinned", __memp_fn(dbmfp), (u_long)dbmfp->pinref); + ret = __db_panic(dbenv, DB_RUNRECOVERY); + } /* Discard any mmap information. */ if (dbmfp->addr != NULL && @@ -789,7 +837,7 @@ __memp_fclose_int(dbmfp, flags, needlock) /* Close the file; temporary files may not yet have been created. */ if (F_ISSET(dbmfp->fhp, DB_FH_VALID) && - (t_ret = __os_closehandle(dbmfp->fhp)) != 0) { + (t_ret = __os_closehandle(dbenv, dbmfp->fhp)) != 0) { __db_err(dbenv, "%s: %s", __memp_fn(dbmfp), db_strerror(t_ret)); if (ret == 0) ret = t_ret; @@ -801,41 +849,51 @@ __memp_fclose_int(dbmfp, flags, needlock) /* * Discard our reference on the the underlying MPOOLFILE, and close - * it if it's no longer useful to anyone. - * + * it if it's no longer useful to anyone. It possible the open of + * the file never happened or wasn't successful, in which case, mpf + * will be NULL; + */ + if ((mfp = dbmfp->mfp) == NULL) + goto done; + + /* * If it's a temp file, all outstanding references belong to unflushed * buffers. (A temp file can only be referenced by one DB_MPOOLFILE). * We don't care about preserving any of those buffers, so mark the * MPOOLFILE as dead so that even the dirty ones just get discarded * when we try to flush them. */ - if ((mfp = dbmfp->mfp) == NULL) - goto done; - if (needlock) - R_LOCK(dbenv, dbmp->reginfo); + deleted = 0; + MUTEX_LOCK(dbenv, &mfp->mutex); if (--mfp->mpf_cnt == 0 || LF_ISSET(DB_MPOOL_DISCARD)) { if (LF_ISSET(DB_MPOOL_DISCARD) || F_ISSET(mfp, MP_TEMP | MP_UNLINK)) - MEMP_FREMOVE(mfp); + MPOOLFILE_IGNORE(mfp); if (F_ISSET(mfp, MP_UNLINK)) { if ((t_ret = __db_appname(dbmp->dbenv, - DB_APP_DATA, NULL, R_ADDR(dbmp->reginfo, + DB_APP_DATA, R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) != 0 && ret == 0) ret = t_ret; - if (t_ret == 0 && (t_ret = - __os_unlink(dbmp->dbenv, rpath) != 0) && ret == 0) + if (t_ret == 0) { + if ((t_ret = __os_unlink( + dbmp->dbenv, rpath) != 0) && ret == 0) + ret = t_ret; + __os_free(dbenv, rpath); + } + } + if (mfp->block_cnt == 0) { + if ((t_ret = + __memp_mf_discard(dbmp, mfp)) != 0 && ret == 0) ret = t_ret; - __os_free(dbenv, rpath, 0); + deleted = 1; } - if (mfp->block_cnt == 0) - __memp_mf_discard(dbmp, mfp); } - if (needlock) - R_UNLOCK(dbenv, dbmp->reginfo); + if (deleted == 0) + MUTEX_UNLOCK(dbenv, &mfp->mutex); -done: /* Discard the DB_MPOOLFILE structure. */ - __os_free(dbenv, dbmfp->fhp, sizeof(DB_FH)); - __os_free(dbenv, dbmfp, sizeof(DB_MPOOLFILE)); + /* Discard the DB_MPOOLFILE structure. */ +done: __os_free(dbenv, dbmfp->fhp); + __os_free(dbenv, dbmfp); return (ret); } @@ -844,20 +902,69 @@ done: /* Discard the DB_MPOOLFILE structure. */ * __memp_mf_discard -- * Discard an MPOOLFILE. * - * PUBLIC: void __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *)); + * PUBLIC: int __memp_mf_discard __P((DB_MPOOL *, MPOOLFILE *)); */ -void +int __memp_mf_discard(dbmp, mfp) DB_MPOOL *dbmp; MPOOLFILE *mfp; { + DB_ENV *dbenv; + DB_FH fh; + DB_MPOOL_STAT *sp; MPOOL *mp; + char *rpath; + int ret; + dbenv = dbmp->dbenv; mp = dbmp->reginfo[0].primary; + ret = 0; + + /* + * Expects caller to be holding the MPOOLFILE mutex. + * + * When discarding a file, we have to flush writes from it to disk. + * The scenario is that dirty buffers from this file need to be + * flushed to satisfy a future checkpoint, but when the checkpoint + * calls mpool sync, the sync code won't know anything about them. + */ + if (!F_ISSET(mfp, MP_DEADFILE) && + (ret = __db_appname(dbenv, DB_APP_DATA, + R_ADDR(dbmp->reginfo, mfp->path_off), 0, NULL, &rpath)) == 0) { + if ((ret = __os_open(dbenv, rpath, 0, 0, &fh)) == 0) { + ret = __os_fsync(dbenv, &fh); + (void)__os_closehandle(dbenv, &fh); + } + __os_free(dbenv, rpath); + } + + /* + * We have to release the MPOOLFILE lock before acquiring the region + * lock so that we don't deadlock. Make sure nobody ever looks at + * this structure again. + */ + MPOOLFILE_IGNORE(mfp); + + /* Discard the mutex we're holding. */ + MUTEX_UNLOCK(dbenv, &mfp->mutex); /* Delete from the list of MPOOLFILEs. */ + R_LOCK(dbenv, dbmp->reginfo); SH_TAILQ_REMOVE(&mp->mpfq, mfp, q, __mpoolfile); + /* Copy the statistics into the region. */ + sp = &mp->stat; + sp->st_cache_hit += mfp->stat.st_cache_hit; + sp->st_cache_miss += mfp->stat.st_cache_miss; + sp->st_map += mfp->stat.st_map; + sp->st_page_create += mfp->stat.st_page_create; + sp->st_page_in += mfp->stat.st_page_in; + sp->st_page_out += mfp->stat.st_page_out; + + /* Clear the mutex this MPOOLFILE recorded. */ + __db_shlocks_clear(&mfp->mutex, dbmp->reginfo, + (REGMAINT *)R_ADDR(dbmp->reginfo, mp->maint_off)); + /* Free the space. */ if (mfp->path_off != 0) __db_shalloc_free(dbmp->reginfo[0].addr, @@ -869,6 +976,10 @@ __memp_mf_discard(dbmp, mfp) __db_shalloc_free(dbmp->reginfo[0].addr, R_ADDR(dbmp->reginfo, mfp->pgcookie_off)); __db_shalloc_free(dbmp->reginfo[0].addr, mfp); + + R_UNLOCK(dbenv, dbmp->reginfo); + + return (ret); } /* |