diff options
author | jbj <devnull@localhost> | 2001-03-21 18:33:35 +0000 |
---|---|---|
committer | jbj <devnull@localhost> | 2001-03-21 18:33:35 +0000 |
commit | 731946f4b90eb1173452dd30f1296dd825155d82 (patch) | |
tree | 67535f54ecb7e5463c06e62044e4efd84ae0291d /db/hash/hash.c | |
parent | 7ed904da030dc4640ff9bce8458ba07cc09d830d (diff) | |
download | rpm-731946f4b90eb1173452dd30f1296dd825155d82.tar.gz rpm-731946f4b90eb1173452dd30f1296dd825155d82.tar.bz2 rpm-731946f4b90eb1173452dd30f1296dd825155d82.zip |
Initial revision
CVS patchset: 4644
CVS date: 2001/03/21 18:33:35
Diffstat (limited to 'db/hash/hash.c')
-rw-r--r-- | db/hash/hash.c | 2096 |
1 files changed, 2096 insertions, 0 deletions
diff --git a/db/hash/hash.c b/db/hash/hash.c new file mode 100644 index 000000000..e96fd4898 --- /dev/null +++ b/db/hash/hash.c @@ -0,0 +1,2096 @@ +/*- + * See the file LICENSE for redistribution information. + * + * Copyright (c) 1996, 1997, 1998, 1999, 2000 + * Sleepycat Software. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * Margo Seltzer. All rights reserved. + */ +/* + * Copyright (c) 1990, 1993, 1994 + * The Regents of the University of California. All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * Margo Seltzer. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "db_config.h" + +#ifndef lint +static const char revid[] = "$Id: hash.c,v 11.94 2001/01/03 16:42:26 ubell Exp $"; +#endif /* not lint */ + +#ifndef NO_SYSTEM_INCLUDES +#include <sys/types.h> + +#include <stdlib.h> +#include <string.h> +#endif + +#include "db_int.h" +#include "db_page.h" +#include "db_am.h" +#include "db_ext.h" +#include "db_shash.h" +#include "db_swap.h" +#include "hash.h" +#include "btree.h" +#include "log.h" +#include "lock.h" +#include "txn.h" + +static int __ham_c_close __P((DBC *, db_pgno_t, int *)); +static int __ham_c_del __P((DBC *)); +static int __ham_c_destroy __P((DBC *)); +static int __ham_c_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __ham_c_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); +static int __ham_c_writelock __P((DBC *)); +static int __ham_del_dups __P((DBC *, DBT *)); +static int __ham_delete __P((DB *, DB_TXN *, DBT *, u_int32_t)); +static int __ham_dup_return __P((DBC *, DBT *, u_int32_t)); +static int __ham_expand_table __P((DBC *)); +static int __ham_init_htab __P((DBC *, + const char *, db_pgno_t, u_int32_t, u_int32_t)); +static int __ham_lookup __P((DBC *, + const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *)); +static int __ham_overwrite __P((DBC *, DBT *, u_int32_t)); + +/* + * __ham_metachk -- + * + * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *)); + */ +int +__ham_metachk(dbp, name, hashm) + DB *dbp; + const char *name; + HMETA 
*hashm; +{ + DB_ENV *dbenv; + u_int32_t vers; + int ret; + + dbenv = dbp->dbenv; + + /* + * At this point, all we know is that the magic number is for a Hash. + * Check the version, the database may be out of date. + */ + vers = hashm->dbmeta.version; + if (F_ISSET(dbp, DB_AM_SWAP)) + M_32_SWAP(vers); + switch (vers) { + case 4: + case 5: + case 6: + __db_err(dbenv, + "%s: hash version %lu requires a version upgrade", + name, (u_long)vers); + return (DB_OLD_VERSION); + case 7: + break; + default: + __db_err(dbenv, + "%s: unsupported hash version: %lu", name, (u_long)vers); + return (EINVAL); + } + + /* Swap the page if we need to. */ + if (F_ISSET(dbp, DB_AM_SWAP) && (ret = __ham_mswap((PAGE *)hashm)) != 0) + return (ret); + + /* Check the type. */ + if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN) + return (EINVAL); + dbp->type = DB_HASH; + DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); + + /* + * Check application info against metadata info, and set info, flags, + * and type based on metadata info. + */ + if ((ret = __db_fchk(dbenv, + "DB->open", hashm->dbmeta.flags, + DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0) + return (ret); + + if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + else + if (F_ISSET(dbp, DB_AM_DUP)) { + __db_err(dbenv, + "%s: DB_DUP specified to open method but not set in database", + name); + return (EINVAL); + } + + if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + else + if (F_ISSET(dbp, DB_AM_SUBDB)) { + __db_err(dbenv, + "%s: multiple databases specified but not supported in file", + name); + return (EINVAL); + } + + if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) { + if (dbp->dup_compare == NULL) + dbp->dup_compare = __bam_defcmp; + } else + if (dbp->dup_compare != NULL) { + __db_err(dbenv, + "%s: duplicate sort function specified but not set in database", + name); + return (EINVAL); + } + + /* Set the page size. */ + dbp->pgsize = hashm->dbmeta.pagesize; + + /* Copy the file's ID. 
*/ + memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); + + return (0); +} + +/* + * __ham_open -- + * + * PUBLIC: int __ham_open __P((DB *, const char *, db_pgno_t, u_int32_t)); + */ +int +__ham_open(dbp, name, base_pgno, flags) + DB *dbp; + const char *name; + db_pgno_t base_pgno; + u_int32_t flags; +{ + DB_ENV *dbenv; + DBC *dbc; + HASH_CURSOR *hcp; + HASH *hashp; + int need_sync, ret, t_ret; + + dbc = NULL; + dbenv = dbp->dbenv; + need_sync = 0; + + /* Initialize the remaining fields/methods of the DB. */ + dbp->del = __ham_delete; + dbp->stat = __ham_stat; + + /* + * Get a cursor. If DB_CREATE is specified, we may be creating + * pages, and to do that safely in CDB we need a write cursor. + * In STD_LOCKING mode, we'll synchronize using the meta page + * lock instead. + */ + if ((ret = dbp->cursor(dbp, + dbp->open_txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ? + DB_WRITECURSOR : 0)) != 0) + return (ret); + + hcp = (HASH_CURSOR *)dbc->internal; + hashp = dbp->h_internal; + hashp->meta_pgno = base_pgno; + if ((ret = __ham_get_meta(dbc)) != 0) + goto err1; + + /* + * If this is a new file, initialize it, and put it back dirty. + * + * Initialize the hdr structure. + */ + if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) { + /* File exists, verify the data in the header. */ + if (hashp->h_hash == NULL) + hashp->h_hash = hcp->hdr->dbmeta.version < 5 + ? __ham_func4 : __ham_func5; + if (!F_ISSET(dbp, DB_RDONLY) && + hashp->h_hash(dbp, + CHARKEY, sizeof(CHARKEY)) != hcp->hdr->h_charkey) { + __db_err(dbp->dbenv, + "hash: incompatible hash function"); + ret = EINVAL; + goto err2; + } + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP)) + F_SET(dbp, DB_AM_DUP); + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT)) + F_SET(dbp, DB_AM_DUPSORT); + if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB)) + F_SET(dbp, DB_AM_SUBDB); + } else if (!IS_RECOVERING(dbenv)) { + /* + * File does not exist, we must initialize the header. 
If + * locking is enabled that means getting a write lock first. + * During recovery the meta page will be in the log. + */ + dbc->lock.pgno = base_pgno; + + if (STD_LOCKING(dbc) && + ((ret = lock_put(dbenv, &hcp->hlock)) != 0 || + (ret = lock_get(dbenv, dbc->locker, + DB_NONBLOCK(dbc) ? DB_LOCK_NOWAIT : 0, + &dbc->lock_dbt, DB_LOCK_WRITE, &hcp->hlock)) != 0)) + goto err2; + else if (CDB_LOCKING(dbp->dbenv)) { + DB_ASSERT(LF_ISSET(DB_CREATE)); + if ((ret = lock_get(dbenv, dbc->locker, + DB_LOCK_UPGRADE, &dbc->lock_dbt, DB_LOCK_WRITE, + &dbc->mylock)) != 0) + goto err2; + } + if ((ret = __ham_init_htab(dbc, name, + base_pgno, hashp->h_nelem, hashp->h_ffactor)) != 0) + goto err2; + + need_sync = 1; + } + +err2: /* Release the meta data page */ + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; +err1: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + + /* Sync the file so that we know that the meta data goes to disk. */ + if (ret == 0 && need_sync) + ret = dbp->sync(dbp, 0); +#if CONFIG_TEST + if (ret == 0) + DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); + +DB_TEST_RECOVERY_LABEL +#endif + if (ret != 0) + (void)__ham_db_close(dbp); + + return (ret); +} + +/************************** LOCAL CREATION ROUTINES **********************/ +/* + * Returns 0 on No Error + */ +static int +__ham_init_htab(dbc, name, pgno, nelem, ffactor) + DBC *dbc; + const char *name; + db_pgno_t pgno; + u_int32_t nelem, ffactor; +{ + DB *dbp; + DB_LOCK metalock; + DB_LSN orig_lsn; + DBMETA *mmeta; + HASH_CURSOR *hcp; + HASH *hashp; + PAGE *h; + db_pgno_t mpgno; + int32_t l2, nbuckets; + int dirty_mmeta, i, ret, t_ret; + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + hashp = dbp->h_internal; + mmeta = NULL; + h = NULL; + ret = 0; + dirty_mmeta = 0; + metalock.off = LOCK_INVALID; + + if (hashp->h_hash == NULL) + hashp->h_hash = DB_HASHVERSION < 5 ? 
__ham_func4 : __ham_func5; + + if (nelem != 0 && ffactor != 0) { + nelem = (nelem - 1) / ffactor + 1; + l2 = __db_log2(nelem > 2 ? nelem : 2); + } else + l2 = 1; + nbuckets = 1 << l2; + + orig_lsn = hcp->hdr->dbmeta.lsn; + memset(hcp->hdr, 0, sizeof(HMETA)); + ZERO_LSN(hcp->hdr->dbmeta.lsn); + hcp->hdr->dbmeta.pgno = pgno; + hcp->hdr->dbmeta.magic = DB_HASHMAGIC; + hcp->hdr->dbmeta.version = DB_HASHVERSION; + hcp->hdr->dbmeta.pagesize = dbp->pgsize; + hcp->hdr->dbmeta.type = P_HASHMETA; + hcp->hdr->dbmeta.free = PGNO_INVALID; + hcp->hdr->max_bucket = hcp->hdr->high_mask = nbuckets - 1; + hcp->hdr->low_mask = (nbuckets >> 1) - 1; + hcp->hdr->ffactor = ffactor; + hcp->hdr->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); + memcpy(hcp->hdr->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); + + if (F_ISSET(dbp, DB_AM_DUP)) + F_SET(&hcp->hdr->dbmeta, DB_HASH_DUP); + if (F_ISSET(dbp, DB_AM_SUBDB)) + F_SET(&hcp->hdr->dbmeta, DB_HASH_SUBDB); + if (dbp->dup_compare != NULL) + F_SET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT); + + if ((ret = memp_fset(dbp->mpf, hcp->hdr, DB_MPOOL_DIRTY)) != 0) + goto err; + + /* + * Create the first and second buckets pages so that we have the + * page numbers for them and we can store that page number + * in the meta-data header (spares[0]). + */ + hcp->hdr->spares[0] = nbuckets; + if ((ret = memp_fget(dbp->mpf, + &hcp->hdr->spares[0], DB_MPOOL_NEW_GROUP, &h)) != 0) + goto err; + + P_INIT(h, dbp->pgsize, hcp->hdr->spares[0], PGNO_INVALID, + PGNO_INVALID, 0, P_HASH); + + /* Fill in the last fields of the meta data page. */ + hcp->hdr->spares[0] -= (nbuckets - 1); + for (i = 1; i <= l2; i++) + hcp->hdr->spares[i] = hcp->hdr->spares[0]; + for (; i < NCACHED; i++) + hcp->hdr->spares[i] = PGNO_INVALID; + + /* + * Before we are about to put any dirty pages, we need to log + * the meta-data page create. 
+ */ + ret = __db_log_page(dbp, name, &orig_lsn, pgno, (PAGE *)hcp->hdr); + + if (dbp->open_txn != NULL) { + mmeta = (DBMETA *) hcp->hdr; + if (F_ISSET(dbp, DB_AM_SUBDB)) { + + /* + * If this is a subdatabase, then we need to + * get the LSN off the master meta data page + * because that's where free pages are linked + * and during recovery we need to access + * that page and roll it backward/forward + * correctly with respect to LSN. + */ + mpgno = PGNO_BASE_MD; + if ((ret = __db_lget(dbc, + 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0) + goto err; + if ((ret = memp_fget(dbp->mpf, + &mpgno, 0, (PAGE **)&mmeta)) != 0) + goto err; + } + if ((t_ret = __ham_groupalloc_log(dbp->dbenv, + dbp->open_txn, &LSN(mmeta), 0, dbp->log_fileid, + &LSN(mmeta), hcp->hdr->spares[0], + hcp->hdr->max_bucket + 1, mmeta->free)) != 0 && ret == 0) + ret = t_ret; + if (ret == 0) { + /* need to update real LSN for buffer manager */ + dirty_mmeta = 1; + } + + } + + DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOG, ret, name); + +DB_TEST_RECOVERY_LABEL +err: if (h != NULL && + (t_ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0 && ret == 0) + ret = t_ret; + + if (F_ISSET(dbp, DB_AM_SUBDB) && mmeta != NULL) + if ((t_ret = memp_fput(dbp->mpf, mmeta, + dirty_mmeta ? DB_MPOOL_DIRTY : 0)) != 0 && ret == 0) + ret = t_ret; + if (metalock.off != LOCK_INVALID) + (void)__TLPUT(dbc, metalock); + + return (ret); +} + +static int +__ham_delete(dbp, txn, key, flags) + DB *dbp; + DB_TXN *txn; + DBT *key; + u_int32_t flags; +{ + DBC *dbc; + HASH_CURSOR *hcp; + db_pgno_t pgno; + int ret, t_ret; + + /* + * This is the only access method routine called directly from + * the dbp, so we have to do error checking. 
+ */ + + PANIC_CHECK(dbp->dbenv); + DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del"); + DB_CHECK_TXN(dbp, txn); + + if ((ret = + __db_delchk(dbp, key, flags, F_ISSET(dbp, DB_AM_RDONLY))) != 0) + return (ret); + + if ((ret = dbp->cursor(dbp, txn, &dbc, DB_WRITELOCK)) != 0) + return (ret); + + DEBUG_LWRITE(dbc, txn, "ham_delete", key, NULL, flags); + + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + + pgno = PGNO_INVALID; + if ((ret = __ham_lookup(dbc, key, 0, DB_LOCK_WRITE, &pgno)) == 0) { + if (F_ISSET(hcp, H_OK)) { + if (pgno == PGNO_INVALID) + ret = __ham_del_pair(dbc, 1); + else { + /* When we close the cursor in __ham_del_dups, + * that will make the off-page dup tree go + * go away as well as our current entry. When + * it updates cursors, ours should get marked + * as H_DELETED. + */ + ret = __ham_del_dups(dbc, key); + } + } else + ret = DB_NOTFOUND; + } + + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +out: if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* ****************** CURSORS ********************************** */ +/* + * __ham_c_init -- + * Initialize the hash-specific portion of a cursor. 
+ * + * PUBLIC: int __ham_c_init __P((DBC *)); + */ +int +__ham_c_init(dbc) + DBC *dbc; +{ + DB_ENV *dbenv; + HASH_CURSOR *new_curs; + int ret; + + dbenv = dbc->dbp->dbenv; + if ((ret = __os_calloc(dbenv, + 1, sizeof(struct cursor_t), &new_curs)) != 0) + return (ret); + if ((ret = __os_malloc(dbenv, + dbc->dbp->pgsize, NULL, &new_curs->split_buf)) != 0) { + __os_free(new_curs, sizeof(*new_curs)); + return (ret); + } + + dbc->internal = (DBC_INTERNAL *) new_curs; + dbc->c_close = __db_c_close; + dbc->c_count = __db_c_count; + dbc->c_del = __db_c_del; + dbc->c_dup = __db_c_dup; + dbc->c_get = __db_c_get; + dbc->c_put = __db_c_put; + dbc->c_am_close = __ham_c_close; + dbc->c_am_del = __ham_c_del; + dbc->c_am_destroy = __ham_c_destroy; + dbc->c_am_get = __ham_c_get; + dbc->c_am_put = __ham_c_put; + dbc->c_am_writelock = __ham_c_writelock; + + __ham_item_init(dbc); + + return (0); +} + +/* + * __ham_c_close -- + * Close down the cursor from a single use. + */ +static int +__ham_c_close(dbc, root_pgno, rmroot) + DBC *dbc; + db_pgno_t root_pgno; + int *rmroot; +{ + HASH_CURSOR *hcp; + HKEYDATA *dp; + int doroot, gotmeta, ret, t_ret; + u_int32_t dirty; + + COMPQUIET(rmroot, 0); + dirty = 0; + doroot = gotmeta = ret = 0; + hcp = (HASH_CURSOR *) dbc->internal; + + /* Check for off page dups. 
*/ + if (dbc->internal->opd != NULL) { + if ((ret = __ham_get_meta(dbc)) != 0) + goto done; + gotmeta = 1; + if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) + goto out; + dp = (HKEYDATA *)H_PAIRDATA(hcp->page, hcp->indx); + DB_ASSERT(HPAGE_PTYPE(dp) == H_OFFDUP); + memcpy(&root_pgno, HOFFPAGE_PGNO(dp), sizeof(db_pgno_t)); + + if ((ret = + hcp->opd->c_am_close(hcp->opd, root_pgno, &doroot)) != 0) + goto out; + if (doroot != 0) { + if ((ret = __ham_del_pair(dbc, 1)) != 0) + goto out; + dirty = DB_MPOOL_DIRTY; + } + } + +out: if (hcp->page != NULL && (t_ret = + memp_fput(dbc->dbp->mpf, hcp->page, dirty)) != 0 && ret == 0) + ret = t_ret; + if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +done: + __ham_item_init(dbc); + return (ret); +} + +/* + * __ham_c_destroy -- + * Cleanup the access method private part of a cursor. + */ +static int +__ham_c_destroy(dbc) + DBC *dbc; +{ + HASH_CURSOR *hcp; + + hcp = (HASH_CURSOR *)dbc->internal; + if (hcp->split_buf != NULL) + __os_free(hcp->split_buf, dbc->dbp->pgsize); + __os_free(hcp, sizeof(HASH_CURSOR)); + + return (0); +} + +/* + * __ham_c_count -- + * Return a count of on-page duplicates. 
+ * + * PUBLIC: int __ham_c_count __P((DBC *, db_recno_t *)); + */ +int +__ham_c_count(dbc, recnop) + DBC *dbc; + db_recno_t *recnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_indx_t len; + db_recno_t recno; + int ret, t_ret; + u_int8_t *p, *pend; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *) dbc->internal; + + recno = 0; + + if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) + return (ret); + + switch (HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))) { + case H_KEYDATA: + case H_OFFPAGE: + recno = 1; + break; + case H_DUPLICATE: + p = HKEYDATA_DATA(H_PAIRDATA(hcp->page, hcp->indx)); + pend = p + + LEN_HDATA(hcp->page, dbp->pgsize, hcp->indx); + for (; p < pend; recno++) { + /* p may be odd, so copy rather than just dereffing */ + memcpy(&len, p, sizeof(db_indx_t)); + p += 2 * sizeof(db_indx_t) + len; + } + + break; + default: + ret = __db_unknown_type(dbp->dbenv, "__ham_c_count", + HPAGE_PTYPE(H_PAIRDATA(hcp->page, hcp->indx))); + goto err; + } + + *recnop = recno; + +err: if ((t_ret = memp_fput(dbc->dbp->mpf, hcp->page, 0)) != 0 && ret == 0) + ret = t_ret; + hcp->page = NULL; + return (ret); +} + +static int +__ham_c_del(dbc) + DBC *dbc; +{ + DB *dbp; + DBT repldbt; + HASH_CURSOR *hcp; + int ret, t_ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if (F_ISSET(hcp, H_DELETED)) + return (DB_NOTFOUND); + + if ((ret = __ham_get_meta(dbc)) != 0) + goto out; + + if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0) + goto out; + + /* Off-page duplicates. */ + if (HPAGE_TYPE(hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) + goto out; + + if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. 
*/ + if (hcp->dup_off == 0 && + DUP_SIZE(hcp->dup_len) == LEN_HDATA(hcp->page, + hcp->hdr->dbmeta.pagesize, hcp->indx)) + ret = __ham_del_pair(dbc, 1); + else { + repldbt.flags = 0; + F_SET(&repldbt, DB_DBT_PARTIAL); + repldbt.doff = hcp->dup_off; + repldbt.dlen = DUP_SIZE(hcp->dup_len); + repldbt.size = 0; + repldbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->page, + hcp->indx)); + ret = __ham_replpair(dbc, &repldbt, 0); + hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); + F_SET(hcp, H_DELETED); + ret = __ham_c_update(dbc, DUP_SIZE(hcp->dup_len), 0, 1); + } + + } else /* Not a duplicate */ + ret = __ham_del_pair(dbc, 1); + +out: if (ret == 0 && hcp->page != NULL && + (t_ret = memp_fput(dbp->mpf, hcp->page, DB_MPOOL_DIRTY)) != 0) + ret = t_ret; + hcp->page = NULL; + if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + return (ret); +} + +/* + * __ham_c_dup -- + * Duplicate a hash cursor, such that the new one holds appropriate + * locks for the position of the original. + * + * PUBLIC: int __ham_c_dup __P((DBC *, DBC *)); + */ +int +__ham_c_dup(orig_dbc, new_dbc) + DBC *orig_dbc, *new_dbc; +{ + HASH_CURSOR *orig, *new; + + orig = (HASH_CURSOR *)orig_dbc->internal; + new = (HASH_CURSOR *)new_dbc->internal; + + new->bucket = orig->bucket; + new->lbucket = orig->lbucket; + new->dup_off = orig->dup_off; + new->dup_len = orig->dup_len; + new->dup_tlen = orig->dup_tlen; + + if (F_ISSET(orig, H_DELETED)) + F_SET(new, H_DELETED); + if (F_ISSET(orig, H_ISDUP)) + F_SET(new, H_ISDUP); + + /* + * If the old cursor held a lock and we're not in transactions, get one + * for the new one. The reason that we don't need a new lock if we're + * in a transaction is because we already hold a lock and will continue + * to do so until commit, so there is no point in reaquiring it. We + * don't know if the old lock was a read or write lock, but it doesn't + * matter. We'll get a read lock. 
We know that this locker already + * holds a lock of the correct type, so if we need a write lock and + * request it, we know that we'll get it. + */ + if (orig->lock.off == LOCK_INVALID || orig_dbc->txn != NULL) + return (0); + + return (__ham_lock_bucket(new_dbc, DB_LOCK_READ)); +} + +static int +__ham_c_get(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_lockmode_t lock_type; + int get_key, ret, t_ret; + + hcp = (HASH_CURSOR *)dbc->internal; + dbp = dbc->dbp; + + /* Clear OR'd in additional bits so we can check for flag equality. */ + if (F_ISSET(dbc, DBC_RMW)) + lock_type = DB_LOCK_WRITE; + else + lock_type = DB_LOCK_READ; + + if ((ret = __ham_get_meta(dbc)) != 0) + return (ret); + hcp->seek_size = 0; + + ret = 0; + get_key = 1; + switch (flags) { + case DB_PREV_NODUP: + F_SET(hcp, H_NEXT_NODUP); + /* FALLTHROUGH */ + case DB_PREV: + if (IS_INITIALIZED(dbc)) { + ret = __ham_item_prev(dbc, lock_type, pgnop); + break; + } + /* FALLTHROUGH */ + case DB_LAST: + ret = __ham_item_last(dbc, lock_type, pgnop); + break; + case DB_NEXT_NODUP: + F_SET(hcp, H_NEXT_NODUP); + /* FALLTHROUGH */ + case DB_NEXT: + if (IS_INITIALIZED(dbc)) { + ret = __ham_item_next(dbc, lock_type, pgnop); + break; + } + /* FALLTHROUGH */ + case DB_FIRST: + ret = __ham_item_first(dbc, lock_type, pgnop); + break; + case DB_NEXT_DUP: + /* cgetchk has already determined that the cursor is set. */ + F_SET(hcp, H_DUPONLY); + ret = __ham_item_next(dbc, lock_type, pgnop); + break; + case DB_SET: + case DB_SET_RANGE: + case DB_GET_BOTH: + ret = __ham_lookup(dbc, key, 0, lock_type, pgnop); + get_key = 0; + break; + case DB_GET_BOTHC: + F_SET(hcp, H_DUPONLY); + + ret = __ham_item_next(dbc, lock_type, pgnop); + get_key = 0; + break; + case DB_CURRENT: + /* cgetchk has already determined that the cursor is set. 
*/ + if (F_ISSET(hcp, H_DELETED)) { + ret = DB_KEYEMPTY; + goto err; + } + + ret = __ham_item(dbc, lock_type, pgnop); + break; + } + + /* + * Must always enter this loop to do error handling and + * check for big key/data pair. + */ + for (;;) { + if (ret != 0 && ret != DB_NOTFOUND) + goto err; + else if (F_ISSET(hcp, H_OK)) { + if (*pgnop == PGNO_INVALID) + ret = __ham_dup_return (dbc, data, flags); + break; + } else if (!F_ISSET(hcp, H_NOMORE)) { + __db_err(dbp->dbenv, + "H_NOMORE returned to __ham_c_get"); + ret = EINVAL; + break; + } + + /* + * Ran out of entries in a bucket; change buckets. + */ + switch (flags) { + case DB_LAST: + case DB_PREV: + case DB_PREV_NODUP: + ret = memp_fput(dbp->mpf, hcp->page, 0); + hcp->page = NULL; + if (hcp->bucket == 0) { + ret = DB_NOTFOUND; + hcp->pgno = PGNO_INVALID; + goto err; + } + F_CLR(hcp, H_ISDUP); + hcp->bucket--; + hcp->indx = NDX_INVALID; + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + if (ret == 0) + ret = __ham_item_prev(dbc, + lock_type, pgnop); + break; + case DB_FIRST: + case DB_NEXT: + case DB_NEXT_NODUP: + ret = memp_fput(dbp->mpf, hcp->page, 0); + hcp->page = NULL; + hcp->indx = NDX_INVALID; + hcp->bucket++; + F_CLR(hcp, H_ISDUP); + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + if (hcp->bucket > hcp->hdr->max_bucket) { + ret = DB_NOTFOUND; + hcp->pgno = PGNO_INVALID; + goto err; + } + if (ret == 0) + ret = __ham_item_next(dbc, + lock_type, pgnop); + break; + case DB_GET_BOTH: + case DB_GET_BOTHC: + case DB_NEXT_DUP: + case DB_SET: + case DB_SET_RANGE: + /* Key not found. */ + ret = DB_NOTFOUND; + goto err; + case DB_CURRENT: + /* + * This should only happen if you are doing + * deletes and reading with concurrent threads + * and not doing proper locking. We return + * the same error code as we would if the + * cursor were deleted. 
+ */ + ret = DB_KEYEMPTY; + goto err; + default: + DB_ASSERT(0); + } + } + + if (get_key == 0) + F_SET(key, DB_DBT_ISSET); + +err: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + + F_CLR(hcp, H_DUPONLY); + F_CLR(hcp, H_NEXT_NODUP); + + return (ret); +} + +static int +__ham_c_put(dbc, key, data, flags, pgnop) + DBC *dbc; + DBT *key; + DBT *data; + u_int32_t flags; + db_pgno_t *pgnop; +{ + DB *dbp; + DBT tmp_val, *myval; + HASH_CURSOR *hcp; + u_int32_t nbytes; + int ret, t_ret; + + /* + * The compiler doesn't realize that we only use this when ret is + * equal to 0 and that if ret is equal to 0, that we must have set + * myval. So, we initialize it here to shut the compiler up. + */ + COMPQUIET(myval, NULL); + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + + if (F_ISSET(hcp, H_DELETED) && + flags != DB_KEYFIRST && flags != DB_KEYLAST) + return (DB_NOTFOUND); + + if ((ret = __ham_get_meta(dbc)) != 0) + goto err1; + + switch (flags) { + case DB_KEYLAST: + case DB_KEYFIRST: + case DB_NODUPDATA: + nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(key->size)) + + (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE : + HKEYDATA_PSIZE(data->size)); + if ((ret = __ham_lookup(dbc, + key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) { + ret = 0; + if (hcp->seek_found_page != PGNO_INVALID && + hcp->seek_found_page != hcp->pgno) { + if ((ret = memp_fput(dbp->mpf, hcp->page, 0)) + != 0) + goto err2; + hcp->page = NULL; + hcp->pgno = hcp->seek_found_page; + hcp->indx = NDX_INVALID; + } + + if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) { + /* + * A partial put, but the key does not exist + * and we are not beginning the write at 0. + * We must create a data item padded up to doff + * and then write the new bytes represented by + * val. 
+ */ + if ((ret = __ham_init_dbt(dbp->dbenv, + &tmp_val, data->size + data->doff, + &dbc->rdata.data, &dbc->rdata.ulen)) == 0) { + memset(tmp_val.data, 0, data->doff); + memcpy((u_int8_t *)tmp_val.data + + data->doff, data->data, data->size); + myval = &tmp_val; + } + } else + myval = (DBT *)data; + + if (ret == 0) + ret = __ham_add_el(dbc, key, myval, H_KEYDATA); + goto done; + } + break; + case DB_BEFORE: + case DB_AFTER: + case DB_CURRENT: + ret = __ham_item(dbc, DB_LOCK_WRITE, pgnop); + break; + } + + if (*pgnop == PGNO_INVALID && ret == 0) { + if (flags == DB_CURRENT || + ((flags == DB_KEYFIRST || + flags == DB_KEYLAST || flags == DB_NODUPDATA) && + !(F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK)))) + ret = __ham_overwrite(dbc, data, flags); + else + ret = __ham_add_dup(dbc, data, flags, pgnop); + } + +done: if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { + ret = __ham_expand_table(dbc); + F_CLR(hcp, H_EXPAND); + } + + if (ret == 0 && + (t_ret = memp_fset(dbp->mpf, hcp->page, DB_MPOOL_DIRTY)) != 0) + ret = t_ret; + +err2: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) + ret = t_ret; + +err1: return (ret); +} + +/********************************* UTILITIES ************************/ + +/* + * __ham_expand_table -- + */ +static int +__ham_expand_table(dbc) + DBC *dbc; +{ + DB *dbp; + PAGE *h; + HASH_CURSOR *hcp; + db_pgno_t pgno; + u_int32_t old_bucket, new_bucket; + int ret; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + if ((ret = __ham_dirty_meta(dbc)) != 0) + return (ret); + + /* + * If the split point is about to increase, make sure that we + * have enough extra pages. The calculation here is weird. + * We'd like to do this after we've upped max_bucket, but it's + * too late then because we've logged the meta-data split. What + * we'll do between then and now is increment max bucket and then + * see what the log of one greater than that is; here we have to + * look at the log of max + 2. VERY NASTY STUFF. 
+ * + * It just got even nastier. With subdatabases, we have to request + * a chunk of contiguous pages, so we do that here using an + * undocumented feature of mpool (the MPOOL_NEW_GROUP flag) to + * give us a number of contiguous pages. Ouch. + */ + if (hcp->hdr->max_bucket == hcp->hdr->high_mask) { + /* + * Ask mpool to give us a set of contiguous page numbers + * large enough to contain the next doubling. + * + * Figure out how many new pages we need. This will return + * us the last page. We calculate its page number, initialize + * the page and then write it back to reserve all the pages + * in between. It is possible that the allocation of new pages + * has already been done, but the tranaction aborted. Since + * we don't undo the allocation, check for a valid pgno before + * doing the allocation. + */ + pgno = hcp->hdr->max_bucket + 1; + if (hcp->hdr->spares[__db_log2(pgno) + 1] == PGNO_INVALID) + /* Allocate a group of pages. */ + ret = memp_fget(dbp->mpf, + &pgno, DB_MPOOL_NEW_GROUP, &h); + else { + /* Just read in the last page of the batch */ + pgno = hcp->hdr->spares[__db_log2(pgno) + 1] + + hcp->hdr->max_bucket + 1; + /* Move to the last page of the group. */ + pgno += hcp->hdr->max_bucket; + ret = memp_fget(dbp->mpf, + &pgno, DB_MPOOL_CREATE, &h); + } + if (ret != 0) + return (ret); + + P_INIT(h, dbp->pgsize, pgno, + PGNO_INVALID, PGNO_INVALID, 0, P_HASH); + pgno -= hcp->hdr->max_bucket; + } else { + pgno = BUCKET_TO_PAGE(hcp, hcp->hdr->max_bucket + 1); + if ((ret = + memp_fget(dbp->mpf, &pgno, DB_MPOOL_CREATE, &h)) != 0) + return (ret); + } + + /* Now we can log the meta-data split. */ + if (DB_LOGGING(dbc)) { + if ((ret = __ham_metagroup_log(dbp->dbenv, + dbc->txn, &h->lsn, 0, dbp->log_fileid, + hcp->hdr->max_bucket, pgno, &hcp->hdr->dbmeta.lsn, + &h->lsn)) != 0) { + (void)memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY); + return (ret); + } + + hcp->hdr->dbmeta.lsn = h->lsn; + } + + /* If we allocated some new pages, write out the last page. 
*/ + if ((ret = memp_fput(dbp->mpf, h, DB_MPOOL_DIRTY)) != 0) + return (ret); + + new_bucket = ++hcp->hdr->max_bucket; + old_bucket = (hcp->hdr->max_bucket & hcp->hdr->low_mask); + + /* + * If we started a new doubling, fill in the spares array with + * the starting page number negatively offset by the bucket number. + */ + if (new_bucket > hcp->hdr->high_mask) { + /* Starting a new doubling */ + hcp->hdr->low_mask = hcp->hdr->high_mask; + hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask; + if (hcp->hdr->spares[__db_log2(new_bucket) + 1] == PGNO_INVALID) + hcp->hdr->spares[__db_log2(new_bucket) + 1] = + pgno - new_bucket; + } + + /* Relocate records to the new bucket */ + return (__ham_split_page(dbc, old_bucket, new_bucket)); +} + +/* + * PUBLIC: u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, int32_t)); + */ +u_int32_t +__ham_call_hash(dbc, k, len) + DBC *dbc; + u_int8_t *k; + int32_t len; +{ + DB *dbp; + u_int32_t n, bucket; + HASH_CURSOR *hcp; + HASH *hashp; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + hashp = dbp->h_internal; + + n = (u_int32_t)(hashp->h_hash(dbp, k, len)); + + bucket = n & hcp->hdr->high_mask; + if (bucket > hcp->hdr->max_bucket) + bucket = bucket & hcp->hdr->low_mask; + return (bucket); +} + +/* + * Check for duplicates, and call __db_ret appropriately. Release + * everything held by the cursor. + */ +static int +__ham_dup_return (dbc, val, flags) + DBC *dbc; + DBT *val; + u_int32_t flags; +{ + DB *dbp; + HASH_CURSOR *hcp; + PAGE *pp; + DBT *myval, tmp_val; + db_indx_t ndx; + db_pgno_t pgno; + u_int32_t off, tlen; + u_int8_t *hk, type; + int cmp, ret; + db_indx_t len; + + /* Check for duplicate and return the first one. */ + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + ndx = H_DATAINDEX(hcp->indx); + type = HPAGE_TYPE(hcp->page, ndx); + pp = hcp->page; + myval = val; + + /* + * There are 4 cases: + * 1. We are not in duplicate, simply return; the upper layer + * will do the right thing. + * 2. 
 We are looking at keys and stumbled onto a duplicate.
	 * 3. We are in the middle of a duplicate set. (ISDUP set)
	 * 4. We need to check for particular data match.
	 */

	/* We should never get here with off-page dups. */
	DB_ASSERT(type != H_OFFDUP);

	/* Case 1 */
	if (type != H_DUPLICATE &&
	    flags != DB_GET_BOTH && flags != DB_GET_BOTHC)
		return (0);

	/*
	 * Here we check for the case where we just stumbled onto a
	 * duplicate.  In this case, we do initialization and then
	 * let the normal duplicate code handle it. (Case 2)
	 */
	if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) {
		F_SET(hcp, H_ISDUP);
		hcp->dup_tlen = LEN_HDATA(hcp->page,
		    hcp->hdr->dbmeta.pagesize, hcp->indx);
		hk = H_PAIRDATA(hcp->page, hcp->indx);
		if (flags == DB_LAST
		    || flags == DB_PREV || flags == DB_PREV_NODUP) {
			/*
			 * Backward traversal: walk the length-prefixed
			 * entries from the front until we step past the
			 * end of the set, then back up one entry to land
			 * on the last duplicate.
			 */
			hcp->dup_off = 0;
			do {
				memcpy(&len,
				    HKEYDATA_DATA(hk) + hcp->dup_off,
				    sizeof(db_indx_t));
				hcp->dup_off += DUP_SIZE(len);
			} while (hcp->dup_off < hcp->dup_tlen);
			hcp->dup_off -= DUP_SIZE(len);
		} else {
			/* Forward traversal starts at the first entry. */
			memcpy(&len,
			    HKEYDATA_DATA(hk), sizeof(db_indx_t));
			hcp->dup_off = 0;
		}
		hcp->dup_len = len;
	}

	/*
	 * If we are retrieving a specific key/data pair, then we
	 * may need to adjust the cursor before returning data.
	 * Case 4
	 */
	if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC) {
		if (F_ISSET(hcp, H_ISDUP)) {
			/*
			 * If we're doing a join, search forward from the
			 * current position, not the beginning of the dup set.
			 */
			if (flags == DB_GET_BOTHC)
				F_SET(hcp, H_CONTINUE);

			__ham_dsearch(dbc, val, &off, &cmp);

			/*
			 * This flag is set nowhere else and is safe to
			 * clear unconditionally.
			 */
			F_CLR(hcp, H_CONTINUE);
			hcp->dup_off = off;
		} else {
			hk = H_PAIRDATA(hcp->page, hcp->indx);
			if (((HKEYDATA *)hk)->type == H_OFFPAGE) {
				/* Off-page data item: compare via __db_moff. */
				memcpy(&tlen,
				    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
				memcpy(&pgno,
				    HOFFPAGE_PGNO(hk), sizeof(db_pgno_t));
				if ((ret = __db_moff(dbp, val,
				    pgno, tlen, dbp->dup_compare, &cmp)) != 0)
					return (ret);
			} else {
				/*
				 * We do not zero tmp_val since the comparison
				 * routines may only look at data and size.
				 */
				tmp_val.data = HKEYDATA_DATA(hk);
				tmp_val.size = LEN_HDATA(hcp->page,
				    dbp->pgsize, hcp->indx);
				cmp = dbp->dup_compare == NULL ?
				    __bam_defcmp(dbp, &tmp_val, val) :
				    dbp->dup_compare(dbp, &tmp_val, val);
			}
		}

		if (cmp != 0)
			return (DB_NOTFOUND);
	}

	/*
	 * Now, everything is initialized, grab a duplicate if
	 * necessary.
	 */
	if (F_ISSET(hcp, H_ISDUP)) {	/* Case 3 */
		/*
		 * Copy the DBT in case we are retrieving into user
		 * memory and we need the parameters for it.  If the
		 * user requested a partial, then we need to adjust
		 * the user's parameters to get the partial of the
		 * duplicate which is itself a partial.
		 */
		memcpy(&tmp_val, val, sizeof(*val));
		if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) {
			/*
			 * Take the user's length unless it would go
			 * beyond the end of the duplicate.
			 *
			 * NOTE(review): mixing dup_off into the bounds test
			 * against dup_len looks inconsistent with the dlen
			 * clamp just below, which uses dup_len alone —
			 * verify whether this test should simply be
			 * "tmp_val.doff > hcp->dup_len".
			 */
			if (tmp_val.doff + hcp->dup_off > hcp->dup_len)
				tmp_val.dlen = 0;
			else if (tmp_val.dlen + tmp_val.doff >
			    hcp->dup_len)
				tmp_val.dlen =
				    hcp->dup_len - tmp_val.doff;

			/*
			 * Calculate the new offset.
			 */
			tmp_val.doff += hcp->dup_off;
		} else {
			/*
			 * Return exactly one duplicate entry, skipping its
			 * leading db_indx_t length prefix.
			 */
			F_SET(&tmp_val, DB_DBT_PARTIAL);
			tmp_val.dlen = hcp->dup_len;
			tmp_val.doff = hcp->dup_off + sizeof(db_indx_t);
		}
		myval = &tmp_val;
	}

	/*
	 * Finally, if we had a duplicate, pp, ndx, and myval should be
	 * set appropriately.
	 */
	if ((ret = __db_ret(dbp, pp, ndx, myval, &dbc->rdata.data,
	    &dbc->rdata.ulen)) != 0)
		return (ret);

	/*
	 * In case we sent a temporary off to db_ret, set the real
	 * return values.
	 */
	val->data = myval->data;
	val->size = myval->size;

	F_SET(val, DB_DBT_ISSET);

	return (0);
}

/*
 * __ham_overwrite --
 *	Replace the data item at the cursor with nval, handling on-page
 *	duplicate sets — including partial overwrites of a duplicate and
 *	conversion to off-page duplicates when the set grows too large.
 */
static int
__ham_overwrite(dbc, nval, flags)
	DBC *dbc;
	DBT *nval;
	u_int32_t flags;
{
	DB *dbp;
	HASH_CURSOR *hcp;
	DBT *myval, tmp_val, tmp_val2;
	void *newrec;
	u_int8_t *hk, *p;
	u_int32_t len, nondup_size;
	db_indx_t newsize;
	int ret;

	dbp = dbc->dbp;
	hcp = (HASH_CURSOR *)dbc->internal;
	if (F_ISSET(hcp, H_ISDUP)) {
		/*
		 * This is an overwrite of a duplicate. We should never
		 * be off-page at this point.
		 */
		DB_ASSERT(hcp->opd == NULL);
		/* On page dups */
		if (F_ISSET(nval, DB_DBT_PARTIAL)) {
			/*
			 * We're going to have to get the current item, then
			 * construct the record, do any padding and do a
			 * replace.
			 */
			memset(&tmp_val, 0, sizeof(tmp_val));
			if ((ret =
			    __ham_dup_return (dbc, &tmp_val, DB_CURRENT)) != 0)
				return (ret);

			/* Figure out new size. */
			nondup_size = tmp_val.size;
			newsize = nondup_size;

			/*
			 * Three cases:
			 * 1. strictly append (may need to allocate space
			 *	for pad bytes; really gross).
			 * 2. overwrite some and append.
			 * 3. strictly overwrite.
			 */
			if (nval->doff > nondup_size)
				newsize +=
				    (nval->doff - nondup_size + nval->size);
			else if (nval->doff + nval->dlen > nondup_size)
				newsize += nval->size -
				    (nondup_size - nval->doff);
			else
				newsize += nval->size - nval->dlen;

			/*
			 * Make sure that the new size doesn't put us over
			 * the onpage duplicate size in which case we need
			 * to convert to off-page duplicates.
			 */
			if (ISBIG(hcp, hcp->dup_tlen - nondup_size + newsize)) {
				if ((ret = __ham_dup_convert(dbc)) != 0)
					return (ret);
				return (hcp->opd->c_am_put(hcp->opd,
				    NULL, nval, flags, NULL));
			}

			if ((ret = __os_malloc(dbp->dbenv,
			    DUP_SIZE(newsize), NULL, &newrec)) != 0)
				return (ret);
			memset(&tmp_val2, 0, sizeof(tmp_val2));
			F_SET(&tmp_val2, DB_DBT_PARTIAL);

			/*
			 * Construct the record: duplicate entries carry
			 * their length both before and after the data.
			 */
			p = newrec;
			/* Initial size.
			 */
			memcpy(p, &newsize, sizeof(db_indx_t));
			p += sizeof(db_indx_t);

			/* First part of original record. */
			len = nval->doff > tmp_val.size
			    ? tmp_val.size : nval->doff;
			memcpy(p, tmp_val.data, len);
			p += len;

			if (nval->doff > tmp_val.size) {
				/* Padding */
				memset(p, 0, nval->doff - tmp_val.size);
				p += nval->doff - tmp_val.size;
			}

			/* New bytes */
			memcpy(p, nval->data, nval->size);
			p += nval->size;

			/* End of original record (if there is any) */
			if (nval->doff + nval->dlen < tmp_val.size) {
				len = tmp_val.size - nval->doff - nval->dlen;
				memcpy(p, (u_int8_t *)tmp_val.data +
				    nval->doff + nval->dlen, len);
				p += len;
			}

			/* Final size. */
			memcpy(p, &newsize, sizeof(db_indx_t));

			/*
			 * Make sure that the caller isn't corrupting
			 * the sort order.
			 */
			if (dbp->dup_compare != NULL) {
				tmp_val2.data =
				    (u_int8_t *)newrec + sizeof(db_indx_t);
				tmp_val2.size = newsize;
				if (dbp->dup_compare(
				    dbp, &tmp_val, &tmp_val2) != 0) {
					(void)__os_free(newrec,
					    DUP_SIZE(newsize));
					return (__db_duperr(dbp, flags));
				}
			}

			tmp_val2.data = newrec;
			tmp_val2.size = DUP_SIZE(newsize);
			tmp_val2.doff = hcp->dup_off;
			tmp_val2.dlen = DUP_SIZE(hcp->dup_len);

			ret = __ham_replpair(dbc, &tmp_val2, 0);
			(void)__os_free(newrec, DUP_SIZE(newsize));

			/* Update cursor */
			if (ret != 0)
				return (ret);

			if (newsize > nondup_size)
				hcp->dup_tlen += (newsize - nondup_size);
			else
				hcp->dup_tlen -= (nondup_size - newsize);
			hcp->dup_len = DUP_SIZE(newsize);
			return (0);
		} else {
			/* Check whether we need to convert to off page. */
			if (ISBIG(hcp,
			    hcp->dup_tlen - hcp->dup_len + nval->size)) {
				if ((ret = __ham_dup_convert(dbc)) != 0)
					return (ret);
				return (hcp->opd->c_am_put(hcp->opd,
				    NULL, nval, flags, NULL));
			}

			/* Make sure we maintain sort order.
			 */
			if (dbp->dup_compare != NULL) {
				tmp_val2.data =
				    HKEYDATA_DATA(H_PAIRDATA(hcp->page,
				    hcp->indx)) + hcp->dup_off +
				    sizeof(db_indx_t);
				tmp_val2.size = hcp->dup_len;
				if (dbp->dup_compare(dbp, nval, &tmp_val2) != 0)
					return (EINVAL);
			}
			/* Overwriting a complete duplicate. */
			if ((ret =
			    __ham_make_dup(dbp->dbenv, nval,
			    &tmp_val, &dbc->rdata.data, &dbc->rdata.ulen)) != 0)
				return (ret);
			/* Now fix what we are replacing. */
			tmp_val.doff = hcp->dup_off;
			tmp_val.dlen = DUP_SIZE(hcp->dup_len);

			/* Update cursor */
			if (nval->size > hcp->dup_len)
				hcp->dup_tlen += (nval->size - hcp->dup_len);
			else
				hcp->dup_tlen -= (hcp->dup_len - nval->size);
			hcp->dup_len = DUP_SIZE(nval->size);
		}
		myval = &tmp_val;
	} else if (!F_ISSET(nval, DB_DBT_PARTIAL)) {
		/*
		 * Put/overwrite of a non-duplicate: express the full
		 * replacement as a partial covering the whole old item.
		 */
		memcpy(&tmp_val, nval, sizeof(*nval));
		F_SET(&tmp_val, DB_DBT_PARTIAL);
		tmp_val.doff = 0;
		hk = H_PAIRDATA(hcp->page, hcp->indx);
		if (HPAGE_PTYPE(hk) == H_OFFPAGE)
			memcpy(&tmp_val.dlen,
			    HOFFPAGE_TLEN(hk), sizeof(u_int32_t));
		else
			tmp_val.dlen = LEN_HDATA(hcp->page,
			    hcp->hdr->dbmeta.pagesize, hcp->indx);
		myval = &tmp_val;
	} else
		/* Regular partial put */
		myval = nval;

	return (__ham_replpair(dbc, myval, 0));
}

/*
 * Given a key and a cursor, sets the cursor to the page/ndx on which
 * the key resides.  If the key is found, the cursor H_OK flag is set
 * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set.
 * If the key is not found, the H_OK flag is not set.  If the sought
 * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields
 * are set indicating where an add might take place.  If it is 0,
 * none of the cursor pointer fields are valid.
+ */ +static int +__ham_lookup(dbc, key, sought, mode, pgnop) + DBC *dbc; + const DBT *key; + u_int32_t sought; + db_lockmode_t mode; + db_pgno_t *pgnop; +{ + DB *dbp; + HASH_CURSOR *hcp; + db_pgno_t pgno; + u_int32_t tlen; + int match, ret; + u_int8_t *hk, *dk; + + dbp = dbc->dbp; + hcp = (HASH_CURSOR *)dbc->internal; + /* + * Set up cursor so that we're looking for space to add an item + * as we cycle through the pages looking for the key. + */ + if ((ret = __ham_item_reset(dbc)) != 0) + return (ret); + hcp->seek_size = sought; + + hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size); + hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); + + while (1) { + *pgnop = PGNO_INVALID; + if ((ret = __ham_item_next(dbc, mode, pgnop)) != 0) + return (ret); + + if (F_ISSET(hcp, H_NOMORE)) + break; + + hk = H_PAIRKEY(hcp->page, hcp->indx); + switch (HPAGE_PTYPE(hk)) { + case H_OFFPAGE: + memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); + if (tlen == key->size) { + memcpy(&pgno, + HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); + if ((ret = __db_moff(dbp, + key, pgno, tlen, NULL, &match)) != 0) + return (ret); + if (match == 0) + goto found_key; + } + break; + case H_KEYDATA: + if (key->size == + LEN_HKEY(hcp->page, dbp->pgsize, hcp->indx) && + memcmp(key->data, + HKEYDATA_DATA(hk), key->size) == 0) { + /* Found the key, check for data type. */ +found_key: F_SET(hcp, H_OK); + dk = H_PAIRDATA(hcp->page, hcp->indx); + if (HPAGE_PTYPE(dk) == H_OFFDUP) + memcpy(pgnop, HOFFDUP_PGNO(dk), + sizeof(db_pgno_t)); + return (0); + } + break; + case H_DUPLICATE: + case H_OFFDUP: + /* + * These are errors because keys are never + * duplicated, only data items are. + */ + return (__db_pgfmt(dbp, PGNO(hcp->page))); + } + } + + /* + * Item was not found. + */ + + if (sought != 0) + return (ret); + + return (ret); +} + +/* + * __ham_init_dbt -- + * Initialize a dbt using some possibly already allocated storage + * for items. 
 *
 * PUBLIC: int __ham_init_dbt __P((DB_ENV *,
 * PUBLIC:     DBT *, u_int32_t, void **, u_int32_t *));
 */
int
__ham_init_dbt(dbenv, dbt, size, bufp, sizep)
	DB_ENV *dbenv;
	DBT *dbt;
	u_int32_t size;
	void **bufp;
	u_int32_t *sizep;
{
	int ret;

	memset(dbt, 0, sizeof(*dbt));
	/* Grow the caller's scratch buffer only if it is too small. */
	if (*sizep < size) {
		if ((ret = __os_realloc(dbenv, size, NULL, bufp)) != 0) {
			/* Allocation failed; record that we hold no space. */
			*sizep = 0;
			return (ret);
		}
		*sizep = size;
	}
	dbt->data = *bufp;
	dbt->size = size;
	return (0);
}

/*
 * Adjust the cursor after an insert or delete.  The cursor passed is
 * the one that was operated upon; we just need to check any of the
 * others.
 *
 * len indicates the length of the item added/deleted
 * add indicates if the item indicated by the cursor has just been
 * added (add == 1) or deleted (add == 0).
 * dup indicates if the addition occurred into a duplicate set.
 *
 * PUBLIC: int __ham_c_update
 * PUBLIC:     __P((DBC *, u_int32_t, int, int));
 */
int
__ham_c_update(dbc, len, add, is_dup)
	DBC *dbc;
	u_int32_t len;
	int add, is_dup;
{
	DB *dbp, *ldbp;
	DBC *cp;
	DB_ENV *dbenv;
	DB_LSN lsn;
	DB_TXN *my_txn;
	HASH_CURSOR *hcp, *lcp;
	int found, ret;
	u_int32_t order;

	dbp = dbc->dbp;
	dbenv = dbp->dbenv;
	hcp = (HASH_CURSOR *)dbc->internal;

	/*
	 * Adjustment will only be logged if this is a subtransaction.
	 * Only subtransactions can abort and affect their parent
	 * transactions' cursors.
	 */

	my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL;
	found = 0;

	MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp);

	/*
	 * Calculate the order of this deleted record.
	 * This will be one greater than any cursor that is pointing
	 * at this record and already marked as deleted.
	 */
	order = 0;
	if (!add) {
		order = 1;
		/*
		 * Scan every open cursor on this file for already-deleted
		 * entries at the same slot; our order must exceed theirs.
		 */
		for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
		    ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
		    ldbp = LIST_NEXT(ldbp, dblistlinks)) {
			MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
			for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
			    cp = TAILQ_NEXT(cp, links)) {
				if (cp == dbc || cp->dbtype != DB_HASH)
					continue;
				lcp = (HASH_CURSOR *)cp->internal;
				if (F_ISSET(lcp, H_DELETED) &&
				    hcp->pgno == lcp->pgno &&
				    hcp->indx == lcp->indx &&
				    order <= lcp->order &&
				    (!is_dup || hcp->dup_off == lcp->dup_off))
					order = lcp->order +1;
			}
			MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
		}
		hcp->order = order;
	}

	for (ldbp = __dblist_get(dbenv, dbp->adj_fileid);
	    ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid;
	    ldbp = LIST_NEXT(ldbp, dblistlinks)) {
		MUTEX_THREAD_LOCK(dbenv, dbp->mutexp);
		for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL;
		    cp = TAILQ_NEXT(cp, links)) {
			if (cp == dbc || cp->dbtype != DB_HASH)
				continue;

			lcp = (HASH_CURSOR *)cp->internal;

			if (lcp->pgno != hcp->pgno || lcp->indx == NDX_INVALID)
				continue;

			/* Remember if another transaction's cursor moved. */
			if (my_txn != NULL && cp->txn != my_txn)
				found = 1;

			if (!is_dup) {
				if (add) {
					/*
					 * This routine is not called to add
					 * non-dup records which are always put
					 * at the end.  It is only called from
					 * recovery in this case and the
					 * cursor will be marked deleted.
					 * We are "undeleting" so unmark all
					 * cursors with the same order.
					 */
					if (lcp->indx == hcp->indx
					    && F_ISSET(lcp, H_DELETED)) {
						if (lcp->order == hcp->order)
							F_CLR(lcp, H_DELETED);
						else if (lcp->order >
						    hcp->order) {

							/*
							 * If we've moved this cursor's
							 * index, split its order
							 * number--i.e., decrement it by
							 * enough so that the lowest
							 * cursor moved has order 1.
							 * cp_arg->order is the split
							 * point, so decrement by one
							 * less than that.
							 */
							lcp->order -=
							    (hcp->order - 1);
							lcp->indx += 2;
						}
					} else if (lcp->indx >= hcp->indx)
						lcp->indx += 2;

				} else {
					if (lcp->indx > hcp->indx) {
						lcp->indx -= 2;
						if (lcp->indx == hcp->indx
						    && F_ISSET(lcp, H_DELETED))
							lcp->order += order;
					} else if (lcp->indx == hcp->indx
					    && !F_ISSET(lcp, H_DELETED)) {
						F_SET(lcp, H_DELETED);
						lcp->order = order;
					}
				}
			} else if (lcp->indx == hcp->indx) {
				/*
				 * Handle duplicates.  This routine is
				 * only called for on page dups.
				 * Off page dups are handled by btree/rtree
				 * code.
				 */
				if (add) {
					lcp->dup_tlen += len;
					if (lcp->dup_off == hcp->dup_off
					    && F_ISSET(hcp, H_DELETED)
					    && F_ISSET(lcp, H_DELETED)) {
						/* Abort of a delete. */
						if (lcp->order == hcp->order)
							F_CLR(lcp, H_DELETED);
						else if (lcp->order >
						    hcp->order) {
							lcp->order -=
							    (hcp->order -1);
							lcp->dup_off += len;
						}
					} else if (lcp->dup_off >= hcp->dup_off)
						lcp->dup_off += len;
				} else {
					lcp->dup_tlen -= len;
					if (lcp->dup_off > hcp->dup_off) {
						lcp->dup_off -= len;
						if (lcp->dup_off == hcp->dup_off
						    && F_ISSET(lcp, H_DELETED))
							lcp->order += order;
					} else if (lcp->dup_off ==
					    hcp->dup_off &&
					    !F_ISSET(lcp, H_DELETED)) {
						F_SET(lcp, H_DELETED);
						lcp->order = order;
					}
				}
			}
		}
		MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp);
	}
	MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp);

	/*
	 * Log adjustments made to other transactions' cursors so that
	 * an abort of this subtransaction can undo them.
	 */
	if (found != 0 && DB_LOGGING(dbc)) {
		if ((ret = __ham_curadj_log(dbenv,
		    my_txn, &lsn, 0, dbp->log_fileid, hcp->pgno,
		    hcp->indx, len, hcp->dup_off, add, is_dup, order)) != 0)
			return (ret);
	}

	return (0);
}

/*
 * __ham_get_clist --
 *
 * Get a list of cursors either on a particular bucket or on a particular
 * page and index combination.  The former is so that we can update
 * cursors on a split.  The latter is so we can update cursors when we
 * move items off page.
+ * + * PUBLIC: int __ham_get_clist __P((DB *, + * PUBLIC: db_pgno_t, u_int32_t, DBC ***)); + */ +int +__ham_get_clist(dbp, bucket, indx, listp) + DB *dbp; + db_pgno_t bucket; + u_int32_t indx; + DBC ***listp; +{ + DB *ldbp; + DBC *cp; + DB_ENV *dbenv; + int nalloc, nused, ret; + + /* + * Assume that finding anything is the exception, so optimize for + * the case where there aren't any. + */ + nalloc = nused = 0; + *listp = NULL; + dbenv = dbp->dbenv; + + MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) + if (cp->dbtype == DB_HASH && + ((indx == NDX_INVALID && + ((HASH_CURSOR *)(cp->internal))->bucket + == bucket) || (indx != NDX_INVALID && + cp->internal->pgno == bucket && + cp->internal->indx == indx))) { + if (nused >= nalloc) { + nalloc += 10; + if ((ret = __os_realloc(dbp->dbenv, + nalloc * sizeof(HASH_CURSOR *), + NULL, listp)) != 0) + return (ret); + } + (*listp)[nused++] = cp; + } + + MUTEX_THREAD_UNLOCK(dbp->dbenv, dbp->mutexp); + } + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); + + if (listp != NULL) { + if (nused >= nalloc) { + nalloc++; + if ((ret = __os_realloc(dbp->dbenv, + nalloc * sizeof(HASH_CURSOR *), NULL, listp)) != 0) + return (ret); + } + (*listp)[nused] = NULL; + } + return (0); +} + +static int +__ham_del_dups(orig_dbc, key) + DBC *orig_dbc; + DBT *key; +{ + DBC *dbc; + DBT data, lkey; + int ret, t_ret; + + /* Allocate a cursor. */ + if ((ret = orig_dbc->c_dup(orig_dbc, &dbc, 0)) != 0) + return (ret); + + /* + * Walk a cursor through the key/data pairs, deleting as we go. Set + * the DB_DBT_USERMEM flag, as this might be a threaded application + * and the flags checking will catch us. We don't actually want the + * keys or data, so request a partial of length 0. 
	 */
	memset(&lkey, 0, sizeof(lkey));
	F_SET(&lkey, DB_DBT_USERMEM | DB_DBT_PARTIAL);
	memset(&data, 0, sizeof(data));
	F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL);

	/* Walk through the set of key/data pairs, deleting as we go. */
	if ((ret = dbc->c_get(dbc, key, &data, DB_SET)) != 0) {
		/* A missing key is not an error for a delete-all. */
		if (ret == DB_NOTFOUND)
			ret = 0;
		goto err;
	}

	for (;;) {
		if ((ret = dbc->c_del(dbc, 0)) != 0)
			goto err;
		if ((ret = dbc->c_get(dbc, &lkey, &data, DB_NEXT_DUP)) != 0) {
			/* DB_NOTFOUND means the dup set is exhausted. */
			if (ret == DB_NOTFOUND) {
				ret = 0;
				break;
			}
			goto err;
		}
	}

err:	/*
	 * Discard the cursor.  This will cause the underlying off-page dup
	 * tree to go away as well as the actual entry on the page.
	 */
	if ((t_ret = dbc->c_close(dbc)) != 0 && ret == 0)
		ret = t_ret;

	return (ret);

}

/*
 * __ham_c_writelock --
 *	Upgrade the cursor's bucket lock to a write lock when we hold
 *	no lock or only a read lock.
 */
static int
__ham_c_writelock(dbc)
	DBC *dbc;
{
	HASH_CURSOR *hcp;
	DB_LOCK tmp_lock;
	int ret;

	/*
	 * All we need do is acquire the lock and let the off-page
	 * dup tree do its thing.
	 */
	if (!STD_LOCKING(dbc))
		return (0);

	hcp = (HASH_CURSOR *)dbc->internal;
	if ((hcp->lock.off == LOCK_INVALID || hcp->lock_mode == DB_LOCK_READ)) {
		/* Hold the old lock until the write lock is granted. */
		tmp_lock = hcp->lock;
		if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) != 0)
			return (ret);
		if (tmp_lock.off != LOCK_INVALID &&
		    (ret = lock_put(dbc->dbp->dbenv, &tmp_lock)) != 0)
			return (ret);
	}
	return (0);
}

/*
 * __ham_c_chgpg --
 *
 * Adjust the cursors after moving an item from one page to another.
 * If the old_index is NDX_INVALID, that means that we copied the
 * page wholesale and we're leaving indices intact and just changing
 * the page number.
+ * + * PUBLIC: int __ham_c_chgpg + * PUBLIC: __P((DBC *, db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); + */ +int +__ham_c_chgpg(dbc, old_pgno, old_index, new_pgno, new_index) + DBC *dbc; + db_pgno_t old_pgno, new_pgno; + u_int32_t old_index, new_index; +{ + DB *dbp, *ldbp; + DB_ENV *dbenv; + DB_LSN lsn; + DB_TXN *my_txn; + DBC *cp; + HASH_CURSOR *hcp; + int found, ret; + + dbp = dbc->dbp; + dbenv = dbp->dbenv; + + my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; + found = 0; + + MUTEX_THREAD_LOCK(dbenv, dbenv->dblist_mutexp); + for (ldbp = __dblist_get(dbenv, dbp->adj_fileid); + ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; + ldbp = LIST_NEXT(ldbp, dblistlinks)) { + MUTEX_THREAD_LOCK(dbenv, dbp->mutexp); + for (cp = TAILQ_FIRST(&ldbp->active_queue); cp != NULL; + cp = TAILQ_NEXT(cp, links)) { + if (cp == dbc || cp->dbtype != DB_HASH) + continue; + + hcp = (HASH_CURSOR *)cp->internal; + if (hcp->pgno == old_pgno) { + if (old_index == NDX_INVALID) { + hcp->pgno = new_pgno; + } else if (hcp->indx == old_index) { + hcp->pgno = new_pgno; + hcp->indx = new_index; + } else + continue; + if (my_txn != NULL && cp->txn != my_txn) + found = 1; + } + } + MUTEX_THREAD_UNLOCK(dbenv, dbp->mutexp); + } + MUTEX_THREAD_UNLOCK(dbenv, dbenv->dblist_mutexp); + + if (found != 0 && DB_LOGGING(dbc)) { + if ((ret = __ham_chgpg_log(dbenv, + my_txn, &lsn, 0, dbp->log_fileid, DB_HAM_CHGPG, + old_pgno, new_pgno, old_index, new_index)) != 0) + return (ret); + } + return (0); +} |