diff options
Diffstat (limited to 'db/hash')
-rw-r--r-- | db/hash/hash.c | 2144 | ||||
-rw-r--r-- | db/hash/hash.src | 279 | ||||
-rw-r--r-- | db/hash/hash_auto.c | 2596 | ||||
-rw-r--r-- | db/hash/hash_autop.c | 543 | ||||
-rw-r--r-- | db/hash/hash_conv.c | 110 | ||||
-rw-r--r-- | db/hash/hash_dup.c | 895 | ||||
-rw-r--r-- | db/hash/hash_func.c | 240 | ||||
-rw-r--r-- | db/hash/hash_meta.c | 98 | ||||
-rw-r--r-- | db/hash/hash_method.c | 183 | ||||
-rw-r--r-- | db/hash/hash_open.c | 547 | ||||
-rw-r--r-- | db/hash/hash_page.c | 2633 | ||||
-rw-r--r-- | db/hash/hash_rec.c | 1581 | ||||
-rw-r--r-- | db/hash/hash_reclaim.c | 96 | ||||
-rw-r--r-- | db/hash/hash_stat.c | 514 | ||||
-rw-r--r-- | db/hash/hash_stub.c | 450 | ||||
-rw-r--r-- | db/hash/hash_upgrade.c | 314 | ||||
-rw-r--r-- | db/hash/hash_verify.c | 1082 |
17 files changed, 0 insertions, 14305 deletions
diff --git a/db/hash/hash.c b/db/hash/hash.c deleted file mode 100644 index 0f22e811a..000000000 --- a/db/hash/hash.c +++ /dev/null @@ -1,2144 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id: hash.c,v 12.41 2007/05/17 17:17:59 bostic Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/mp.h" - -static int __ham_bulk __P((DBC *, DBT *, u_int32_t)); -static int __hamc_close __P((DBC *, db_pgno_t, int *)); -static int __hamc_del __P((DBC *)); -static int __hamc_destroy __P((DBC *)); -static int __hamc_get __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); -static int __hamc_put __P((DBC *, DBT *, DBT *, u_int32_t, db_pgno_t *)); -static int __hamc_writelock __P((DBC *)); -static int __ham_dup_return __P((DBC *, DBT *, u_int32_t)); -static int __ham_expand_table __P((DBC *)); -static int __ham_lookup __P((DBC *, - const DBT *, u_int32_t, db_lockmode_t, db_pgno_t *)); -static int __ham_overwrite __P((DBC *, DBT *, u_int32_t)); - -/* - * __ham_quick_delete -- - * This function is called by __db_del when the appropriate conditions - * are met, and it performs the delete in the optimized way. 
- * - * PUBLIC: int __ham_quick_delete __P((DBC *)); - */ -int -__ham_quick_delete(dbc) - DBC *dbc; -{ - int ret, t_ret; - - /* - * When performing a DB->del operation not involving secondary indices - * and not removing an off-page duplicate tree, we can speed things up - * substantially by removing the entire duplicate set, if any is - * present, in one operation, rather than by conjuring up and deleting - * each of the items individually. (All are stored in one big HKEYDATA - * structure.) We don't bother to distinguish on-page duplicate sets - * from single, non-dup items; they're deleted in exactly the same way. - * - * The cursor should be set to the first item in the duplicate set, or - * to the sole key/data pair when the key does not have a duplicate set, - * before the function is called. - * - * We do not need to call CDB_LOCKING_INIT, __db_del calls here with - * a write cursor. - * - * Assert we're initialized, but not to an off-page duplicate. - * Assert we're not using secondary indices. - */ - DB_ASSERT(dbc->dbp->dbenv, IS_INITIALIZED(dbc)); - DB_ASSERT(dbc->dbp->dbenv, dbc->internal->opd == NULL); - DB_ASSERT(dbc->dbp->dbenv, !F_ISSET(dbc->dbp, DB_AM_SECONDARY)); - DB_ASSERT(dbc->dbp->dbenv, - LIST_FIRST(&dbc->dbp->s_secondaries) == NULL); - - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - - if ((ret = __hamc_writelock(dbc)) == 0) - ret = __ham_del_pair(dbc, 0); - - if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* ****************** CURSORS ********************************** */ -/* - * __hamc_init -- - * Initialize the hash-specific portion of a cursor. 
- * - * PUBLIC: int __hamc_init __P((DBC *)); - */ -int -__hamc_init(dbc) - DBC *dbc; -{ - DB_ENV *dbenv; - HASH_CURSOR *new_curs; - int ret; - - dbenv = dbc->dbp->dbenv; - if ((ret = __os_calloc(dbenv, - 1, sizeof(struct cursor_t), &new_curs)) != 0) - return (ret); - if ((ret = __os_malloc(dbenv, - dbc->dbp->pgsize, &new_curs->split_buf)) != 0) { - __os_free(dbenv, new_curs); - return (ret); - } - - dbc->internal = (DBC_INTERNAL *) new_curs; - dbc->close = dbc->c_close = __dbc_close_pp; - dbc->count = dbc->c_count = __dbc_count_pp; - dbc->del = dbc->c_del = __dbc_del_pp; - dbc->dup = dbc->c_dup = __dbc_dup_pp; - dbc->get = dbc->c_get = __dbc_get_pp; - dbc->pget = dbc->c_pget = __dbc_pget_pp; - dbc->put = dbc->c_put = __dbc_put_pp; - dbc->am_bulk = __ham_bulk; - dbc->am_close = __hamc_close; - dbc->am_del = __hamc_del; - dbc->am_destroy = __hamc_destroy; - dbc->am_get = __hamc_get; - dbc->am_put = __hamc_put; - dbc->am_writelock = __hamc_writelock; - - return (__ham_item_init(dbc)); -} - -/* - * __hamc_close -- - * Close down the cursor from a single use. - */ -static int -__hamc_close(dbc, root_pgno, rmroot) - DBC *dbc; - db_pgno_t root_pgno; - int *rmroot; -{ - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HKEYDATA *dp; - db_lockmode_t lock_mode; - int doroot, gotmeta, ret, t_ret; - - COMPQUIET(rmroot, 0); - mpf = dbc->dbp->mpf; - doroot = gotmeta = ret = 0; - hcp = (HASH_CURSOR *) dbc->internal; - - /* Check for off page dups. */ - if (dbc->internal->opd != NULL) { - if ((ret = __ham_get_meta(dbc)) != 0) - goto done; - gotmeta = 1; - lock_mode = DB_LOCK_READ; - - /* To support dirty reads we must reget the write lock. */ - if (F_ISSET(dbc->dbp, DB_AM_READ_UNCOMMITTED) && - F_ISSET((BTREE_CURSOR *) - dbc->internal->opd->internal, C_DELETED)) - lock_mode = DB_LOCK_WRITE; - - if ((ret = __ham_get_cpage(dbc, lock_mode)) != 0) - goto out; - dp = (HKEYDATA *)H_PAIRDATA(dbc->dbp, hcp->page, hcp->indx); - - /* If it's not a dup we aborted before we changed it. 
*/ - if (HPAGE_PTYPE(dp) == H_OFFDUP) - memcpy(&root_pgno, - HOFFPAGE_PGNO(dp), sizeof(db_pgno_t)); - else - root_pgno = PGNO_INVALID; - - if ((ret = - hcp->opd->am_close(hcp->opd, root_pgno, &doroot)) != 0) - goto out; - if (doroot != 0) { - if ((ret = __memp_dirty(mpf, &hcp->page, - dbc->txn, dbc->priority, 0)) != 0) - goto out; - if ((ret = __ham_del_pair(dbc, 0)) != 0) - goto out; - } - } - -out: if (hcp->page != NULL && (t_ret = - __memp_fput(mpf, hcp->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if (gotmeta != 0 && (t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - -done: if ((t_ret = __ham_item_init(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __hamc_destroy -- - * Cleanup the access method private part of a cursor. - */ -static int -__hamc_destroy(dbc) - DBC *dbc; -{ - HASH_CURSOR *hcp; - - hcp = (HASH_CURSOR *)dbc->internal; - if (hcp->split_buf != NULL) - __os_free(dbc->dbp->dbenv, hcp->split_buf); - __os_free(dbc->dbp->dbenv, hcp); - - return (0); -} - -/* - * __hamc_count -- - * Return a count of on-page duplicates. 
- * - * PUBLIC: int __hamc_count __P((DBC *, db_recno_t *)); - */ -int -__hamc_count(dbc, recnop) - DBC *dbc; - db_recno_t *recnop; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - db_indx_t len; - db_recno_t recno; - int ret, t_ret; - u_int8_t *p, *pend; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - recno = 0; - - if ((ret = __ham_get_cpage(dbc, DB_LOCK_READ)) != 0) - return (ret); - if (hcp->indx >= NUM_ENT(hcp->page)) { - *recnop = 0; - goto err; - } - - switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { - case H_KEYDATA: - case H_OFFPAGE: - recno = 1; - break; - case H_DUPLICATE: - p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - pend = p + - LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); - for (; p < pend; recno++) { - /* p may be odd, so copy rather than just dereffing */ - memcpy(&len, p, sizeof(db_indx_t)); - p += 2 * sizeof(db_indx_t) + len; - } - - break; - default: - ret = __db_pgfmt(dbp->dbenv, hcp->pgno); - goto err; - } - - *recnop = recno; - -err: if ((t_ret = - __memp_fput(mpf, hcp->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - hcp->page = NULL; - return (ret); -} - -static int -__hamc_del(dbc) - DBC *dbc; -{ - DB *dbp; - DBT repldbt; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - int ret, t_ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - if (F_ISSET(hcp, H_DELETED)) - return (DB_NOTFOUND); - - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - - if ((ret = __ham_get_cpage(dbc, DB_LOCK_WRITE)) != 0) - goto out; - - /* Off-page duplicates. */ - if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) - goto out; - - if ((ret = __memp_dirty(mpf, - &hcp->page, dbc->txn, dbc->priority, 0)) != 0) - goto out; - - if (F_ISSET(hcp, H_ISDUP)) { /* On-page duplicate. 
*/ - if (hcp->dup_off == 0 && - DUP_SIZE(hcp->dup_len) == LEN_HDATA(dbp, hcp->page, - hcp->hdr->dbmeta.pagesize, hcp->indx)) - ret = __ham_del_pair(dbc, 0); - else { - repldbt.flags = 0; - F_SET(&repldbt, DB_DBT_PARTIAL); - repldbt.doff = hcp->dup_off; - repldbt.dlen = DUP_SIZE(hcp->dup_len); - repldbt.size = 0; - repldbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, - hcp->indx)); - if ((ret = __ham_replpair(dbc, &repldbt, 0)) == 0) { - hcp->dup_tlen -= DUP_SIZE(hcp->dup_len); - F_SET(hcp, H_DELETED); - ret = __hamc_update(dbc, DUP_SIZE(hcp->dup_len), - DB_HAM_CURADJ_DEL, 1); - } - } - } else /* Not a duplicate */ - ret = __ham_del_pair(dbc, 0); - -out: if (hcp->page != NULL) { - if ((t_ret = __memp_fput(mpf, - hcp->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - hcp->page = NULL; - } - if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __hamc_dup -- - * Duplicate a hash cursor, such that the new one holds appropriate - * locks for the position of the original. - * - * PUBLIC: int __hamc_dup __P((DBC *, DBC *)); - */ -int -__hamc_dup(orig_dbc, new_dbc) - DBC *orig_dbc, *new_dbc; -{ - HASH_CURSOR *orig, *new; - - orig = (HASH_CURSOR *)orig_dbc->internal; - new = (HASH_CURSOR *)new_dbc->internal; - - new->bucket = orig->bucket; - new->lbucket = orig->lbucket; - new->dup_off = orig->dup_off; - new->dup_len = orig->dup_len; - new->dup_tlen = orig->dup_tlen; - - if (F_ISSET(orig, H_DELETED)) - F_SET(new, H_DELETED); - if (F_ISSET(orig, H_ISDUP)) - F_SET(new, H_ISDUP); - - return (0); -} - -static int -__hamc_get(dbc, key, data, flags, pgnop) - DBC *dbc; - DBT *key; - DBT *data; - u_int32_t flags; - db_pgno_t *pgnop; -{ - DB *dbp; - DB_ENV *dbenv; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - db_lockmode_t lock_type; - int ret, t_ret; - - hcp = (HASH_CURSOR *)dbc->internal; - dbp = dbc->dbp; - dbenv = dbp->dbenv; - mpf = dbp->mpf; - - /* Clear OR'd in additional bits so we can check for flag equality. 
*/ - if (F_ISSET(dbc, DBC_RMW)) - lock_type = DB_LOCK_WRITE; - else - lock_type = DB_LOCK_READ; - - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - hcp->seek_size = 0; - - ret = 0; - switch (flags) { - case DB_PREV_DUP: - F_SET(hcp, H_DUPONLY); - goto prev; - case DB_PREV_NODUP: - F_SET(hcp, H_NEXT_NODUP); - /* FALLTHROUGH */ - case DB_PREV: - if (IS_INITIALIZED(dbc)) { -prev: ret = __ham_item_prev(dbc, lock_type, pgnop); - break; - } - /* FALLTHROUGH */ - case DB_LAST: - ret = __ham_item_last(dbc, lock_type, pgnop); - break; - case DB_NEXT_DUP: - case DB_GET_BOTHC: - /* cgetchk has already determined that the cursor is set. */ - F_SET(hcp, H_DUPONLY); - goto next; - case DB_NEXT_NODUP: - F_SET(hcp, H_NEXT_NODUP); - /* FALLTHROUGH */ - case DB_NEXT: - if (IS_INITIALIZED(dbc)) { -next: ret = __ham_item_next(dbc, lock_type, pgnop); - break; - } - /* FALLTHROUGH */ - case DB_FIRST: - ret = __ham_item_first(dbc, lock_type, pgnop); - break; - case DB_SET: - case DB_SET_RANGE: - case DB_GET_BOTH: - case DB_GET_BOTH_RANGE: - ret = __ham_lookup(dbc, key, 0, lock_type, pgnop); - break; - case DB_CURRENT: - /* cgetchk has already determined that the cursor is set. */ - if (F_ISSET(hcp, H_DELETED)) { - ret = DB_KEYEMPTY; - goto err; - } - - ret = __ham_item(dbc, lock_type, pgnop); - break; - default: - ret = __db_unknown_flag(dbenv, "__hamc_get", flags); - break; - } - - /* - * Must always enter this loop to do error handling and - * check for big key/data pair. - */ - for (;;) { - if (ret != 0 && ret != DB_NOTFOUND) - goto err; - else if (F_ISSET(hcp, H_OK)) { - if (*pgnop == PGNO_INVALID) - ret = __ham_dup_return(dbc, data, flags); - break; - } else if (!F_ISSET(hcp, H_NOMORE)) { - __db_errx(dbenv, "H_NOMORE returned to __hamc_get"); - ret = EINVAL; - break; - } - - /* - * Ran out of entries in a bucket; change buckets. 
- */ - switch (flags) { - case DB_LAST: - case DB_PREV: - case DB_PREV_DUP: - case DB_PREV_NODUP: - ret = __memp_fput(mpf, hcp->page, dbc->priority); - hcp->page = NULL; - if (hcp->bucket == 0) { - ret = DB_NOTFOUND; - hcp->pgno = PGNO_INVALID; - goto err; - } - F_CLR(hcp, H_ISDUP); - hcp->bucket--; - hcp->indx = NDX_INVALID; - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if (ret == 0) - ret = __ham_item_prev(dbc, lock_type, pgnop); - break; - case DB_FIRST: - case DB_NEXT: - case DB_NEXT_NODUP: - ret = __memp_fput(mpf, hcp->page, dbc->priority); - hcp->page = NULL; - hcp->indx = NDX_INVALID; - hcp->bucket++; - F_CLR(hcp, H_ISDUP); - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if (hcp->bucket > hcp->hdr->max_bucket) { - ret = DB_NOTFOUND; - hcp->pgno = PGNO_INVALID; - goto err; - } - if (ret == 0) - ret = __ham_item_next(dbc, lock_type, pgnop); - break; - case DB_GET_BOTH: - case DB_GET_BOTHC: - case DB_GET_BOTH_RANGE: - case DB_NEXT_DUP: - case DB_SET: - case DB_SET_RANGE: - /* Key not found. */ - ret = DB_NOTFOUND; - goto err; - case DB_CURRENT: - /* - * This should only happen if you are doing deletes and - * reading with concurrent threads and not doing proper - * locking. We return the same error code as we would - * if the cursor were deleted. - */ - ret = DB_KEYEMPTY; - goto err; - default: - DB_ASSERT(dbenv, 0); - } - } - -err: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - - F_CLR(hcp, H_DUPONLY); - F_CLR(hcp, H_NEXT_NODUP); - - return (ret); -} - -/* - * __ham_bulk -- Return bulk data from a hash table. 
- */ -static int -__ham_bulk(dbc, data, flags) - DBC *dbc; - DBT *data; - u_int32_t flags; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *cp; - PAGE *pg; - db_indx_t dup_len, dup_off, dup_tlen, indx, *inp; - db_lockmode_t lock_mode; - db_pgno_t pgno; - int32_t *endp, *offp, *saveoff; - u_int32_t key_off, key_size, pagesize, size, space; - u_int8_t *dbuf, *dp, *hk, *np, *tmp; - int is_dup, is_key; - int need_pg, next_key, no_dup, ret, t_ret; - - ret = 0; - key_off = 0; - dup_len = dup_off = dup_tlen = 0; - size = 0; - dbp = dbc->dbp; - pagesize = dbp->pgsize; - mpf = dbp->mpf; - cp = (HASH_CURSOR *)dbc->internal; - is_key = LF_ISSET(DB_MULTIPLE_KEY) ? 1 : 0; - next_key = is_key && LF_ISSET(DB_OPFLAGS_MASK) != DB_NEXT_DUP; - no_dup = LF_ISSET(DB_OPFLAGS_MASK) == DB_NEXT_NODUP; - dbuf = data->data; - np = dp = dbuf; - - /* Keep track of space that is left. There is an termination entry */ - space = data->ulen; - space -= sizeof(*offp); - - /* Build the offset/size table from the end up. */ - endp = (int32_t *) ((u_int8_t *)dbuf + data->ulen); - endp--; - offp = endp; - - key_size = 0; - lock_mode = F_ISSET(dbc, DBC_RMW) ? 
DB_LOCK_WRITE: DB_LOCK_READ; - -next_pg: - need_pg = 1; - indx = cp->indx; - pg = cp->page; - inp = P_INP(dbp, pg); - - do { - if (is_key) { - hk = H_PAIRKEY(dbp, pg, indx); - if (HPAGE_PTYPE(hk) == H_OFFPAGE) { - memcpy(&key_size, - HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - memcpy(&pgno, - HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - size = key_size; - if (key_size > space) - goto get_key_space; - if ((ret = __bam_bulk_overflow( - dbc, key_size, pgno, np)) != 0) - return (ret); - space -= key_size; - key_off = (u_int32_t)(np - dbuf); - np += key_size; - } else { - if (need_pg) { - dp = np; - size = pagesize - HOFFSET(pg); - if (space < size) { -get_key_space: - if (offp == endp) { - data->size = (u_int32_t) - DB_ALIGN(size + - pagesize, 1024); - return - (DB_BUFFER_SMALL); - } - goto back_up; - } - memcpy(dp, - (u_int8_t *)pg + HOFFSET(pg), size); - need_pg = 0; - space -= size; - np += size; - } - key_size = LEN_HKEY(dbp, pg, pagesize, indx); - key_off = ((inp[indx] - HOFFSET(pg)) + - (u_int32_t)(dp - dbuf)) + - SSZA(HKEYDATA, data); - } - } - - hk = H_PAIRDATA(dbp, pg, indx); - switch (HPAGE_PTYPE(hk)) { - case H_DUPLICATE: - case H_KEYDATA: - if (need_pg) { - dp = np; - size = pagesize - HOFFSET(pg); - if (space < size) { -back_up: - if (indx != 0) { - indx -= 2; - /* XXX - * It's not clear that this is - * the right way to fix this, - * but here goes. - * If we are backing up onto a - * duplicate, then we need to - * position ourselves at the - * end of the duplicate set. - * We probably need to make - * this work for H_OFFDUP too. - * It might be worth making a - * dummy cursor and calling - * __ham_item_prev. 
- */ - tmp = H_PAIRDATA(dbp, pg, indx); - if (HPAGE_PTYPE(tmp) == - H_DUPLICATE) { - dup_off = dup_tlen = - LEN_HDATA(dbp, pg, - pagesize, indx + 1); - memcpy(&dup_len, - HKEYDATA_DATA(tmp), - sizeof(db_indx_t)); - } else { - is_dup = 0; - dup_len = 0; - dup_off = 0; - dup_tlen = 0; - F_CLR(cp, H_ISDUP); - } - goto get_space; - } - /* indx == 0 */ - cp->dup_len = dup_len; - cp->dup_off = dup_off; - cp->dup_tlen = dup_tlen; - if ((ret = __ham_item_prev(dbc, - lock_mode, &pgno)) != 0) { - if (ret != DB_NOTFOUND) - return (ret); - if ((ret = __memp_fput(mpf, - cp->page, - dbc->priority)) != 0) - return (ret); - cp->page = NULL; - if (cp->bucket == 0) { - cp->indx = indx = - NDX_INVALID; - goto get_space; - } - if ((ret = - __ham_get_meta(dbc)) != 0) - return (ret); - - cp->bucket--; - cp->pgno = BUCKET_TO_PAGE(cp, - cp->bucket); - cp->indx = NDX_INVALID; - if ((ret = __ham_release_meta( - dbc)) != 0) - return (ret); - if ((ret = __ham_item_prev(dbc, - lock_mode, &pgno)) != 0) - return (ret); - } - indx = cp->indx; -get_space: - /* - * See if we put any data in the buffer. - */ - if (offp >= endp || - F_ISSET(dbc, DBC_TRANSIENT)) { - data->size = (u_int32_t) - DB_ALIGN(size + - data->ulen - space, 1024); - return (DB_BUFFER_SMALL); - } - /* - * Don't continue; we're all out - * of space, even though we're - * returning success. - */ - next_key = 0; - break; - } - memcpy(dp, (u_int8_t *)pg + HOFFSET(pg), size); - need_pg = 0; - space -= size; - np += size; - } - - /* - * We're about to crack the offset(s) and length(s) - * out of an H_KEYDATA or H_DUPLICATE item. - * There are three cases: - * 1. We were moved into a duplicate set by - * the standard hash cursor code. Respect - * the dup_off and dup_tlen we were given. - * 2. We stumbled upon a duplicate set while - * walking the page on our own. We need to - * recognize it as a dup and set dup_off and - * dup_tlen. - * 3. The current item is not a dup. 
- */ - if (F_ISSET(cp, H_ISDUP)) { - /* Case 1 */ - is_dup = 1; - dup_len = cp->dup_len; - dup_off = cp->dup_off; - dup_tlen = cp->dup_tlen; - } else if (HPAGE_PTYPE(hk) == H_DUPLICATE) { - /* Case 2 */ - is_dup = 1; - /* - * If we run out of memory and bail, - * make sure the fact we're in a dup set - * isn't ignored later. - */ - F_SET(cp, H_ISDUP); - dup_off = 0; - memcpy(&dup_len, - HKEYDATA_DATA(hk), sizeof(db_indx_t)); - dup_tlen = LEN_HDATA(dbp, pg, pagesize, indx); - } else { - /* Case 3 */ - is_dup = 0; - dup_len = 0; - dup_off = 0; - dup_tlen = 0; - } - - do { - space -= (is_key ? 4 : 2) * sizeof(*offp); - size += (is_key ? 4 : 2) * sizeof(*offp); - /* - * Since space is an unsigned, if we happen - * to wrap, then this comparison will turn out - * to be true. XXX Wouldn't it be better to - * simply check above that space is greater than - * the value we're about to subtract??? - */ - if (space > data->ulen) { - if (!is_dup || dup_off == 0) - goto back_up; - dup_off -= (db_indx_t) - DUP_SIZE((u_int32_t)offp[1]); - goto get_space; - } - if (is_key) { - *offp-- = (int32_t)key_off; - *offp-- = (int32_t)key_size; - } - if (is_dup) { - *offp-- = (int32_t)( - ((inp[indx + 1] - HOFFSET(pg)) + - dp - dbuf) + SSZA(HKEYDATA, data) + - dup_off + sizeof(db_indx_t)); - memcpy(&dup_len, - HKEYDATA_DATA(hk) + dup_off, - sizeof(db_indx_t)); - dup_off += DUP_SIZE(dup_len); - *offp-- = dup_len; - } else { - *offp-- = (int32_t)( - ((inp[indx + 1] - HOFFSET(pg)) + - dp - dbuf) + SSZA(HKEYDATA, data)); - *offp-- = LEN_HDATA(dbp, pg, - pagesize, indx); - } - } while (is_dup && dup_off < dup_tlen && no_dup == 0); - F_CLR(cp, H_ISDUP); - break; - case H_OFFDUP: - memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - space -= 2 * sizeof(*offp); - if (space > data->ulen) - goto back_up; - - if (is_key) { - space -= 2 * sizeof(*offp); - if (space > data->ulen) - goto back_up; - *offp-- = (int32_t)key_off; - *offp-- = (int32_t)key_size; - } - saveoff = offp; - if ((ret = 
__bam_bulk_duplicates(dbc, - pgno, dbuf, is_key ? offp + 2 : NULL, - &offp, &np, &space, no_dup)) != 0) { - if (ret == DB_BUFFER_SMALL) { - size = space; - space = 0; - if (is_key && saveoff == offp) { - offp += 2; - goto back_up; - } - goto get_space; - } - return (ret); - } - break; - case H_OFFPAGE: - space -= (is_key ? 4 : 2) * sizeof(*offp); - if (space > data->ulen) - goto back_up; - - memcpy(&size, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - memcpy(&pgno, HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - if (size > space) - goto back_up; - - if ((ret = - __bam_bulk_overflow(dbc, size, pgno, np)) != 0) - return (ret); - - if (is_key) { - *offp-- = (int32_t)key_off; - *offp-- = (int32_t)key_size; - } - - *offp-- = (int32_t)(np - dbuf); - *offp-- = (int32_t)size; - - np += size; - space -= size; - break; - default: - /* Do nothing. */ - break; - } - } while (next_key && (indx += 2) < NUM_ENT(pg)); - - cp->indx = indx; - cp->dup_len = dup_len; - cp->dup_off = dup_off; - cp->dup_tlen = dup_tlen; - - /* If we are off the page then try to the next page. */ - if (ret == 0 && next_key && indx >= NUM_ENT(pg)) { - if ((ret = __ham_item_next(dbc, lock_mode, &pgno)) == 0) - goto next_pg; - if (ret != DB_NOTFOUND) - return (ret); - if ((ret = __memp_fput(dbc->dbp->mpf, - cp->page, dbc->priority)) != 0) - return (ret); - cp->page = NULL; - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - - cp->bucket++; - if (cp->bucket > cp->hdr->max_bucket) { - /* - * Restore cursor to its previous state. We're past - * the last item in the last bucket, so the next - * DBC->get(DB_NEXT) will return DB_NOTFOUND. - */ - cp->bucket--; - ret = DB_NOTFOUND; - } else { - /* - * Start on the next bucket. - * - * Note that if this new bucket happens to be empty, - * but there's another non-empty bucket after it, - * we'll return early. 
This is a rare case, and we - * don't guarantee any particular number of keys - * returned on each call, so just let the next call - * to bulk get move forward by yet another bucket. - */ - cp->pgno = BUCKET_TO_PAGE(cp, cp->bucket); - cp->indx = NDX_INVALID; - F_CLR(cp, H_ISDUP); - ret = __ham_item_next(dbc, lock_mode, &pgno); - } - - if ((t_ret = __ham_release_meta(dbc)) != 0) - return (t_ret); - if (ret == 0) - goto next_pg; - if (ret != DB_NOTFOUND) - return (ret); - } - *offp = -1; - return (0); -} - -static int -__hamc_put(dbc, key, data, flags, pgnop) - DBC *dbc; - DBT *key; - DBT *data; - u_int32_t flags; - db_pgno_t *pgnop; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - DBT tmp_val, *myval; - HASH_CURSOR *hcp; - u_int32_t nbytes; - int ret, t_ret; - - /* - * The compiler doesn't realize that we only use this when ret is - * equal to 0 and that if ret is equal to 0, that we must have set - * myval. So, we initialize it here to shut the compiler up. - */ - COMPQUIET(myval, NULL); - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - if (F_ISSET(hcp, H_DELETED) && - flags != DB_KEYFIRST && flags != DB_KEYLAST) - return (DB_NOTFOUND); - - if ((ret = __ham_get_meta(dbc)) != 0) - goto err1; - - switch (flags) { - case DB_KEYLAST: - case DB_KEYFIRST: - case DB_NODUPDATA: - case DB_NOOVERWRITE: - nbytes = (ISBIG(hcp, key->size) ? HOFFPAGE_PSIZE : - HKEYDATA_PSIZE(key->size)) + - (ISBIG(hcp, data->size) ? HOFFPAGE_PSIZE : - HKEYDATA_PSIZE(data->size)); - if ((ret = __ham_lookup(dbc, - key, nbytes, DB_LOCK_WRITE, pgnop)) == DB_NOTFOUND) { - if (hcp->seek_found_page != PGNO_INVALID && - hcp->seek_found_page != hcp->pgno) { - if ((ret = __memp_fput(mpf, - hcp->page, dbc->priority)) != 0) - goto err2; - hcp->page = NULL; - hcp->pgno = hcp->seek_found_page; - hcp->indx = NDX_INVALID; - } - - if (F_ISSET(data, DB_DBT_PARTIAL) && data->doff != 0) { - /* - * A partial put, but the key does not exist - * and we are not beginning the write at 0. 
- * We must create a data item padded up to doff - * and then write the new bytes represented by - * val. - */ - if ((ret = __ham_init_dbt(dbp->dbenv, &tmp_val, - data->size + data->doff, - &dbc->my_rdata.data, - &dbc->my_rdata.ulen)) != 0) - goto err2; - - memset(tmp_val.data, 0, data->doff); - memcpy((u_int8_t *)tmp_val.data + - data->doff, data->data, data->size); - myval = &tmp_val; - } else - myval = (DBT *)data; - - ret = __ham_add_el(dbc, key, myval, H_KEYDATA); - goto done; - } else if (flags == DB_NOOVERWRITE && - !F_ISSET(hcp, H_DELETED)) { - if (*pgnop == PGNO_INVALID) - ret = DB_KEYEXIST; - else - ret = __bam_opd_exists(dbc, *pgnop); - if (ret != 0) - goto done; - } - break; - case DB_BEFORE: - case DB_AFTER: - case DB_CURRENT: - ret = __ham_item(dbc, DB_LOCK_WRITE, pgnop); - break; - default: - ret = __db_unknown_flag(dbp->dbenv, "__hamc_put", flags); - break; - } - - /* - * Invalidate any insert index found. So they are not reused - * in future inserts. - */ - hcp->seek_found_page = PGNO_INVALID; - hcp->seek_found_indx = NDX_INVALID; - - if (*pgnop == PGNO_INVALID && ret == 0) { - if ((ret = __memp_dirty(mpf, - &hcp->page, dbc->txn, dbc->priority, 0)) != 0) - goto done; - if (flags == DB_CURRENT || - ((flags == DB_KEYFIRST || - flags == DB_KEYLAST || flags == DB_NODUPDATA) && - !(F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK)))) - ret = __ham_overwrite(dbc, data, flags); - else - ret = __ham_add_dup(dbc, data, flags, pgnop); - } - -done: if (hcp->page != NULL) { - if ((t_ret = __memp_fput(mpf, - hcp->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if (t_ret == 0) - hcp->page = NULL; - } - - if (ret == 0 && F_ISSET(hcp, H_EXPAND)) { - ret = __ham_expand_table(dbc); - F_CLR(hcp, H_EXPAND); - /* If we are out of space, ignore the error. 
*/ - if (ret == ENOSPC && dbc->txn == NULL) - ret = 0; - } - -err2: if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - -err1: return (ret); -} - -/********************************* UTILITIES ************************/ - -/* - * __ham_expand_table -- - */ -static int -__ham_expand_table(dbc) - DBC *dbc; -{ - DB *dbp; - DB_LOCK metalock; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - DBMETA *mmeta; - HASH_CURSOR *hcp; - PAGE *h; - db_pgno_t pgno, mpgno; - u_int32_t logn, newalloc, new_bucket, old_bucket; - int got_meta, new_double, ret, t_ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_dirty_meta(dbc, 0)) != 0) - return (ret); - - LOCK_INIT(metalock); - mmeta = (DBMETA *) hcp->hdr; - mpgno = mmeta->pgno; - h = NULL; - newalloc = 0; - got_meta = 0; - - /* - * If the split point is about to increase, make sure that we - * have enough extra pages. The calculation here is weird. - * We'd like to do this after we've upped max_bucket, but it's - * too late then because we've logged the meta-data split. What - * we'll do between then and now is increment max bucket and then - * see what the log of one greater than that is; here we have to - * look at the log of max + 2. VERY NASTY STUFF. - * - * We figure out what we need to do, then we log it, then request - * the pages from mpool. We don't want to fail after extending - * the file. - * - * If the page we are about to split into has already been allocated, - * then we simply need to get it to get its LSN. If it hasn't yet - * been allocated, then we know it's LSN (0,0). 
- */ - - new_bucket = hcp->hdr->max_bucket + 1; - old_bucket = new_bucket & hcp->hdr->low_mask; - - new_double = hcp->hdr->max_bucket == hcp->hdr->high_mask; - logn = __db_log2(new_bucket); - - if (!new_double || hcp->hdr->spares[logn + 1] != PGNO_INVALID) { - /* Page exists; get it so we can get its LSN */ - pgno = BUCKET_TO_PAGE(hcp, new_bucket); - if ((ret = __memp_fget(mpf, &pgno, dbc->txn, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0) - goto err; - lsn = h->lsn; - } else { - /* Get the master meta-data page to do allocation. */ - if (F_ISSET(dbp, DB_AM_SUBDB)) { - mpgno = PGNO_BASE_MD; - if ((ret = __db_lget(dbc, - 0, mpgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &mpgno, dbc->txn, - DB_MPOOL_DIRTY, &mmeta)) != 0) - goto err; - got_meta = 1; - } - pgno = mmeta->last_pgno + 1; - ZERO_LSN(lsn); - newalloc = 1; - } - - /* Log the meta-data split first. */ - if (DBC_LOGGING(dbc)) { - /* - * We always log the page number of the first page of - * the allocation group. However, the LSN that we log - * is either the LSN on the first page (if we did not - * do the actual allocation here) or the LSN on the last - * page of the unit (if we did do the allocation here). - */ - if ((ret = __ham_metagroup_log(dbp, dbc->txn, - &lsn, 0, hcp->hdr->max_bucket, mpgno, &mmeta->lsn, - hcp->hdr->dbmeta.pgno, &hcp->hdr->dbmeta.lsn, - pgno, &lsn, newalloc, mmeta->last_pgno)) != 0) - goto err; - } else - LSN_NOT_LOGGED(lsn); - - hcp->hdr->dbmeta.lsn = lsn; - - if (new_double && hcp->hdr->spares[logn + 1] == PGNO_INVALID) { - /* - * We need to begin a new doubling and we have not allocated - * any pages yet. Read the last page in and initialize it to - * make the allocation contiguous. The pgno we calculated - * above is the first page allocated. The entry in spares is - * that page number minus any buckets already allocated (it - * simplifies bucket to page transaction). After we've set - * that, we calculate the last pgno. 
- */ - - pgno += hcp->hdr->max_bucket; - - if ((ret = __memp_fget(mpf, &pgno, dbc->txn, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0) - goto err; - - hcp->hdr->spares[logn + 1] = - (pgno - new_bucket) - hcp->hdr->max_bucket; - mmeta->last_pgno = pgno; - mmeta->lsn = lsn; - - P_INIT(h, dbp->pgsize, - pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - } - - /* Write out whatever page we ended up modifying. */ - h->lsn = lsn; - if ((ret = __memp_fput(mpf, h, dbc->priority)) != 0) - goto err; - h = NULL; - - /* - * Update the meta-data page of this hash database. - */ - hcp->hdr->max_bucket = new_bucket; - if (new_double) { - hcp->hdr->low_mask = hcp->hdr->high_mask; - hcp->hdr->high_mask = new_bucket | hcp->hdr->low_mask; - } - - /* Relocate records to the new bucket */ - ret = __ham_split_page(dbc, old_bucket, new_bucket); - -err: if (got_meta) - if ((t_ret = - __memp_fput(mpf, mmeta, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) - ret = t_ret; - if (h != NULL) - if ((t_ret = __memp_fput(mpf, - h, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * PUBLIC: u_int32_t __ham_call_hash __P((DBC *, u_int8_t *, u_int32_t)); - */ -u_int32_t -__ham_call_hash(dbc, k, len) - DBC *dbc; - u_int8_t *k; - u_int32_t len; -{ - DB *dbp; - HASH_CURSOR *hcp; - HASH *hashp; - u_int32_t n, bucket; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - hashp = dbp->h_internal; - - n = (u_int32_t)(hashp->h_hash(dbp, k, len)); - - bucket = n & hcp->hdr->high_mask; - if (bucket > hcp->hdr->max_bucket) - bucket = bucket & hcp->hdr->low_mask; - return (bucket); -} - -/* - * Check for duplicates, and call __db_ret appropriately. Release - * everything held by the cursor. 
- */ -static int -__ham_dup_return(dbc, val, flags) - DBC *dbc; - DBT *val; - u_int32_t flags; -{ - DB *dbp; - HASH_CURSOR *hcp; - PAGE *pp; - DBT *myval, tmp_val; - db_indx_t ndx; - db_pgno_t pgno; - u_int32_t off, tlen; - u_int8_t *hk, type; - int cmp, ret; - db_indx_t len; - - /* Check for duplicate and return the first one. */ - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - ndx = H_DATAINDEX(hcp->indx); - type = HPAGE_TYPE(dbp, hcp->page, ndx); - pp = hcp->page; - myval = val; - - /* - * There are 4 cases: - * 1. We are not in duplicate, simply return; the upper layer - * will do the right thing. - * 2. We are looking at keys and stumbled onto a duplicate. - * 3. We are in the middle of a duplicate set. (ISDUP set) - * 4. We need to check for particular data match. - */ - - /* We should never get here with off-page dups. */ - DB_ASSERT(dbp->dbenv, type != H_OFFDUP); - - /* Case 1 */ - if (type != H_DUPLICATE && flags != DB_GET_BOTH && - flags != DB_GET_BOTHC && flags != DB_GET_BOTH_RANGE) - return (0); - - /* - * Here we check for the case where we just stumbled onto a - * duplicate. In this case, we do initialization and then - * let the normal duplicate code handle it. (Case 2) - */ - if (!F_ISSET(hcp, H_ISDUP) && type == H_DUPLICATE) { - F_SET(hcp, H_ISDUP); - hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, - hcp->hdr->dbmeta.pagesize, hcp->indx); - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (flags == DB_LAST || - flags == DB_PREV || flags == DB_PREV_NODUP) { - hcp->dup_off = 0; - do { - memcpy(&len, - HKEYDATA_DATA(hk) + hcp->dup_off, - sizeof(db_indx_t)); - hcp->dup_off += DUP_SIZE(len); - } while (hcp->dup_off < hcp->dup_tlen); - hcp->dup_off -= DUP_SIZE(len); - } else { - memcpy(&len, - HKEYDATA_DATA(hk), sizeof(db_indx_t)); - hcp->dup_off = 0; - } - hcp->dup_len = len; - } - - /* - * If we are retrieving a specific key/data pair, then we - * may need to adjust the cursor before returning data. 
- * Case 4 - */ - if (flags == DB_GET_BOTH || - flags == DB_GET_BOTHC || flags == DB_GET_BOTH_RANGE) { - if (F_ISSET(hcp, H_ISDUP)) { - /* - * If we're doing a join, search forward from the - * current position, not the beginning of the dup set. - */ - if (flags == DB_GET_BOTHC) - F_SET(hcp, H_CONTINUE); - - __ham_dsearch(dbc, val, &off, &cmp, flags); - - /* - * This flag is set nowhere else and is safe to - * clear unconditionally. - */ - F_CLR(hcp, H_CONTINUE); - hcp->dup_off = off; - } else { - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (((HKEYDATA *)hk)->type == H_OFFPAGE) { - memcpy(&tlen, - HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - memcpy(&pgno, - HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - if ((ret = __db_moff(dbp, dbc->txn, val, - pgno, tlen, dbp->dup_compare, &cmp)) != 0) - return (ret); - } else { - /* - * We do not zero tmp_val since the comparison - * routines may only look at data and size. - */ - tmp_val.data = HKEYDATA_DATA(hk); - tmp_val.size = LEN_HDATA(dbp, hcp->page, - dbp->pgsize, hcp->indx); - cmp = dbp->dup_compare == NULL ? - __bam_defcmp(dbp, &tmp_val, val) : - dbp->dup_compare(dbp, &tmp_val, val); - } - } - - if (cmp != 0) - return (DB_NOTFOUND); - } - - /* - * If we've already got the data for this value, or we're doing a bulk - * get, we don't want to return the data. - */ - if (F_ISSET(dbc, DBC_MULTIPLE | DBC_MULTIPLE_KEY) || - F_ISSET(val, DB_DBT_ISSET)) - return (0); - - /* - * Now, everything is initialized, grab a duplicate if - * necessary. - */ - if (F_ISSET(hcp, H_ISDUP)) { /* Case 3 */ - /* - * Copy the DBT in case we are retrieving into user - * memory and we need the parameters for it. If the - * user requested a partial, then we need to adjust - * the user's parameters to get the partial of the - * duplicate which is itself a partial. - */ - memcpy(&tmp_val, val, sizeof(*val)); - - if (F_ISSET(&tmp_val, DB_DBT_PARTIAL)) { - /* - * Take the user's length unless it would go - * beyond the end of the duplicate. 
- */ - if (tmp_val.doff > hcp->dup_len) - tmp_val.dlen = 0; - else if (tmp_val.dlen + tmp_val.doff > hcp->dup_len) - tmp_val.dlen = hcp->dup_len - tmp_val.doff; - - } else { - F_SET(&tmp_val, DB_DBT_PARTIAL); - tmp_val.dlen = hcp->dup_len; - tmp_val.doff = 0; - } - - /* - * Set offset to the appropriate place within the - * current duplicate -- need to take into account - * both the dup_off and the current duplicate's - * length. - */ - tmp_val.doff += hcp->dup_off + sizeof(db_indx_t); - - myval = &tmp_val; - } - - /* - * Finally, if we had a duplicate, pp, ndx, and myval should be - * set appropriately. - */ - if ((ret = __db_ret(dbp, dbc->txn, - pp, ndx, myval, &dbc->rdata->data, &dbc->rdata->ulen)) != 0) - return (ret); - - /* - * In case we sent a temporary off to db_ret, set the real - * return values. - */ - val->data = myval->data; - val->size = myval->size; - - F_SET(val, DB_DBT_ISSET); - - return (0); -} - -static int -__ham_overwrite(dbc, nval, flags) - DBC *dbc; - DBT *nval; - u_int32_t flags; -{ - DB *dbp; - DB_ENV *dbenv; - HASH_CURSOR *hcp; - DBT *myval, tmp_val, tmp_val2; - void *newrec; - u_int8_t *hk, *p; - u_int32_t len, nondup_size; - db_indx_t newsize; - int ret; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - hcp = (HASH_CURSOR *)dbc->internal; - if (F_ISSET(hcp, H_ISDUP)) { - /* - * This is an overwrite of a duplicate. We should never - * be off-page at this point. - */ - DB_ASSERT(dbenv, hcp->opd == NULL); - /* On page dups */ - if (F_ISSET(nval, DB_DBT_PARTIAL)) { - /* - * We're going to have to get the current item, then - * construct the record, do any padding and do a - * replace. - */ - memset(&tmp_val, 0, sizeof(tmp_val)); - if ((ret = - __ham_dup_return(dbc, &tmp_val, DB_CURRENT)) != 0) - return (ret); - - /* Figure out new size. */ - nondup_size = tmp_val.size; - newsize = nondup_size; - - /* - * Three cases: - * 1. strictly append (may need to allocate space - * for pad bytes; really gross). - * 2. overwrite some and append. - * 3. 
strictly overwrite. - */ - if (nval->doff > nondup_size) - newsize += - ((nval->doff - nondup_size) + nval->size); - else if (nval->doff + nval->dlen > nondup_size) - newsize += nval->size - - (nondup_size - nval->doff); - else - newsize += nval->size - nval->dlen; - - /* - * Make sure that the new size doesn't put us over - * the onpage duplicate size in which case we need - * to convert to off-page duplicates. - */ - if (ISBIG(hcp, - (hcp->dup_tlen - nondup_size) + newsize)) { - if ((ret = __ham_dup_convert(dbc)) != 0) - return (ret); - return (hcp->opd->am_put(hcp->opd, - NULL, nval, flags, NULL)); - } - - if ((ret = __os_malloc(dbp->dbenv, - DUP_SIZE(newsize), &newrec)) != 0) - return (ret); - memset(&tmp_val2, 0, sizeof(tmp_val2)); - F_SET(&tmp_val2, DB_DBT_PARTIAL); - - /* Construct the record. */ - p = newrec; - /* Initial size. */ - memcpy(p, &newsize, sizeof(db_indx_t)); - p += sizeof(db_indx_t); - - /* First part of original record. */ - len = nval->doff > tmp_val.size - ? tmp_val.size : nval->doff; - memcpy(p, tmp_val.data, len); - p += len; - - if (nval->doff > tmp_val.size) { - /* Padding */ - memset(p, 0, nval->doff - tmp_val.size); - p += nval->doff - tmp_val.size; - } - - /* New bytes */ - memcpy(p, nval->data, nval->size); - p += nval->size; - - /* End of original record (if there is any) */ - if (nval->doff + nval->dlen < tmp_val.size) { - len = (tmp_val.size - nval->doff) - nval->dlen; - memcpy(p, (u_int8_t *)tmp_val.data + - nval->doff + nval->dlen, len); - p += len; - } - - /* Final size. */ - memcpy(p, &newsize, sizeof(db_indx_t)); - - /* - * Make sure that the caller isn't corrupting - * the sort order. 
- */ - if (dbp->dup_compare != NULL) { - tmp_val2.data = - (u_int8_t *)newrec + sizeof(db_indx_t); - tmp_val2.size = newsize; - if (dbp->dup_compare( - dbp, &tmp_val, &tmp_val2) != 0) { - __os_free(dbenv, newrec); - return (__db_duperr(dbp, flags)); - } - } - - tmp_val2.data = newrec; - tmp_val2.size = DUP_SIZE(newsize); - tmp_val2.doff = hcp->dup_off; - tmp_val2.dlen = DUP_SIZE(hcp->dup_len); - - ret = __ham_replpair(dbc, &tmp_val2, 0); - __os_free(dbenv, newrec); - - /* Update cursor */ - if (ret != 0) - return (ret); - - if (newsize > nondup_size) { - if ((ret = __hamc_update(dbc, - (newsize - nondup_size), - DB_HAM_CURADJ_ADDMOD, 1)) != 0) - return (ret); - hcp->dup_tlen += (newsize - nondup_size); - } else { - if ((ret = __hamc_update(dbc, - (nondup_size - newsize), - DB_HAM_CURADJ_DELMOD, 1)) != 0) - return (ret); - hcp->dup_tlen -= (nondup_size - newsize); - } - hcp->dup_len = newsize; - return (0); - } else { - /* Check whether we need to convert to off page. */ - if (ISBIG(hcp, - (hcp->dup_tlen - hcp->dup_len) + nval->size)) { - if ((ret = __ham_dup_convert(dbc)) != 0) - return (ret); - return (hcp->opd->am_put(hcp->opd, - NULL, nval, flags, NULL)); - } - - /* Make sure we maintain sort order. */ - if (dbp->dup_compare != NULL) { - tmp_val2.data = - HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, - hcp->indx)) + hcp->dup_off + - sizeof(db_indx_t); - tmp_val2.size = hcp->dup_len; - if (dbp->dup_compare( - dbp, nval, &tmp_val2) != 0) { - __db_errx(dbenv, - "Existing data sorts differently from put data"); - return (EINVAL); - } - } - /* Overwriting a complete duplicate. */ - if ((ret = - __ham_make_dup(dbp->dbenv, nval, &tmp_val, - &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0) - return (ret); - /* Now fix what we are replacing. 
*/ - tmp_val.doff = hcp->dup_off; - tmp_val.dlen = DUP_SIZE(hcp->dup_len); - - /* Update cursor */ - if (nval->size > hcp->dup_len) { - if ((ret = __hamc_update(dbc, - (nval->size - hcp->dup_len), - DB_HAM_CURADJ_ADDMOD, 1)) != 0) - return (ret); - hcp->dup_tlen += (nval->size - hcp->dup_len); - } else { - if ((ret = __hamc_update(dbc, - (hcp->dup_len - nval->size), - DB_HAM_CURADJ_DELMOD, 1)) != 0) - return (ret); - hcp->dup_tlen -= (hcp->dup_len - nval->size); - } - hcp->dup_len = (db_indx_t)nval->size; - } - myval = &tmp_val; - } else if (!F_ISSET(nval, DB_DBT_PARTIAL)) { - /* Put/overwrite */ - memcpy(&tmp_val, nval, sizeof(*nval)); - F_SET(&tmp_val, DB_DBT_PARTIAL); - tmp_val.doff = 0; - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (HPAGE_PTYPE(hk) == H_OFFPAGE) - memcpy(&tmp_val.dlen, - HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - else - tmp_val.dlen = LEN_HDATA(dbp, hcp->page, - hcp->hdr->dbmeta.pagesize, hcp->indx); - myval = &tmp_val; - } else - /* Regular partial put */ - myval = nval; - - return (__ham_replpair(dbc, myval, 0)); -} - -/* - * Given a key and a cursor, sets the cursor to the page/ndx on which - * the key resides. If the key is found, the cursor H_OK flag is set - * and the pagep, bndx, pgno (dpagep, dndx, dpgno) fields are set. - * If the key is not found, the H_OK flag is not set. If the sought - * field is non-0, the pagep, bndx, pgno (dpagep, dndx, dpgno) fields - * are set indicating where an add might take place. If it is 0, - * none of the cursor pointer field are valid. - */ -static int -__ham_lookup(dbc, key, sought, mode, pgnop) - DBC *dbc; - const DBT *key; - u_int32_t sought; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - DB *dbp; - HASH_CURSOR *hcp; - db_pgno_t next_pgno; - int match, ret; - u_int8_t *dk; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - - /* - * Set up cursor so that we're looking for space to add an item - * as we cycle through the pages looking for the key. 
- */ - if ((ret = __ham_item_reset(dbc)) != 0) - return (ret); - hcp->seek_size = sought; - - hcp->bucket = __ham_call_hash(dbc, (u_int8_t *)key->data, key->size); - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - /* look though all pages in the bucket for the key */ - if ((ret = __ham_get_cpage(dbc, mode)) != 0) - return (ret); - - *pgnop = PGNO_INVALID; - if (hcp->indx == NDX_INVALID) { - hcp->indx = 0; - F_CLR(hcp, H_ISDUP); - } - while (hcp->pgno != PGNO_INVALID) { - /* Are we looking for space to insert an item. */ - if (hcp->seek_size != 0 && - hcp->seek_found_page == PGNO_INVALID && - hcp->seek_size < P_FREESPACE(dbp, hcp->page)) { - hcp->seek_found_page = hcp->pgno; - hcp->seek_found_indx = NDX_INVALID; - } - - if ((ret = __ham_getindex(dbp, dbc->txn, hcp->page, key, - H_KEYDATA, &match, &hcp->indx)) != 0) - return (ret); - - /* - * If this is the first page in the bucket with space for - * inserting the requested item. Store the insert index to - * save having to look it up again later. - */ - if (hcp->seek_found_page == hcp->pgno) - hcp->seek_found_indx = hcp->indx; - - if (match == 0) { - F_SET(hcp, H_OK); - dk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - if (HPAGE_PTYPE(dk) == H_OFFDUP) - memcpy(pgnop, HOFFDUP_PGNO(dk), - sizeof(db_pgno_t)); - return (0); - } - - /* move the cursor to the next page. */ - if (NEXT_PGNO(hcp->page) == PGNO_INVALID) - break; - next_pgno = NEXT_PGNO(hcp->page); - hcp->indx = 0; - if ((ret = __ham_next_cpage(dbc, next_pgno)) != 0) - return (ret); - } - F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); -} - -/* - * __ham_init_dbt -- - * Initialize a dbt using some possibly already allocated storage - * for items. 
- * - * PUBLIC: int __ham_init_dbt __P((DB_ENV *, - * PUBLIC: DBT *, u_int32_t, void **, u_int32_t *)); - */ -int -__ham_init_dbt(dbenv, dbt, size, bufp, sizep) - DB_ENV *dbenv; - DBT *dbt; - u_int32_t size; - void **bufp; - u_int32_t *sizep; -{ - int ret; - - memset(dbt, 0, sizeof(*dbt)); - if (*sizep < size) { - if ((ret = __os_realloc(dbenv, size, bufp)) != 0) { - *sizep = 0; - return (ret); - } - *sizep = size; - } - dbt->data = *bufp; - dbt->size = size; - return (0); -} - -/* - * Adjust the cursor after an insert or delete. The cursor passed is - * the one that was operated upon; we just need to check any of the - * others. - * - * len indicates the length of the item added/deleted - * add indicates if the item indicated by the cursor has just been - * added (add == 1) or deleted (add == 0). - * dup indicates if the addition occurred into a duplicate set. - * - * PUBLIC: int __hamc_update - * PUBLIC: __P((DBC *, u_int32_t, db_ham_curadj, int)); - */ -int -__hamc_update(dbc, len, operation, is_dup) - DBC *dbc; - u_int32_t len; - db_ham_curadj operation; - int is_dup; -{ - DB *dbp, *ldbp; - DBC *cp; - DB_ENV *dbenv; - DB_LSN lsn; - DB_TXN *my_txn; - HASH_CURSOR *hcp, *lcp; - int found, ret, was_mod, was_add; - u_int32_t order; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - hcp = (HASH_CURSOR *)dbc->internal; - - /* - * Adjustment will only be logged if this is a subtransaction. - * Only subtransactions can abort and effect their parent - * transactions cursors. - */ - - my_txn = IS_SUBTRANSACTION(dbc->txn) ? 
dbc->txn : NULL; - found = 0; - - MUTEX_LOCK(dbenv, dbenv->mtx_dblist); - - switch (operation) { - case DB_HAM_CURADJ_DEL: - was_mod = 0; - was_add = 0; - break; - case DB_HAM_CURADJ_ADD: - was_mod = 0; - was_add = 1; - break; - case DB_HAM_CURADJ_DELMOD: - was_mod = 1; - was_add = 0; - break; - case DB_HAM_CURADJ_ADDMOD: - was_mod = 1; - was_add = 1; - break; - default: - DB_ASSERT(dbenv, "Invalid arg to hamc_update"); - return (EINVAL); - } - - /* - * Calculate the order of this deleted record. - * This will be one greater than any cursor that is pointing - * at this record and already marked as deleted. - */ - order = 0; - if (was_add == 0) { - FIND_FIRST_DB_MATCH(dbenv, dbp, ldbp); - for (order = 1; - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { - MUTEX_LOCK(dbenv, dbp->mutex); - TAILQ_FOREACH(cp, &ldbp->active_queue, links) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - lcp = (HASH_CURSOR *)cp->internal; - if (F_ISSET(lcp, H_DELETED) && - hcp->pgno == lcp->pgno && - hcp->indx == lcp->indx && - order <= lcp->order && - (!is_dup || hcp->dup_off == lcp->dup_off) && - !MVCC_SKIP_CURADJ(cp, lcp->pgno)) - order = lcp->order + 1; - } - MUTEX_UNLOCK(dbenv, dbp->mutex); - } - hcp->order = order; - } - - FIND_FIRST_DB_MATCH(dbenv, dbp, ldbp); - for (; - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { - MUTEX_LOCK(dbenv, dbp->mutex); - TAILQ_FOREACH(cp, &ldbp->active_queue, links) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - - lcp = (HASH_CURSOR *)cp->internal; - - if (lcp->pgno != hcp->pgno || - lcp->indx == NDX_INVALID || - MVCC_SKIP_CURADJ(cp, lcp->pgno)) - continue; - - if (my_txn != NULL && cp->txn != my_txn) - found = 1; - - if (!is_dup) { - if (was_add == 1) { - /* - * This routine is not called to add - * non-dup records which are always put - * at the end. 
It is only called from - * recovery in this case and the - * cursor will be marked deleted. - * We are "undeleting" so unmark all - * cursors with the same order. - */ - if (lcp->indx == hcp->indx && - F_ISSET(lcp, H_DELETED)) { - if (lcp->order == hcp->order) - F_CLR(lcp, H_DELETED); - else if (lcp->order > - hcp->order) { - - /* - * If we've moved this cursor's - * index, split its order - * number--i.e., decrement it by - * enough so that the lowest - * cursor moved has order 1. - * cp_arg->order is the split - * point, so decrement by it. - */ - lcp->order -= - hcp->order; - lcp->indx += 2; - } - } else if (lcp->indx >= hcp->indx) - lcp->indx += 2; - } else { - if (lcp->indx > hcp->indx) { - lcp->indx -= 2; - if (lcp->indx == hcp->indx && - F_ISSET(lcp, H_DELETED)) - lcp->order += order; - } else if (lcp->indx == hcp->indx && - !F_ISSET(lcp, H_DELETED)) { - F_SET(lcp, H_DELETED); - F_CLR(lcp, H_ISDUP); - lcp->order = order; - } - } - } else if (lcp->indx == hcp->indx) { - /* - * Handle duplicates. This routine is only - * called for on page dups. Off page dups are - * handled by btree/rtree code. - */ - if (was_add == 1) { - lcp->dup_tlen += len; - if (lcp->dup_off == hcp->dup_off && - F_ISSET(hcp, H_DELETED) && - F_ISSET(lcp, H_DELETED)) { - /* Abort of a delete. 
*/ - if (lcp->order == hcp->order) - F_CLR(lcp, H_DELETED); - else if (lcp->order > - hcp->order) { - lcp->order -= - (hcp->order -1); - lcp->dup_off += len; - } - } else if (lcp->dup_off > - hcp->dup_off || (!was_mod && - lcp->dup_off == hcp->dup_off)) - lcp->dup_off += len; - } else { - lcp->dup_tlen -= len; - if (lcp->dup_off > hcp->dup_off) { - lcp->dup_off -= len; - if (lcp->dup_off == - hcp->dup_off && - F_ISSET(lcp, H_DELETED)) - lcp->order += order; - } else if (!was_mod && - lcp->dup_off == hcp->dup_off && - !F_ISSET(lcp, H_DELETED)) { - F_SET(lcp, H_DELETED); - lcp->order = order; - } - } - } - } - MUTEX_UNLOCK(dbenv, dbp->mutex); - } - MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist); - - if (found != 0 && DBC_LOGGING(dbc)) { - if ((ret = __ham_curadj_log(dbp, my_txn, &lsn, 0, hcp->pgno, - hcp->indx, len, hcp->dup_off, (int)operation, is_dup, - order)) != 0) - return (ret); - } - - return (0); -} - -/* - * __ham_get_clist -- - * - * Get a list of cursors either on a particular bucket or on a particular - * page and index combination. The former is so that we can update - * cursors on a split. The latter is so we can update cursors when we - * move items off page. - * - * PUBLIC: int __ham_get_clist __P((DB *, db_pgno_t, u_int32_t, DBC ***)); - */ -int -__ham_get_clist(dbp, pgno, indx, listp) - DB *dbp; - db_pgno_t pgno; - u_int32_t indx; - DBC ***listp; -{ - DB *ldbp; - DBC *cp; - DB_ENV *dbenv; - u_int nalloc, nused; - int ret; - - *listp = NULL; - dbenv = dbp->dbenv; - nalloc = nused = 0; - - /* - * Assume that finding anything is the exception, so optimize for - * the case where there aren't any. 
- */ - MUTEX_LOCK(dbenv, dbenv->mtx_dblist); - FIND_FIRST_DB_MATCH(dbenv, dbp, ldbp); - for (; - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { - MUTEX_LOCK(dbenv, dbp->mutex); - TAILQ_FOREACH(cp, &ldbp->active_queue, links) - /* - * We match if cp->pgno matches the specified - * pgno, and if either the cp->indx matches - * or we weren't given an index. - */ - if (cp->internal->pgno == pgno && - (indx == NDX_INVALID || - cp->internal->indx == indx) && - !MVCC_SKIP_CURADJ(cp, pgno)) { - if (nused >= nalloc) { - nalloc += 10; - if ((ret = __os_realloc(dbp->dbenv, - nalloc * sizeof(HASH_CURSOR *), - listp)) != 0) - goto err; - } - (*listp)[nused++] = cp; - } - - MUTEX_UNLOCK(dbp->dbenv, dbp->mutex); - } - MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist); - - if (listp != NULL) { - if (nused >= nalloc) { - nalloc++; - if ((ret = __os_realloc(dbp->dbenv, - nalloc * sizeof(HASH_CURSOR *), listp)) != 0) - return (ret); - } - (*listp)[nused] = NULL; - } - return (0); -err: - MUTEX_UNLOCK(dbp->dbenv, dbp->mutex); - MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist); - return (ret); -} - -static int -__hamc_writelock(dbc) - DBC *dbc; -{ - DB_LOCK tmp_lock; - HASH_CURSOR *hcp; - int ret; - - /* - * All we need do is acquire the lock and let the off-page - * dup tree do its thing. - */ - if (!STD_LOCKING(dbc)) - return (0); - - hcp = (HASH_CURSOR *)dbc->internal; - ret = 0; - if ((!LOCK_ISSET(hcp->lock) || hcp->lock_mode != DB_LOCK_WRITE)) { - tmp_lock = hcp->lock; - if ((ret = __ham_lock_bucket(dbc, DB_LOCK_WRITE)) == 0 && - tmp_lock.mode != DB_LOCK_WWRITE) - ret = __LPUT(dbc, tmp_lock); - } - return (ret); -} diff --git a/db/hash/hash.src b/db/hash/hash.src deleted file mode 100644 index 91a07fc9f..000000000 --- a/db/hash/hash.src +++ /dev/null @@ -1,279 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. 
- * - * $Id: hash.src,v 12.8 2007/05/17 15:15:38 bostic Exp $ - */ -/* - * Copyright (c) 1995, 1996 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1995, 1996 - * The President and Fellows of Harvard University. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- */ - -PREFIX __ham -DBPRIVATE - -INCLUDE #include "db_int.h" -INCLUDE #include "dbinc/crypto.h" -INCLUDE #include "dbinc/db_page.h" -INCLUDE #include "dbinc/db_dispatch.h" -INCLUDE #include "dbinc/db_am.h" -INCLUDE #include "dbinc/hash.h" -INCLUDE #include "dbinc/log.h" -INCLUDE #include "dbinc/txn.h" -INCLUDE - -/* - * HASH-insdel: used for hash to insert/delete a pair of entries onto a master - * page. The pair might be regular key/data pairs or they might be the - * structures that refer to off page items, duplicates or offpage duplicates. - * opcode - PUTPAIR/DELPAIR + big masks - * fileid - identifies the file referenced - * pgno - page within file - * ndx - index on the page of the item being added (item index) - * pagelsn - lsn on the page before the update - * key - the key being inserted - * data - the data being inserted - */ -BEGIN insdel 42 21 -ARG opcode u_int32_t lu -DB fileid int32_t ld -ARG pgno db_pgno_t lu -ARG ndx u_int32_t lu -POINTER pagelsn DB_LSN * lu -DBT key DBT s -DBT data DBT s -END - -/* - * Used to add and remove overflow pages. - * prev_pgno is the previous page that is going to get modified to - * point to this one. If this is the first page in a chain - * then prev_pgno should be PGNO_INVALID. - * new_pgno is the page being allocated. - * next_pgno is the page that follows this one. On allocation, - * this should be PGNO_INVALID. For deletes, it may exist. - * pagelsn is the old lsn on the page. - */ -BEGIN newpage 42 22 -ARG opcode u_int32_t lu -DB fileid int32_t ld -ARG prev_pgno db_pgno_t lu -POINTER prevlsn DB_LSN * lu -ARG new_pgno db_pgno_t lu -POINTER pagelsn DB_LSN * lu -ARG next_pgno db_pgno_t lu -POINTER nextlsn DB_LSN * lu -END - -/* - * Splitting requires two types of log messages. The second logs the - * data on the original page. To redo the split, we have to visit the - * new page (pages) and add the items back on the page if they are not - * yet there. 
- */ -BEGIN splitdata 42 24 -DB fileid int32_t ld -ARG opcode u_int32_t lu -ARG pgno db_pgno_t lu -PGDBT pageimage DBT s -POINTER pagelsn DB_LSN * lu -END - -/* - * HASH-replace: is used for hash to handle partial puts that only - * affect a single master page. - * fileid - identifies the file referenced - * pgno - page within file - * ndx - index on the page of the item being modified (item index) - * pagelsn - lsn on the page before the update - * off - offset in the old item where the new item is going. - * olditem - DBT that describes the part of the item being replaced. - * newitem - DBT of the new item. - * makedup - this was a replacement that made an item a duplicate. - */ -BEGIN replace 42 25 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -ARG ndx u_int32_t lu -POINTER pagelsn DB_LSN * lu -ARG off int32_t ld -DBT olditem DBT s -DBT newitem DBT s -ARG makedup u_int32_t lu -END - -/* - * Used when we empty the first page in a bucket and there are pages after - * it. The page after it gets copied into the bucket page (since bucket - * pages have to be in fixed locations). - * pgno: the bucket page - * pagelsn: the old LSN on the bucket page - * next_pgno: the page number of the next page - * nnext_pgno: page after next_pgno (may need to change its prev) - * nnextlsn: the LSN of nnext_pgno. - */ -BEGIN copypage 42 28 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER pagelsn DB_LSN * lu -ARG next_pgno db_pgno_t lu -POINTER nextlsn DB_LSN * lu -ARG nnext_pgno db_pgno_t lu -POINTER nnextlsn DB_LSN * lu -PGDBT page DBT s -END - -/* - * This record logs the meta-data aspects of a split operation. It has enough - * information so that we can record both an individual page allocation as well - * as a group allocation which we do because in sub databases, the pages in - * a hash doubling, must be contiguous. If we do a group allocation, the - * number of pages allocated is bucket + 1, pgno is the page number of the - * first newly allocated bucket. 
- * - * bucket: Old maximum bucket number. - * mmpgno: Master meta-data page number (0 if same as mpgno). - * mmetalsn: Lsn of the master meta-data page. - * mpgno: Meta-data page number. - * metalsn: Lsn of the meta-data page. - * pgno: Page allocated to bucket + 1 (first newly allocated page) - * pagelsn: Lsn of either the first page allocated (if newalloc == 0) or - * the last page allocated (if newalloc == 1). - * newalloc: 1 indicates that this record did the actual allocation; - * 0 indicates that the pages were already allocated from a - * previous (failed) allocation. - * last_pgno: the last page in the file before this op (4.3+). - */ -BEGIN_COMPAT metagroup 42 29 -DB fileid int32_t ld -ARG bucket u_int32_t lu -ARG mmpgno db_pgno_t lu -POINTER mmetalsn DB_LSN * lu -ARG mpgno db_pgno_t lu -POINTER metalsn DB_LSN * lu -ARG pgno db_pgno_t lu -POINTER pagelsn DB_LSN * lu -ARG newalloc u_int32_t lu -END - -BEGIN metagroup 43 29 -DB fileid int32_t ld -ARG bucket u_int32_t lu -ARG mmpgno db_pgno_t lu -POINTER mmetalsn DB_LSN * lu -ARG mpgno db_pgno_t lu -POINTER metalsn DB_LSN * lu -ARG pgno db_pgno_t lu -POINTER pagelsn DB_LSN * lu -ARG newalloc u_int32_t lu -ARG last_pgno db_pgno_t lu -END - -/* - * groupalloc - * - * This is used in conjunction with MPOOL_NEW_GROUP when we are creating - * a new database to make sure that we recreate or reclaim free pages - * when we allocate a chunk of contiguous ones during database creation. - * - * meta_lsn: meta-data lsn - * start_pgno: starting page number - * num: number of allocated pages - * unused: unused, historically the meta-data free list page number - * last_pgno: the last page in the file before this op (4.3+). 
- */ -BEGIN_COMPAT groupalloc 42 32 -DB fileid int32_t ld -POINTER meta_lsn DB_LSN * lu -ARG start_pgno db_pgno_t lu -ARG num u_int32_t lu -ARG free db_pgno_t lu -END - -BEGIN groupalloc 43 32 -DB fileid int32_t ld -POINTER meta_lsn DB_LSN * lu -ARG start_pgno db_pgno_t lu -ARG num u_int32_t lu -ARG unused db_pgno_t lu -ARG last_pgno db_pgno_t lu -END - -/* - * Records for backing out cursor adjustment. - * curadj - added or deleted a record or a dup - * within a record. - * pgno - page that was affected - * indx - indx of record affected. - * len - if a dup its length. - * dup_off - if a dup its offset - * add - 1 if add 0 if delete - * is_dup - 1 if dup 0 otherwise. - * order - order assigned to this deleted record or dup. - * - * chgpg - removed a page, move the records to a new page - * mode - CHGPG page was deleted or records move to new page. - * - SPLIT we split a bucket - * - DUP we converted to off page duplicates. - * old_pgno, new_pgno - old and new page numbers. - * old_indx, new_indx - old and new index numbers, NDX_INVALID if - * it affects all records on the page. - * For three opcodes new in 3.3 (DB_HAM_DELFIRSTPG, DELMIDPG, - * and DELLASTPG), we overload old_indx and new_indx to avoid - * needing a new log record type: old_indx stores the only - * indx of interest to these records, and new_indx stores the - * order that's assigned to the lowest deleted record we're - * moving.
- */ -BEGIN curadj 42 33 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -ARG indx u_int32_t lu -ARG len u_int32_t lu -ARG dup_off u_int32_t lu -ARG add int ld -ARG is_dup int ld -ARG order u_int32_t lu -END - -BEGIN chgpg 42 34 -DB fileid int32_t ld -ARG mode db_ham_mode ld -ARG old_pgno db_pgno_t lu -ARG new_pgno db_pgno_t lu -ARG old_indx u_int32_t lu -ARG new_indx u_int32_t lu -END - diff --git a/db/hash/hash_auto.c b/db/hash/hash_auto.c deleted file mode 100644 index 7b1ad0b1a..000000000 --- a/db/hash/hash_auto.c +++ /dev/null @@ -1,2596 +0,0 @@ -/* Do not edit: automatically built by gen_rec.awk. */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/db_dispatch.h" -#include "dbinc/db_am.h" -#include "dbinc/hash.h" -#include "dbinc/log.h" -#include "dbinc/txn.h" - -/* - * PUBLIC: int __ham_insdel_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, u_int32_t, DB_LSN *, - * PUBLIC: const DBT *, const DBT *)); - */ -int -__ham_insdel_log(dbp, txnp, ret_lsnp, flags, - opcode, pgno, ndx, pagelsn, key, - data) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t opcode; - db_pgno_t pgno; - u_int32_t ndx; - DB_LSN * pagelsn; - const DBT *key; - const DBT *data; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_insdel; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We 
need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*pagelsn) - + sizeof(u_int32_t) + (key == NULL ? 0 : key->size) - + sizeof(u_int32_t) + (data == NULL ? 0 : data->size); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)opcode; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)ndx; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (pagelsn != NULL) { - if (txnp 
!= NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, pagelsn) != 0)) - return (ret); - } - memcpy(bp, pagelsn, sizeof(*pagelsn)); - } else - memset(bp, 0, sizeof(*pagelsn)); - bp += sizeof(*pagelsn); - - if (key == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &key->size, sizeof(key->size)); - bp += sizeof(key->size); - memcpy(bp, key->data, key->size); - bp += key->size; - } - - if (data == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &data->size, sizeof(data->size)); - bp += sizeof(data->size); - memcpy(bp, data->data, data->size); - bp += data->size; - } - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_insdel_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_insdel_read __P((DB_ENV *, void *, __ham_insdel_args **)); - */ -int -__ham_insdel_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_insdel_args **argpp; -{ - __ham_insdel_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_insdel_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->opcode = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->ndx = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); - bp += sizeof(argp->pagelsn); - - memset(&argp->key, 0, sizeof(argp->key)); - memcpy(&argp->key.size, bp, sizeof(u_int32_t)); - bp += 
sizeof(u_int32_t); - argp->key.data = bp; - bp += argp->key.size; - - memset(&argp->data, 0, sizeof(argp->data)); - memcpy(&argp->data.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->data.data = bp; - bp += argp->data.size; - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_newpage_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, - * PUBLIC: db_pgno_t, DB_LSN *)); - */ -int -__ham_newpage_log(dbp, txnp, ret_lsnp, flags, - opcode, prev_pgno, prevlsn, new_pgno, pagelsn, - next_pgno, nextlsn) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t opcode; - db_pgno_t prev_pgno; - DB_LSN * prevlsn; - db_pgno_t new_pgno; - DB_LSN * pagelsn; - db_pgno_t next_pgno; - DB_LSN * nextlsn; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_newpage; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*prevlsn) - + sizeof(u_int32_t) - + sizeof(*pagelsn) - + sizeof(u_int32_t) - + sizeof(*nextlsn); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)opcode; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)prev_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (prevlsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(prevlsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, prevlsn) != 0)) - return (ret); - } - memcpy(bp, prevlsn, sizeof(*prevlsn)); - } else - memset(bp, 0, sizeof(*prevlsn)); - bp += sizeof(*prevlsn); - - uinttmp = (u_int32_t)new_pgno; - 
memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (pagelsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, pagelsn) != 0)) - return (ret); - } - memcpy(bp, pagelsn, sizeof(*pagelsn)); - } else - memset(bp, 0, sizeof(*pagelsn)); - bp += sizeof(*pagelsn); - - uinttmp = (u_int32_t)next_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (nextlsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(nextlsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, nextlsn) != 0)) - return (ret); - } - memcpy(bp, nextlsn, sizeof(*nextlsn)); - } else - memset(bp, 0, sizeof(*nextlsn)); - bp += sizeof(*nextlsn); - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_newpage_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_newpage_read __P((DB_ENV *, void *, - * PUBLIC: __ham_newpage_args **)); - */ -int -__ham_newpage_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_newpage_args **argpp; -{ - __ham_newpage_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_newpage_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->opcode = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->prev_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->prevlsn, bp, sizeof(argp->prevlsn)); - bp += sizeof(argp->prevlsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->new_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); - bp += sizeof(argp->pagelsn); 
- - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->next_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->nextlsn, bp, sizeof(argp->nextlsn)); - bp += sizeof(argp->nextlsn); - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_splitdata_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, const DBT *, DB_LSN *)); - */ -int -__ham_splitdata_log(dbp, txnp, ret_lsnp, flags, opcode, pgno, pageimage, pagelsn) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t opcode; - db_pgno_t pgno; - const DBT *pageimage; - DB_LSN * pagelsn; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_splitdata; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (pageimage == NULL ? 
0 : pageimage->size) - + sizeof(*pagelsn); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)opcode; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (pageimage == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &pageimage->size, sizeof(pageimage->size)); - bp += sizeof(pageimage->size); - memcpy(bp, pageimage->data, pageimage->size); - bp += pageimage->size; - } - - if (pagelsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, pagelsn) != 0)) - return (ret); - } - memcpy(bp, pagelsn, sizeof(*pagelsn)); - } else - memset(bp, 0, sizeof(*pagelsn)); - bp += sizeof(*pagelsn); - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | 
DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_splitdata_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_splitdata_read __P((DB_ENV *, void *, - * PUBLIC: __ham_splitdata_args **)); - */ -int -__ham_splitdata_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_splitdata_args **argpp; -{ - __ham_splitdata_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_splitdata_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->opcode = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - 
memset(&argp->pageimage, 0, sizeof(argp->pageimage)); - memcpy(&argp->pageimage.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->pageimage.data = bp; - bp += argp->pageimage.size; - - memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); - bp += sizeof(argp->pagelsn); - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_replace_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, DB_LSN *, int32_t, const DBT *, - * PUBLIC: const DBT *, u_int32_t)); - */ -int -__ham_replace_log(dbp, txnp, ret_lsnp, flags, pgno, ndx, pagelsn, off, olditem, - newitem, makedup) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - u_int32_t ndx; - DB_LSN * pagelsn; - int32_t off; - const DBT *olditem; - const DBT *newitem; - u_int32_t makedup; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_replace; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*pagelsn) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (olditem == NULL ? 0 : olditem->size) - + sizeof(u_int32_t) + (newitem == NULL ? 0 : newitem->size) - + sizeof(u_int32_t); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)ndx; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (pagelsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, pagelsn) != 0)) - return (ret); - } - memcpy(bp, pagelsn, sizeof(*pagelsn)); - } else - memset(bp, 0, 
sizeof(*pagelsn)); - bp += sizeof(*pagelsn); - - uinttmp = (u_int32_t)off; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (olditem == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &olditem->size, sizeof(olditem->size)); - bp += sizeof(olditem->size); - memcpy(bp, olditem->data, olditem->size); - bp += olditem->size; - } - - if (newitem == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &newitem->size, sizeof(newitem->size)); - bp += sizeof(newitem->size); - memcpy(bp, newitem->data, newitem->size); - bp += newitem->size; - } - - uinttmp = (u_int32_t)makedup; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_replace_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_replace_read __P((DB_ENV *, void *, - * PUBLIC: __ham_replace_args **)); - */ -int -__ham_replace_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_replace_args **argpp; -{ - __ham_replace_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_replace_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->ndx = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); - bp += sizeof(argp->pagelsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->off = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->olditem, 0, sizeof(argp->olditem)); - memcpy(&argp->olditem.size, bp, 
sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->olditem.data = bp; - bp += argp->olditem.size; - - memset(&argp->newitem, 0, sizeof(argp->newitem)); - memcpy(&argp->newitem.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->newitem.data = bp; - bp += argp->newitem.size; - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->makedup = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_copypage_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t, - * PUBLIC: DB_LSN *, const DBT *)); - */ -int -__ham_copypage_log(dbp, txnp, ret_lsnp, flags, pgno, pagelsn, next_pgno, nextlsn, nnext_pgno, - nnextlsn, page) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - DB_LSN * pagelsn; - db_pgno_t next_pgno; - DB_LSN * nextlsn; - db_pgno_t nnext_pgno; - DB_LSN * nnextlsn; - const DBT *page; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_copypage; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*pagelsn) - + sizeof(u_int32_t) - + sizeof(*nextlsn) - + sizeof(u_int32_t) - + sizeof(*nnextlsn) - + sizeof(u_int32_t) + (page == NULL ? 0 : page->size); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (pagelsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, pagelsn) != 0)) - return (ret); - } - memcpy(bp, pagelsn, sizeof(*pagelsn)); - } else - memset(bp, 0, sizeof(*pagelsn)); - bp += sizeof(*pagelsn); - - uinttmp = (u_int32_t)next_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if 
(nextlsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(nextlsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, nextlsn) != 0)) - return (ret); - } - memcpy(bp, nextlsn, sizeof(*nextlsn)); - } else - memset(bp, 0, sizeof(*nextlsn)); - bp += sizeof(*nextlsn); - - uinttmp = (u_int32_t)nnext_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (nnextlsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(nnextlsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, nnextlsn) != 0)) - return (ret); - } - memcpy(bp, nnextlsn, sizeof(*nnextlsn)); - } else - memset(bp, 0, sizeof(*nnextlsn)); - bp += sizeof(*nnextlsn); - - if (page == NULL) { - zero = 0; - memcpy(bp, &zero, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - } else { - memcpy(bp, &page->size, sizeof(page->size)); - bp += sizeof(page->size); - memcpy(bp, page->data, page->size); - bp += page->size; - } - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_copypage_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_copypage_read __P((DB_ENV *, void *, - * PUBLIC: __ham_copypage_args **)); - */ -int -__ham_copypage_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_copypage_args **argpp; -{ - __ham_copypage_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_copypage_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); - bp += sizeof(argp->pagelsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->next_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->nextlsn, bp, sizeof(argp->nextlsn)); - bp += sizeof(argp->nextlsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->nnext_pgno = (db_pgno_t)uinttmp; - bp += 
sizeof(uinttmp); - - memcpy(&argp->nnextlsn, bp, sizeof(argp->nnextlsn)); - bp += sizeof(argp->nnextlsn); - - memset(&argp->page, 0, sizeof(argp->page)); - memcpy(&argp->page.size, bp, sizeof(u_int32_t)); - bp += sizeof(u_int32_t); - argp->page.data = bp; - bp += argp->page.size; - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_metagroup_42_read __P((DB_ENV *, void *, - * PUBLIC: __ham_metagroup_42_args **)); - */ -int -__ham_metagroup_42_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_metagroup_42_args **argpp; -{ - __ham_metagroup_42_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_metagroup_42_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->bucket = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->mmpgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->mmetalsn, bp, sizeof(argp->mmetalsn)); - bp += sizeof(argp->mmetalsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->mpgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); - bp += sizeof(argp->metalsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); - bp += sizeof(argp->pagelsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->newalloc = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; 
- return (0); -} - -/* - * PUBLIC: int __ham_metagroup_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, - * PUBLIC: db_pgno_t, DB_LSN *, u_int32_t, db_pgno_t)); - */ -int -__ham_metagroup_log(dbp, txnp, ret_lsnp, flags, bucket, mmpgno, mmetalsn, mpgno, metalsn, - pgno, pagelsn, newalloc, last_pgno) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t bucket; - db_pgno_t mmpgno; - DB_LSN * mmetalsn; - db_pgno_t mpgno; - DB_LSN * metalsn; - db_pgno_t pgno; - DB_LSN * pagelsn; - u_int32_t newalloc; - db_pgno_t last_pgno; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_metagroup; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*mmetalsn) - + sizeof(u_int32_t) - + sizeof(*metalsn) - + sizeof(u_int32_t) - + sizeof(*pagelsn) - + sizeof(u_int32_t) - + sizeof(u_int32_t); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)bucket; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)mmpgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (mmetalsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(mmetalsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, mmetalsn) != 0)) - return (ret); - } - memcpy(bp, mmetalsn, sizeof(*mmetalsn)); - } else - memset(bp, 0, sizeof(*mmetalsn)); - bp += 
sizeof(*mmetalsn); - - uinttmp = (u_int32_t)mpgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (metalsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(metalsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, metalsn) != 0)) - return (ret); - } - memcpy(bp, metalsn, sizeof(*metalsn)); - } else - memset(bp, 0, sizeof(*metalsn)); - bp += sizeof(*metalsn); - - uinttmp = (u_int32_t)pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (pagelsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, pagelsn) != 0)) - return (ret); - } - memcpy(bp, pagelsn, sizeof(*pagelsn)); - } else - memset(bp, 0, sizeof(*pagelsn)); - bp += sizeof(*pagelsn); - - uinttmp = (u_int32_t)newalloc; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)last_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_metagroup_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_metagroup_read __P((DB_ENV *, void *, - * PUBLIC: __ham_metagroup_args **)); - */ -int -__ham_metagroup_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_metagroup_args **argpp; -{ - __ham_metagroup_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_metagroup_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->bucket = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->mmpgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->mmetalsn, bp, sizeof(argp->mmetalsn)); - bp += sizeof(argp->mmetalsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->mpgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->metalsn, bp, sizeof(argp->metalsn)); - bp += 
sizeof(argp->metalsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->pagelsn, bp, sizeof(argp->pagelsn)); - bp += sizeof(argp->pagelsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->newalloc = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->last_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_groupalloc_42_read __P((DB_ENV *, void *, - * PUBLIC: __ham_groupalloc_42_args **)); - */ -int -__ham_groupalloc_42_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_groupalloc_42_args **argpp; -{ - __ham_groupalloc_42_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_groupalloc_42_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn)); - bp += sizeof(argp->meta_lsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->start_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->num = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->free = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_groupalloc_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, DB_LSN *, db_pgno_t, u_int32_t, db_pgno_t, - * PUBLIC: db_pgno_t)); - */ -int -__ham_groupalloc_log(dbp, txnp, 
ret_lsnp, flags, meta_lsn, start_pgno, num, unused, last_pgno) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - DB_LSN * meta_lsn; - db_pgno_t start_pgno; - u_int32_t num; - db_pgno_t unused; - db_pgno_t last_pgno; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_groupalloc; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(*meta_lsn) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - if (meta_lsn != NULL) { - if (txnp != NULL) { - LOG *lp = dbenv->lg_handle->reginfo.primary; - if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(dbenv, dbp, meta_lsn) != 0)) - return (ret); - } - memcpy(bp, meta_lsn, sizeof(*meta_lsn)); - } else - memset(bp, 0, sizeof(*meta_lsn)); - bp += sizeof(*meta_lsn); - - uinttmp = (u_int32_t)start_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)num; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)unused; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += 
sizeof(uinttmp); - - uinttmp = (u_int32_t)last_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_groupalloc_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_groupalloc_read __P((DB_ENV *, void *, - * PUBLIC: __ham_groupalloc_args **)); - */ -int -__ham_groupalloc_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_groupalloc_args **argpp; -{ - __ham_groupalloc_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_groupalloc_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - 
memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&argp->meta_lsn, bp, sizeof(argp->meta_lsn)); - bp += sizeof(argp->meta_lsn); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->start_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->num = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->unused = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->last_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_curadj_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, u_int32_t, u_int32_t, u_int32_t, int, int, - * PUBLIC: u_int32_t)); - */ -int -__ham_curadj_log(dbp, txnp, ret_lsnp, flags, pgno, indx, len, dup_off, add, - is_dup, order) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - u_int32_t indx; - u_int32_t len; - u_int32_t dup_off; - int add; - int is_dup; - u_int32_t order; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_curadj; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. 
- * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)indx; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)len; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)dup_off; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)add; - 
memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)is_dup; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)order; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_curadj_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_curadj_read __P((DB_ENV *, void *, __ham_curadj_args **)); - */ -int -__ham_curadj_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_curadj_args **argpp; -{ - __ham_curadj_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_curadj_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += 
sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->indx = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->len = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->dup_off = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->add = (int)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->is_dup = (int)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->order = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_chgpg_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_ham_mode, db_pgno_t, db_pgno_t, u_int32_t, - * PUBLIC: u_int32_t)); - */ -int -__ham_chgpg_log(dbp, txnp, ret_lsnp, flags, mode, old_pgno, new_pgno, old_indx, new_indx) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_ham_mode mode; - db_pgno_t old_pgno; - db_pgno_t new_pgno; - u_int32_t old_indx; - u_int32_t new_indx; -{ - DBT logrec; - DB_ENV *dbenv; - DB_TXNLOGREC *lr; - DB_LSN *lsnp, null_lsn, *rlsnp; - u_int32_t uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - dbenv = dbp->dbenv; - COMPQUIET(lr, NULL); - - rectype = DB___ham_chgpg; - npad = 0; - rlsnp = ret_lsnp; - - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && 
- (ret = __txn_activekids(dbenv, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(dbenv, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t); - if (CRYPTO_ON(dbenv)) { - npad = - ((DB_CIPHER *)dbenv->crypto_handle)->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(dbenv, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(dbenv, logrec.size, &logrec.data)) != 0) { - __os_free(dbenv, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - memcpy(bp, &rectype, sizeof(rectype)); - bp += sizeof(rectype); - - memcpy(bp, &txn_num, sizeof(txn_num)); - bp += sizeof(txn_num); - - memcpy(bp, lsnp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)mode; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)old_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)new_pgno; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)old_indx; 
- memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)new_indx; - memcpy(bp, &uinttmp, sizeof(uinttmp)); - bp += sizeof(uinttmp); - - DB_ASSERT(dbenv, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(dbenv, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - memcpy(logrec.data, &rectype, sizeof(rectype)); - - if (!IS_REP_CLIENT(dbenv)) - ret = __log_put(dbenv, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__ham_chgpg_print(dbenv, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(dbenv, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(dbenv, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __ham_chgpg_read __P((DB_ENV *, void *, __ham_chgpg_args **)); - */ -int -__ham_chgpg_read(dbenv, recbuf, argpp) - DB_ENV *dbenv; - void *recbuf; - __ham_chgpg_args **argpp; -{ - __ham_chgpg_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(dbenv, - sizeof(__ham_chgpg_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - memcpy(&argp->type, bp, sizeof(argp->type)); - bp += sizeof(argp->type); - - memcpy(&argp->txnp->txnid, bp, sizeof(argp->txnp->txnid)); - bp += sizeof(argp->txnp->txnid); - - memcpy(&argp->prev_lsn, bp, sizeof(DB_LSN)); - bp += sizeof(DB_LSN); - - 
memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->mode = (db_ham_mode)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->old_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->new_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->old_indx = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - memcpy(&uinttmp, bp, sizeof(uinttmp)); - argp->new_indx = (u_int32_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (0); -} - -/* - * PUBLIC: int __ham_init_recover __P((DB_ENV *, int (***)(DB_ENV *, - * PUBLIC: DBT *, DB_LSN *, db_recops, void *), size_t *)); - */ -int -__ham_init_recover(dbenv, dtabp, dtabsizep) - DB_ENV *dbenv; - int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - size_t *dtabsizep; -{ - int ret; - - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_insdel_recover, DB___ham_insdel)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_newpage_recover, DB___ham_newpage)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_splitdata_recover, DB___ham_splitdata)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_replace_recover, DB___ham_replace)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_copypage_recover, DB___ham_copypage)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_metagroup_recover, DB___ham_metagroup)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_groupalloc_recover, DB___ham_groupalloc)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_curadj_recover, DB___ham_curadj)) != 0) - return (ret); - if ((ret = 
__db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_chgpg_recover, DB___ham_chgpg)) != 0) - return (ret); - return (0); -} diff --git a/db/hash/hash_autop.c b/db/hash/hash_autop.c deleted file mode 100644 index aec47b77a..000000000 --- a/db/hash/hash_autop.c +++ /dev/null @@ -1,543 +0,0 @@ -/* Do not edit: automatically built by gen_rec.awk. */ - -#include "db_config.h" - -#ifdef HAVE_HASH -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/db_dispatch.h" -#include "dbinc/db_am.h" -#include "dbinc/hash.h" -#include "dbinc/log.h" -#include "dbinc/txn.h" - -/* - * PUBLIC: int __ham_insdel_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_insdel_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_insdel_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_insdel_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_insdel%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\topcode: %lu\n", (u_long)argp->opcode); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tndx: %lu\n", (u_long)argp->ndx); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\tkey: "); - for (i = 0; i < argp->key.size; i++) { - ch = ((u_int8_t *)argp->key.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tdata: "); - for (i = 0; i < argp->data.size; i++) { - ch = ((u_int8_t *)argp->data.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_newpage_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_newpage_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_newpage_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_newpage_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_newpage%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\topcode: %lu\n", (u_long)argp->opcode); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tprev_pgno: %lu\n", (u_long)argp->prev_pgno); - (void)printf("\tprevlsn: [%lu][%lu]\n", - (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset); - (void)printf("\tnew_pgno: %lu\n", (u_long)argp->new_pgno); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); - (void)printf("\tnextlsn: [%lu][%lu]\n", - (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_splitdata_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_splitdata_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_splitdata_args 
*argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_splitdata_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_splitdata%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\topcode: %lu\n", (u_long)argp->opcode); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tpageimage: "); - for (i = 0; i < argp->pageimage.size; i++) { - ch = ((u_int8_t *)argp->pageimage.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_replace_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_replace_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_replace_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_replace_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_replace%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tndx: %lu\n", (u_long)argp->ndx); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\toff: %ld\n", (long)argp->off); - (void)printf("\tolditem: "); - for (i = 0; i < argp->olditem.size; i++) { - ch = ((u_int8_t *)argp->olditem.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tnewitem: "); - for (i = 0; i < argp->newitem.size; i++) { - ch = ((u_int8_t *)argp->newitem.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tmakedup: %lu\n", (u_long)argp->makedup); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_copypage_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_copypage_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_copypage_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_copypage_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_copypage%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); - (void)printf("\tnextlsn: [%lu][%lu]\n", - (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); - (void)printf("\tnnext_pgno: %lu\n", (u_long)argp->nnext_pgno); - (void)printf("\tnnextlsn: [%lu][%lu]\n", - (u_long)argp->nnextlsn.file, (u_long)argp->nnextlsn.offset); - (void)printf("\tpage: "); - for (i = 0; i < argp->page.size; i++) { - ch = ((u_int8_t *)argp->page.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_metagroup_42_print __P((DB_ENV *, DBT *, - * PUBLIC: DB_LSN *, db_recops, void *)); - */ -int -__ham_metagroup_42_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_metagroup_42_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_metagroup_42_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_metagroup_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tbucket: %lu\n", (u_long)argp->bucket); - (void)printf("\tmmpgno: %lu\n", (u_long)argp->mmpgno); - (void)printf("\tmmetalsn: [%lu][%lu]\n", - (u_long)argp->mmetalsn.file, (u_long)argp->mmetalsn.offset); - (void)printf("\tmpgno: %lu\n", (u_long)argp->mpgno); - (void)printf("\tmetalsn: [%lu][%lu]\n", - (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\tnewalloc: %lu\n", (u_long)argp->newalloc); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_metagroup_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_metagroup_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_metagroup_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_metagroup_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_metagroup%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tbucket: %lu\n", (u_long)argp->bucket); - (void)printf("\tmmpgno: %lu\n", (u_long)argp->mmpgno); - (void)printf("\tmmetalsn: [%lu][%lu]\n", - (u_long)argp->mmetalsn.file, (u_long)argp->mmetalsn.offset); - (void)printf("\tmpgno: %lu\n", (u_long)argp->mpgno); - (void)printf("\tmetalsn: [%lu][%lu]\n", - (u_long)argp->metalsn.file, (u_long)argp->metalsn.offset); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\tnewalloc: %lu\n", (u_long)argp->newalloc); - (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_groupalloc_42_print __P((DB_ENV *, DBT *, - * PUBLIC: DB_LSN *, db_recops, void *)); - */ -int -__ham_groupalloc_42_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_groupalloc_42_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_groupalloc_42_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_groupalloc_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno); - (void)printf("\tnum: %lu\n", (u_long)argp->num); - (void)printf("\tfree: %lu\n", (u_long)argp->free); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_groupalloc_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_groupalloc_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_groupalloc_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_groupalloc_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_groupalloc%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tstart_pgno: %lu\n", (u_long)argp->start_pgno); - (void)printf("\tnum: %lu\n", (u_long)argp->num); - (void)printf("\tunused: %lu\n", (u_long)argp->unused); - (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_curadj_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_curadj_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_curadj_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_curadj_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_curadj%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tindx: %lu\n", (u_long)argp->indx); - (void)printf("\tlen: %lu\n", (u_long)argp->len); - (void)printf("\tdup_off: %lu\n", (u_long)argp->dup_off); - (void)printf("\tadd: %ld\n", (long)argp->add); - (void)printf("\tis_dup: %ld\n", (long)argp->is_dup); - (void)printf("\torder: %lu\n", (u_long)argp->order); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_chgpg_print __P((DB_ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__ham_chgpg_print(dbenv, dbtp, lsnp, notused2, notused3) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __ham_chgpg_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __ham_chgpg_read(dbenv, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__ham_chgpg%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tmode: %ld\n", (long)argp->mode); - (void)printf("\told_pgno: %lu\n", (u_long)argp->old_pgno); - (void)printf("\tnew_pgno: %lu\n", (u_long)argp->new_pgno); - (void)printf("\told_indx: %lu\n", (u_long)argp->old_indx); - (void)printf("\tnew_indx: %lu\n", (u_long)argp->new_indx); - (void)printf("\n"); - __os_free(dbenv, argp); - return (0); -} - -/* - * PUBLIC: int __ham_init_print __P((DB_ENV *, int (***)(DB_ENV *, - * PUBLIC: DBT *, DB_LSN *, db_recops, void *), size_t *)); - */ -int -__ham_init_print(dbenv, dtabp, dtabsizep) - DB_ENV *dbenv; - int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - size_t *dtabsizep; -{ - int ret; - - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_insdel_print, DB___ham_insdel)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_newpage_print, DB___ham_newpage)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_splitdata_print, DB___ham_splitdata)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_replace_print, DB___ham_replace)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_copypage_print, DB___ham_copypage)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_metagroup_print, DB___ham_metagroup)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_groupalloc_print, DB___ham_groupalloc)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_curadj_print, DB___ham_curadj)) != 0) - return (ret); - if ((ret = __db_add_recovery(dbenv, dtabp, dtabsizep, - __ham_chgpg_print, DB___ham_chgpg)) != 0) - return (ret); - return (0); -} -#endif /* HAVE_HASH */ diff --git 
a/db/hash/hash_conv.c b/db/hash/hash_conv.c deleted file mode 100644 index 314c238e1..000000000 --- a/db/hash/hash_conv.c +++ /dev/null @@ -1,110 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - * - * $Id: hash_conv.c,v 12.6 2007/05/17 15:15:38 bostic Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_swap.h" -#include "dbinc/hash.h" - -/* - * __ham_pgin -- - * Convert host-specific page layout from the host-independent format - * stored on disk. - * - * PUBLIC: int __ham_pgin __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); - */ -int -__ham_pgin(dbenv, dummydbp, pg, pp, cookie) - DB_ENV *dbenv; - DB *dummydbp; - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - DB_PGINFO *pginfo; - PAGE *h; - - h = pp; - pginfo = (DB_PGINFO *)cookie->data; - - /* - * The hash access method does blind reads of pages, causing them - * to be created. If the type field isn't set it's one of them, - * initialize the rest of the page and return. - */ - if (h->type != P_HASHMETA && h->pgno == PGNO_INVALID) { - P_INIT(pp, (db_indx_t)pginfo->db_pagesize, - pg, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - return (0); - } - - if (!F_ISSET(pginfo, DB_AM_SWAP)) - return (0); - - return (h->type == P_HASHMETA ? __ham_mswap(pp) : - __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 1)); -} - -/* - * __ham_pgout -- - * Convert host-specific page layout to the host-independent format - * stored on disk. - * - * PUBLIC: int __ham_pgout __P((DB_ENV *, DB *, db_pgno_t, void *, DBT *)); - */ -int -__ham_pgout(dbenv, dummydbp, pg, pp, cookie) - DB_ENV *dbenv; - DB *dummydbp; - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - DB_PGINFO *pginfo; - PAGE *h; - - pginfo = (DB_PGINFO *)cookie->data; - if (!F_ISSET(pginfo, DB_AM_SWAP)) - return (0); - - h = pp; - return (h->type == P_HASHMETA ? 
__ham_mswap(pp) : - __db_byteswap(dbenv, dummydbp, pg, pp, pginfo->db_pagesize, 0)); -} - -/* - * __ham_mswap -- - * Swap the bytes on the hash metadata page. - * - * PUBLIC: int __ham_mswap __P((void *)); - */ -int -__ham_mswap(pg) - void *pg; -{ - u_int8_t *p; - int i; - - __db_metaswap(pg); - - p = (u_int8_t *)pg + sizeof(DBMETA); - - SWAP32(p); /* max_bucket */ - SWAP32(p); /* high_mask */ - SWAP32(p); /* low_mask */ - SWAP32(p); /* ffactor */ - SWAP32(p); /* nelem */ - SWAP32(p); /* h_charkey */ - for (i = 0; i < NCACHED; ++i) - SWAP32(p); /* spares */ - p += 59 * sizeof(u_int32_t); /* unused */ - SWAP32(p); /* crypto_magic */ - return (0); -} diff --git a/db/hash/hash_dup.c b/db/hash/hash_dup.c deleted file mode 100644 index 2567310b5..000000000 --- a/db/hash/hash_dup.c +++ /dev/null @@ -1,895 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id: hash_dup.c,v 12.25 2007/05/17 17:18:00 bostic Exp $ - */ - -/* - * PACKAGE: hashing - * - * DESCRIPTION: - * Manipulation of duplicates for the hash package. - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/btree.h" -#include "dbinc/mp.h" - -static int __hamc_chgpg __P((DBC *, - db_pgno_t, u_int32_t, db_pgno_t, u_int32_t)); -static int __ham_check_move __P((DBC *, u_int32_t)); -static int __ham_dcursor __P((DBC *, db_pgno_t, u_int32_t)); -static int __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t)); - -/* - * Called from hash_access to add a duplicate key. nval is the new - * value that we want to add. The flags correspond to the flag values - * to cursor_put indicating where to add the new element. - * There are 4 cases. - * Case 1: The existing duplicate set already resides on a separate page. - * We return and let the common code handle this. - * Case 2: The element is small enough to just be added to the existing set. - * Case 3: The element is large enough to be a big item, so we're going to - * have to push the set onto a new page. 
- * Case 4: The element is large enough to push the duplicate set onto a - * separate page. - * - * PUBLIC: int __ham_add_dup __P((DBC *, DBT *, u_int32_t, db_pgno_t *)); - */ -int -__ham_add_dup(dbc, nval, flags, pgnop) - DBC *dbc; - DBT *nval; - u_int32_t flags; - db_pgno_t *pgnop; -{ - DB *dbp; - DBT pval, tmp_val; - DB_ENV *dbenv; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - u_int32_t add_bytes, new_size; - int cmp, ret; - u_int8_t *hk; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - DB_ASSERT(dbenv, flags != DB_CURRENT); - - add_bytes = nval->size + - (F_ISSET(nval, DB_DBT_PARTIAL) ? nval->doff : 0); - add_bytes = DUP_SIZE(add_bytes); - - if ((ret = __ham_check_move(dbc, add_bytes)) != 0) - return (ret); - - /* - * Check if resulting duplicate set is going to need to go - * onto a separate duplicate page. If so, convert the - * duplicate set and add the new one. After conversion, - * hcp->dndx is the first free ndx or the index of the - * current pointer into the duplicate set. - */ - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - /* Add the len bytes to the current singleton. */ - if (HPAGE_PTYPE(hk) != H_DUPLICATE) - add_bytes += DUP_SIZE(0); - new_size = - LEN_HKEYDATA(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)) + - add_bytes; - - /* - * We convert to off-page duplicates if the item is a big item, - * the addition of the new item will make the set large, or - * if there isn't enough room on this page to add the next item. - */ - if (HPAGE_PTYPE(hk) != H_OFFDUP && - (HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hcp, new_size) || - add_bytes > P_FREESPACE(dbp, hcp->page))) { - - if ((ret = __ham_dup_convert(dbc)) != 0) - return (ret); - return (hcp->opd->am_put(hcp->opd, - NULL, nval, flags, NULL)); - } - - /* There are two separate cases here: on page and off page. 
*/ - if (HPAGE_PTYPE(hk) != H_OFFDUP) { - if (HPAGE_PTYPE(hk) != H_DUPLICATE) { - pval.flags = 0; - pval.data = HKEYDATA_DATA(hk); - pval.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, - hcp->indx); - if ((ret = __ham_make_dup(dbenv, - &pval, &tmp_val, &dbc->my_rdata.data, - &dbc->my_rdata.ulen)) != 0 || (ret = - __ham_replpair(dbc, &tmp_val, 1)) != 0) - return (ret); - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - HPAGE_PTYPE(hk) = H_DUPLICATE; - - /* - * Update the cursor position since we now are in - * duplicates. - */ - F_SET(hcp, H_ISDUP); - hcp->dup_off = 0; - hcp->dup_len = pval.size; - hcp->dup_tlen = DUP_SIZE(hcp->dup_len); - } - - /* Now make the new entry a duplicate. */ - if ((ret = __ham_make_dup(dbenv, nval, - &tmp_val, &dbc->my_rdata.data, &dbc->my_rdata.ulen)) != 0) - return (ret); - - tmp_val.dlen = 0; - switch (flags) { /* On page. */ - case DB_KEYFIRST: - case DB_KEYLAST: - case DB_NODUPDATA: - if (dbp->dup_compare != NULL) { - __ham_dsearch(dbc, - nval, &tmp_val.doff, &cmp, flags); - - /* dup dups are not supported w/ sorted dups */ - if (cmp == 0) - return (__db_duperr(dbp, flags)); - } else { - hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, - dbp->pgsize, hcp->indx); - hcp->dup_len = nval->size; - F_SET(hcp, H_ISDUP); - if (flags == DB_KEYFIRST) - hcp->dup_off = tmp_val.doff = 0; - else - hcp->dup_off = - tmp_val.doff = hcp->dup_tlen; - } - break; - case DB_BEFORE: - tmp_val.doff = hcp->dup_off; - break; - case DB_AFTER: - tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len); - break; - default: - return (__db_unknown_path(dbenv, "__ham_add_dup")); - } - - /* Add the duplicate. */ - if ((ret = __memp_dirty(mpf, - &hcp->page, dbc->txn, dbc->priority, 0)) != 0 || - (ret = __ham_replpair(dbc, &tmp_val, 0)) != 0) - return (ret); - - /* Now, update the cursor if necessary. 
*/ - switch (flags) { - case DB_AFTER: - hcp->dup_off += DUP_SIZE(hcp->dup_len); - hcp->dup_len = nval->size; - hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size); - break; - case DB_BEFORE: - case DB_KEYFIRST: - case DB_KEYLAST: - case DB_NODUPDATA: - hcp->dup_tlen += (db_indx_t)DUP_SIZE(nval->size); - hcp->dup_len = nval->size; - break; - default: - return (__db_unknown_path(dbenv, "__ham_add_dup")); - } - ret = __hamc_update(dbc, tmp_val.size, DB_HAM_CURADJ_ADD, 1); - return (ret); - } - - /* - * If we get here, then we're on duplicate pages; set pgnop and - * return so the common code can handle it. - */ - memcpy(pgnop, HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), - sizeof(db_pgno_t)); - - return (ret); -} - -/* - * Convert an on-page set of duplicates to an offpage set of duplicates. - * - * PUBLIC: int __ham_dup_convert __P((DBC *)); - */ -int -__ham_dup_convert(dbc) - DBC *dbc; -{ - BOVERFLOW bo; - DB *dbp; - DBC **hcs; - DBT dbt; - DB_ENV *dbenv; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HOFFPAGE ho; - PAGE *dp; - db_indx_t i, len, off; - int c, ret, t_ret; - u_int8_t *p, *pend; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - /* - * Create a new page for the duplicates. - */ - if ((ret = __db_new(dbc, - dbp->dup_compare == NULL ? P_LRECNO : P_LDUP, &dp)) != 0) - return (ret); - P_INIT(dp, dbp->pgsize, - dp->pgno, PGNO_INVALID, PGNO_INVALID, LEAFLEVEL, TYPE(dp)); - - /* - * Get the list of cursors that may need to be updated. - */ - if ((ret = __ham_get_clist(dbp, - PGNO(hcp->page), (u_int32_t)hcp->indx, &hcs)) != 0) - goto err; - - /* - * Now put the duplicates onto the new page. - */ - dbt.flags = 0; - switch (HPAGE_PTYPE(H_PAIRDATA(dbp, hcp->page, hcp->indx))) { - case H_KEYDATA: - /* Simple case, one key on page; move it to dup page. 
*/ - dbt.size = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); - dbt.data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - ret = __db_pitem(dbc, - dp, 0, BKEYDATA_SIZE(dbt.size), NULL, &dbt); - goto finish; - case H_OFFPAGE: - /* Simple case, one key on page; move it to dup page. */ - memcpy(&ho, P_ENTRY(dbp, hcp->page, H_DATAINDEX(hcp->indx)), - HOFFPAGE_SIZE); - UMRW_SET(bo.unused1); - B_TSET(bo.type, ho.type); - UMRW_SET(bo.unused2); - bo.pgno = ho.pgno; - bo.tlen = ho.tlen; - dbt.size = BOVERFLOW_SIZE; - dbt.data = &bo; - - ret = __db_pitem(dbc, dp, 0, dbt.size, &dbt, NULL); -finish: if (ret == 0) { - /* Update any other cursors. */ - if (hcs != NULL && DBC_LOGGING(dbc) && - IS_SUBTRANSACTION(dbc->txn)) { - if ((ret = __ham_chgpg_log(dbp, dbc->txn, - &lsn, 0, DB_HAM_DUP, PGNO(hcp->page), - PGNO(dp), hcp->indx, 0)) != 0) - break; - } - for (c = 0; hcs != NULL && hcs[c] != NULL; c++) - if ((ret = __ham_dcursor(hcs[c], - PGNO(dp), 0)) != 0) - break; - } - break; - case H_DUPLICATE: - p = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - pend = p + - LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); - - /* - * We need to maintain the duplicate cursor position. - * Keep track of where we are in the duplicate set via - * the offset, and when it matches the one in the cursor, - * set the off-page duplicate cursor index to the current - * index. 
- */ - for (off = 0, i = 0; p < pend; i++) { - memcpy(&len, p, sizeof(db_indx_t)); - dbt.size = len; - p += sizeof(db_indx_t); - dbt.data = p; - p += len + sizeof(db_indx_t); - if ((ret = __db_pitem(dbc, dp, - i, BKEYDATA_SIZE(dbt.size), NULL, &dbt)) != 0) - break; - - /* Update any other cursors */ - if (hcs != NULL && DBC_LOGGING(dbc) && - IS_SUBTRANSACTION(dbc->txn)) { - if ((ret = __ham_chgpg_log(dbp, dbc->txn, - &lsn, 0, DB_HAM_DUP, PGNO(hcp->page), - PGNO(dp), hcp->indx, i)) != 0) - break; - } - for (c = 0; hcs != NULL && hcs[c] != NULL; c++) - if (((HASH_CURSOR *)(hcs[c]->internal))->dup_off - == off && (ret = __ham_dcursor(hcs[c], - PGNO(dp), i)) != 0) - goto err; - off += len + 2 * sizeof(db_indx_t); - } - break; - default: - ret = __db_pgfmt(dbenv, hcp->pgno); - break; - } - - /* - * Now attach this to the source page in place of the old duplicate - * item. - */ - if (ret == 0) - ret = __memp_dirty(mpf, - &hcp->page, dbc->txn, dbc->priority, 0); - - if (ret == 0) - ret = __ham_move_offpage(dbc, hcp->page, - (u_int32_t)H_DATAINDEX(hcp->indx), PGNO(dp)); - -err: if ((t_ret = __memp_fput(mpf, dp, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - - if (ret == 0) - hcp->dup_tlen = hcp->dup_off = hcp->dup_len = 0; - - if (hcs != NULL) - __os_free(dbenv, hcs); - - return (ret); -} - -/* - * __ham_make_dup - * - * Take a regular dbt and make it into a duplicate item with all the partial - * information set appropriately. If the incoming dbt is a partial, assume - * we are creating a new entry and make sure that we do any initial padding. 
- * - * PUBLIC: int __ham_make_dup __P((DB_ENV *, - * PUBLIC: const DBT *, DBT *d, void **, u_int32_t *)); - */ -int -__ham_make_dup(dbenv, notdup, duplicate, bufp, sizep) - DB_ENV *dbenv; - const DBT *notdup; - DBT *duplicate; - void **bufp; - u_int32_t *sizep; -{ - db_indx_t tsize, item_size; - int ret; - u_int8_t *p; - - item_size = (db_indx_t)notdup->size; - if (F_ISSET(notdup, DB_DBT_PARTIAL)) - item_size += notdup->doff; - - tsize = DUP_SIZE(item_size); - if ((ret = __ham_init_dbt(dbenv, duplicate, tsize, bufp, sizep)) != 0) - return (ret); - - duplicate->dlen = 0; - duplicate->flags = notdup->flags; - F_SET(duplicate, DB_DBT_PARTIAL); - - p = duplicate->data; - memcpy(p, &item_size, sizeof(db_indx_t)); - p += sizeof(db_indx_t); - if (F_ISSET(notdup, DB_DBT_PARTIAL)) { - memset(p, 0, notdup->doff); - p += notdup->doff; - } - memcpy(p, notdup->data, notdup->size); - p += notdup->size; - memcpy(p, &item_size, sizeof(db_indx_t)); - - duplicate->doff = 0; - duplicate->dlen = notdup->size; - - return (0); -} - -/* - * __ham_check_move -- - * - * Check if we can do whatever we need to on this page. If not, - * then we'll have to move the current element to a new page. 
- */ -static int -__ham_check_move(dbc, add_len) - DBC *dbc; - u_int32_t add_len; -{ - DB *dbp; - DBT k, d; - DB_LSN new_lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - PAGE *next_pagep; - db_pgno_t next_pgno; - u_int32_t new_datalen, old_len, rectype; - db_indx_t new_indx; - u_int8_t *hk; - int key_type, match, ret, t_ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - - /* - * If the item is already off page duplicates or an offpage item, - * then we know we can do whatever we need to do in-place - */ - if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE) - return (0); - - old_len = - LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_DATAINDEX(hcp->indx)); - new_datalen = (old_len - HKEYDATA_SIZE(0)) + add_len; - if (HPAGE_PTYPE(hk) != H_DUPLICATE) - new_datalen += DUP_SIZE(0); - - /* - * We need to add a new page under two conditions: - * 1. The addition makes the total data length cross the BIG - * threshold and the OFFDUP structure won't fit on this page. - * 2. The addition does not make the total data cross the - * threshold, but the new data won't fit on the page. - * If neither of these is true, then we can return. - */ - if (ISBIG(hcp, new_datalen) && (old_len > HOFFDUP_SIZE || - HOFFDUP_SIZE - old_len <= P_FREESPACE(dbp, hcp->page))) - return (0); - - if (!ISBIG(hcp, new_datalen) && - (new_datalen - old_len) <= P_FREESPACE(dbp, hcp->page)) - return (0); - - /* - * If we get here, then we need to move the item to a new page. - * Check if there are more pages in the chain. We now need to - * update new_datalen to include the size of both the key and - * the data that we need to move. - */ - - new_datalen = ISBIG(hcp, new_datalen) ? 
- HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen); - new_datalen += - LEN_HITEM(dbp, hcp->page, dbp->pgsize, H_KEYINDEX(hcp->indx)); - - next_pagep = NULL; - for (next_pgno = NEXT_PGNO(hcp->page); next_pgno != PGNO_INVALID; - next_pgno = NEXT_PGNO(next_pagep)) { - if (next_pagep != NULL && - (ret = __memp_fput(mpf, next_pagep, dbc->priority)) != 0) - return (ret); - - if ((ret = __memp_fget(mpf, &next_pgno, dbc->txn, - DB_MPOOL_CREATE, &next_pagep)) != 0) - return (ret); - - if (P_FREESPACE(dbp, next_pagep) >= new_datalen) - break; - } - - /* No more pages, add one. */ - if ((ret = __memp_dirty(mpf, - &hcp->page, dbc->txn, dbc->priority, 0)) != 0) - return (ret); - - if (next_pagep == NULL && (ret = __ham_add_ovflpage(dbc, - hcp->page, 0, &next_pagep)) != 0) - return (ret); - - /* Add new page at the end of the chain. */ - if ((ret = __memp_dirty(mpf, - &next_pagep, dbc->txn, dbc->priority, 0)) != 0) { - (void)__memp_fput(mpf, next_pagep, dbc->priority); - return (ret); - } - - if (P_FREESPACE(dbp, next_pagep) < new_datalen && (ret = - __ham_add_ovflpage(dbc, next_pagep, 1, &next_pagep)) != 0) { - (void)__memp_fput(mpf, next_pagep, dbc->priority); - return (ret); - } - - /* Copy the item to the new page. */ - if (DBC_LOGGING(dbc)) { - rectype = PUTPAIR; - k.flags = 0; - d.flags = 0; - if (HPAGE_PTYPE( - H_PAIRKEY(dbp, hcp->page, hcp->indx)) == H_OFFPAGE) { - rectype |= PAIR_KEYMASK; - k.data = H_PAIRKEY(dbp, hcp->page, hcp->indx); - k.size = HOFFPAGE_SIZE; - key_type = H_OFFPAGE; - } else { - k.data = - HKEYDATA_DATA(H_PAIRKEY(dbp, hcp->page, hcp->indx)); - k.size = - LEN_HKEY(dbp, hcp->page, dbp->pgsize, hcp->indx); - key_type = H_KEYDATA; - } - - /* Resolve the insert index so it can be written to the log. 
*/ - if ((ret = __ham_getindex(dbp, dbc->txn, next_pagep, &k, - key_type, &match, &new_indx)) != 0) - return (ret); - - if (HPAGE_PTYPE(hk) == H_OFFPAGE) { - rectype |= PAIR_DATAMASK; - d.data = H_PAIRDATA(dbp, hcp->page, hcp->indx); - d.size = HOFFPAGE_SIZE; - } else { - if (HPAGE_PTYPE(H_PAIRDATA(dbp, - hcp->page, hcp->indx)) == H_DUPLICATE) - rectype |= PAIR_DUPMASK; - d.data = HKEYDATA_DATA( - H_PAIRDATA(dbp, hcp->page, hcp->indx)); - d.size = LEN_HDATA(dbp, hcp->page, - dbp->pgsize, hcp->indx); - } - - if ((ret = __ham_insdel_log(dbp, - dbc->txn, &new_lsn, 0, rectype, PGNO(next_pagep), - (u_int32_t)new_indx, &LSN(next_pagep), - &k, &d)) != 0) { - (void)__memp_fput(mpf, next_pagep, dbc->priority); - return (ret); - } - } else { - LSN_NOT_LOGGED(new_lsn); - /* - * Ensure that an invalid index is passed to __ham_copypair, so - * it knows to resolve the index. Resolving the insert index - * here would require creating a temporary DBT with the key, - * and calling __ham_getindex. Let __ham_copypair do the - * resolution using the final key DBT. - */ - new_indx = NDX_INVALID; - } - - /* Move lsn onto page. */ - if ((ret = __memp_dirty(mpf, - &next_pagep, dbc->txn, dbc->priority, 0)) != 0) { - (void)__memp_fput(mpf, next_pagep, dbc->priority); - return (ret); - } - LSN(next_pagep) = new_lsn; /* Structure assignment. */ - - if ((ret = __ham_copypair(dbp, dbc->txn, hcp->page, - H_KEYINDEX(hcp->indx), next_pagep, &new_indx)) != 0) - goto out; - - /* Update all cursors that used to point to this item. */ - if ((ret = __hamc_chgpg(dbc, PGNO(hcp->page), H_KEYINDEX(hcp->indx), - PGNO(next_pagep), new_indx)) != 0) - goto out; - - /* Now delete the pair from the current page. */ - ret = __ham_del_pair(dbc, HAM_DEL_NO_RECLAIM); - - /* - * __ham_del_pair decremented nelem. This is incorrect; we - * manually copied the element elsewhere, so the total number - * of elements hasn't changed. Increment it again. - * - * !!! 
- * Note that we still have the metadata page pinned, and - * __ham_del_pair dirtied it, so we don't need to set the dirty - * flag again. - */ - if (!STD_LOCKING(dbc)) - hcp->hdr->nelem++; - -out: if ((t_ret = - __memp_fput(mpf, hcp->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - hcp->page = next_pagep; - hcp->pgno = PGNO(hcp->page); - hcp->indx = new_indx; - F_SET(hcp, H_EXPAND); - F_CLR(hcp, H_DELETED); - - return (ret); -} - -/* - * __ham_move_offpage -- - * Replace an onpage set of duplicates with the OFFDUP structure - * that references the duplicate page. - * - * XXX - * This is really just a special case of __onpage_replace; we should - * probably combine them. - * - */ -static int -__ham_move_offpage(dbc, pagep, ndx, pgno) - DBC *dbc; - PAGE *pagep; - u_int32_t ndx; - db_pgno_t pgno; -{ - DB *dbp; - DBT new_dbt; - DBT old_dbt; - HOFFDUP od; - db_indx_t i, *inp; - int32_t difflen; - u_int8_t *src; - int ret; - - dbp = dbc->dbp; - od.type = H_OFFDUP; - UMRW_SET(od.unused[0]); - UMRW_SET(od.unused[1]); - UMRW_SET(od.unused[2]); - od.pgno = pgno; - ret = 0; - - if (DBC_LOGGING(dbc)) { - new_dbt.data = &od; - new_dbt.size = HOFFDUP_SIZE; - old_dbt.data = P_ENTRY(dbp, pagep, ndx); - old_dbt.size = LEN_HITEM(dbp, pagep, dbp->pgsize, ndx); - if ((ret = __ham_replace_log(dbp, dbc->txn, &LSN(pagep), 0, - PGNO(pagep), (u_int32_t)ndx, &LSN(pagep), -1, - &old_dbt, &new_dbt, 0)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(LSN(pagep)); - - /* - * difflen is the difference in the lengths, and so may be negative. - * We know that the difference between two unsigned lengths from a - * database page will fit into an int32_t. - */ - difflen = - (int32_t)LEN_HITEM(dbp, pagep, dbp->pgsize, ndx) - - (int32_t)HOFFDUP_SIZE; - if (difflen != 0) { - /* Copy data. */ - inp = P_INP(dbp, pagep); - src = (u_int8_t *)(pagep) + HOFFSET(pagep); - memmove(src + difflen, src, inp[ndx] - HOFFSET(pagep)); - HOFFSET(pagep) += difflen; - - /* Update index table. 
*/ - for (i = ndx; i < NUM_ENT(pagep); i++) - inp[i] += difflen; - } - - /* Now copy the offdup entry onto the page. */ - memcpy(P_ENTRY(dbp, pagep, ndx), &od, HOFFDUP_SIZE); - return (ret); -} - -/* - * __ham_dsearch: - * Locate a particular duplicate in a duplicate set. Make sure that - * we exit with the cursor set appropriately. - * - * PUBLIC: void __ham_dsearch - * PUBLIC: __P((DBC *, DBT *, u_int32_t *, int *, u_int32_t)); - */ -void -__ham_dsearch(dbc, dbt, offp, cmpp, flags) - DBC *dbc; - DBT *dbt; - u_int32_t *offp, flags; - int *cmpp; -{ - DB *dbp; - HASH_CURSOR *hcp; - DBT cur; - db_indx_t i, len; - int (*func) __P((DB *, const DBT *, const DBT *)); - u_int8_t *data; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - func = dbp->dup_compare == NULL ? __bam_defcmp : dbp->dup_compare; - - i = F_ISSET(hcp, H_CONTINUE) ? hcp->dup_off: 0; - data = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + i; - hcp->dup_tlen = LEN_HDATA(dbp, hcp->page, dbp->pgsize, hcp->indx); - len = hcp->dup_len; - while (i < hcp->dup_tlen) { - memcpy(&len, data, sizeof(db_indx_t)); - data += sizeof(db_indx_t); - DB_SET_DBT(cur, data, len); - - /* - * If we find an exact match, we're done. If in a sorted - * duplicate set and the item is larger than our test item, - * we're done. In the latter case, if permitting partial - * matches, it's not a failure. - */ - *cmpp = func(dbp, dbt, &cur); - if (*cmpp == 0) - break; - if (*cmpp < 0 && dbp->dup_compare != NULL) { - if (flags == DB_GET_BOTH_RANGE) - *cmpp = 0; - break; - } - - i += len + 2 * sizeof(db_indx_t); - data += len + sizeof(db_indx_t); - } - - *offp = i; - hcp->dup_off = i; - hcp->dup_len = len; - F_SET(hcp, H_ISDUP); -} - -/* - * __ham_dcursor -- - * - * Create an off page duplicate cursor for this cursor. 
- */ -static int -__ham_dcursor(dbc, pgno, indx) - DBC *dbc; - db_pgno_t pgno; - u_int32_t indx; -{ - DB *dbp; - HASH_CURSOR *hcp; - BTREE_CURSOR *dcp; - int ret; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __dbc_newopd(dbc, pgno, hcp->opd, &hcp->opd)) != 0) - return (ret); - - dcp = (BTREE_CURSOR *)hcp->opd->internal; - dcp->pgno = pgno; - dcp->indx = indx; - - if (dbp->dup_compare == NULL) { - /* - * Converting to off-page Recno trees is tricky. The - * record number for the cursor is the index + 1 (to - * convert to 1-based record numbers). - */ - dcp->recno = indx + 1; - } - - /* - * Transfer the deleted flag from the top-level cursor to the - * created one. - */ - if (F_ISSET(hcp, H_DELETED)) { - F_SET(dcp, C_DELETED); - F_CLR(hcp, H_DELETED); - } - - return (0); -} - -/* - * __hamc_chgpg -- - * Adjust the cursors after moving an item to a new page. We only - * move cursors that are pointing at this one item and are not - * deleted; since we only touch non-deleted cursors, and since - * (by definition) no item existed at the pgno/indx we're moving the - * item to, we're guaranteed that all the cursors we affect here or - * on abort really do refer to this one item. - */ -static int -__hamc_chgpg(dbc, old_pgno, old_index, new_pgno, new_index) - DBC *dbc; - db_pgno_t old_pgno, new_pgno; - u_int32_t old_index, new_index; -{ - DB *dbp, *ldbp; - DB_ENV *dbenv; - DB_LSN lsn; - DB_TXN *my_txn; - DBC *cp; - HASH_CURSOR *hcp; - int found, ret; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - - my_txn = IS_SUBTRANSACTION(dbc->txn) ? 
dbc->txn : NULL; - - MUTEX_LOCK(dbenv, dbenv->mtx_dblist); - FIND_FIRST_DB_MATCH(dbenv, dbp, ldbp); - for (found = 0; - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { - MUTEX_LOCK(dbenv, dbp->mutex); - TAILQ_FOREACH(cp, &ldbp->active_queue, links) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - - hcp = (HASH_CURSOR *)cp->internal; - - /* - * If a cursor is deleted, it doesn't refer to this - * item--it just happens to have the same indx, but - * it points to a former neighbor. Don't move it. - */ - if (F_ISSET(hcp, H_DELETED)) - continue; - - if (hcp->pgno == old_pgno && - !MVCC_SKIP_CURADJ(cp, old_pgno)) { - if (hcp->indx == old_index) { - hcp->pgno = new_pgno; - hcp->indx = new_index; - } else - continue; - if (my_txn != NULL && cp->txn != my_txn) - found = 1; - } - } - MUTEX_UNLOCK(dbenv, dbp->mutex); - } - MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist); - - if (found != 0 && DBC_LOGGING(dbc)) { - if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, DB_HAM_CHGPG, - old_pgno, new_pgno, old_index, new_index)) != 0) - return (ret); - } - return (0); -} diff --git a/db/hash/hash_func.c b/db/hash/hash_func.c deleted file mode 100644 index 47f2eda08..000000000 --- a/db/hash/hash_func.c +++ /dev/null @@ -1,240 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id: hash_func.c,v 12.6 2007/05/17 15:15:38 bostic Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" - -/* - * __ham_func2 -- - * Phong Vo's linear congruential hash. - * - * PUBLIC: u_int32_t __ham_func2 __P((DB *, const void *, u_int32_t)); - */ -#define DCHARHASH(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) - -u_int32_t -__ham_func2(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - const u_int8_t *e, *k; - u_int32_t h; - u_int8_t c; - - if (dbp != NULL) - COMPQUIET(dbp, NULL); - - k = key; - e = k + len; - for (h = 0; k != e;) { - c = *k++; - if (!c && k > e) - break; - DCHARHASH(h, c); - } - return (h); -} - -/* - * __ham_func3 -- - * Ozan Yigit's original sdbm hash. - * - * Ugly, but fast. 
Break the string up into 8 byte units. On the first time - * through the loop get the "leftover bytes" (strlen % 8). On every other - * iteration, perform 8 HASHC's so we handle all 8 bytes. Essentially, this - * saves us 7 cmp & branch instructions. - * - * PUBLIC: u_int32_t __ham_func3 __P((DB *, const void *, u_int32_t)); - */ -u_int32_t -__ham_func3(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - const u_int8_t *k; - u_int32_t n, loop; - - if (dbp != NULL) - COMPQUIET(dbp, NULL); - - if (len == 0) - return (0); - -#define HASHC n = *k++ + 65599 * n - n = 0; - k = key; - - loop = (len + 8 - 1) >> 3; - switch (len & (8 - 1)) { - case 0: - do { - HASHC; - case 7: - HASHC; - case 6: - HASHC; - case 5: - HASHC; - case 4: - HASHC; - case 3: - HASHC; - case 2: - HASHC; - case 1: - HASHC; - } while (--loop); - } - return (n); -} - -/* - * __ham_func4 -- - * Chris Torek's hash function. Although this function performs only - * slightly worse than __ham_func5 on strings, it performs horribly on - * numbers. - * - * PUBLIC: u_int32_t __ham_func4 __P((DB *, const void *, u_int32_t)); - */ -u_int32_t -__ham_func4(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - const u_int8_t *k; - u_int32_t h, loop; - - if (dbp != NULL) - COMPQUIET(dbp, NULL); - - if (len == 0) - return (0); - -#define HASH4a h = (h << 5) - h + *k++; -#define HASH4b h = (h << 5) + h + *k++; -#define HASH4 HASH4b - h = 0; - k = key; - - loop = (len + 8 - 1) >> 3; - switch (len & (8 - 1)) { - case 0: - do { - HASH4; - case 7: - HASH4; - case 6: - HASH4; - case 5: - HASH4; - case 4: - HASH4; - case 3: - HASH4; - case 2: - HASH4; - case 1: - HASH4; - } while (--loop); - } - return (h); -} - -/* - * Fowler/Noll/Vo hash - * - * The basis of the hash algorithm was taken from an idea sent by email to the - * IEEE Posix P1003.2 mailing list from Phong Vo (kpv@research.att.com) and - * Glenn Fowler (gsf@research.att.com). 
Landon Curt Noll (chongo@toad.com) - * later improved on their algorithm. - * - * The magic is in the interesting relationship between the special prime - * 16777619 (2^24 + 403) and 2^32 and 2^8. - * - * This hash produces the fewest collisions of any function that we've seen so - * far, and works well on both numbers and strings. - * - * PUBLIC: u_int32_t __ham_func5 __P((DB *, const void *, u_int32_t)); - */ -u_int32_t -__ham_func5(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - const u_int8_t *k, *e; - u_int32_t h; - - if (dbp != NULL) - COMPQUIET(dbp, NULL); - - k = key; - e = k + len; - for (h = 0; k < e; ++k) { - h *= 16777619; - h ^= *k; - } - return (h); -} - -/* - * __ham_test -- - * - * PUBLIC: u_int32_t __ham_test __P((DB *, const void *, u_int32_t)); - */ -u_int32_t -__ham_test(dbp, key, len) - DB *dbp; - const void *key; - u_int32_t len; -{ - COMPQUIET(dbp, NULL); - COMPQUIET(len, 0); - return ((u_int32_t)*(char *)key); -} diff --git a/db/hash/hash_meta.c b/db/hash/hash_meta.c deleted file mode 100644 index 9d422b342..000000000 --- a/db/hash/hash_meta.c +++ /dev/null @@ -1,98 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1999,2007 Oracle. All rights reserved. - * - * $Id: hash_meta.c,v 12.11 2007/05/17 15:15:38 bostic Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/mp.h" - -/* - * Acquire the meta-data page. 
- * - * PUBLIC: int __ham_get_meta __P((DBC *)); - */ -int -__ham_get_meta(dbc) - DBC *dbc; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH *hashp; - HASH_CURSOR *hcp; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hashp = dbp->h_internal; - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __db_lget(dbc, 0, - hashp->meta_pgno, DB_LOCK_READ, 0, &hcp->hlock)) != 0) - return (ret); - - if ((ret = __memp_fget(mpf, &hashp->meta_pgno, dbc->txn, - DB_MPOOL_CREATE, &hcp->hdr)) != 0) - (void)__LPUT(dbc, hcp->hlock); - - return (ret); -} - -/* - * Release the meta-data page. - * - * PUBLIC: int __ham_release_meta __P((DBC *)); - */ -int -__ham_release_meta(dbc) - DBC *dbc; -{ - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - int ret; - - mpf = dbc->dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - if (hcp->hdr != NULL) { - if ((ret = __memp_fput(mpf, hcp->hdr, dbc->priority)) != 0) - return (ret); - hcp->hdr = NULL; - } - - return (__TLPUT(dbc, hcp->hlock)); -} - -/* - * Mark the meta-data page dirty. - * - * PUBLIC: int __ham_dirty_meta __P((DBC *, u_int32_t)); - */ -int -__ham_dirty_meta(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - DB *dbp; - HASH *hashp; - HASH_CURSOR *hcp; - int ret; - - dbp = dbc->dbp; - hashp = dbp->h_internal; - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __db_lget(dbc, LCK_COUPLE, - hashp->meta_pgno, DB_LOCK_WRITE, 0, &hcp->hlock)) != 0) - return (ret); - - return (__memp_dirty(dbp->mpf, - &hcp->hdr, dbc->txn, dbc->priority, flags)); -} diff --git a/db/hash/hash_method.c b/db/hash/hash_method.c deleted file mode 100644 index 30cb9b088..000000000 --- a/db/hash/hash_method.c +++ /dev/null @@ -1,183 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1999,2007 Oracle. All rights reserved. 
- * - * $Id: hash_method.c,v 12.7 2007/05/17 15:15:38 bostic Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" - -static int __ham_set_h_ffactor __P((DB *, u_int32_t)); -static int __ham_set_h_hash - __P((DB *, u_int32_t(*)(DB *, const void *, u_int32_t))); -static int __ham_set_h_nelem __P((DB *, u_int32_t)); - -/* - * __ham_db_create -- - * Hash specific initialization of the DB structure. - * - * PUBLIC: int __ham_db_create __P((DB *)); - */ -int -__ham_db_create(dbp) - DB *dbp; -{ - HASH *hashp; - int ret; - - if ((ret = __os_malloc(dbp->dbenv, - sizeof(HASH), &dbp->h_internal)) != 0) - return (ret); - - hashp = dbp->h_internal; - - hashp->h_nelem = 0; /* Defaults. */ - hashp->h_ffactor = 0; - hashp->h_hash = NULL; - hashp->h_compare = NULL; - - dbp->get_h_ffactor = __ham_get_h_ffactor; - dbp->set_h_ffactor = __ham_set_h_ffactor; - dbp->set_h_hash = __ham_set_h_hash; - dbp->set_h_compare = __ham_set_h_compare; - dbp->get_h_nelem = __ham_get_h_nelem; - dbp->set_h_nelem = __ham_set_h_nelem; - - return (0); -} - -/* - * PUBLIC: int __ham_db_close __P((DB *)); - */ -int -__ham_db_close(dbp) - DB *dbp; -{ - if (dbp->h_internal == NULL) - return (0); - __os_free(dbp->dbenv, dbp->h_internal); - dbp->h_internal = NULL; - return (0); -} - -/* - * __ham_get_h_ffactor -- - * - * PUBLIC: int __ham_get_h_ffactor __P((DB *, u_int32_t *)); - */ -int -__ham_get_h_ffactor(dbp, h_ffactorp) - DB *dbp; - u_int32_t *h_ffactorp; -{ - HASH *hashp; - - hashp = dbp->h_internal; - *h_ffactorp = hashp->h_ffactor; - return (0); -} - -/* - * __ham_set_h_ffactor -- - * Set the fill factor. - */ -static int -__ham_set_h_ffactor(dbp, h_ffactor) - DB *dbp; - u_int32_t h_ffactor; -{ - HASH *hashp; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_h_ffactor"); - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - hashp = dbp->h_internal; - hashp->h_ffactor = h_ffactor; - return (0); -} - -/* - * __ham_set_h_hash -- - * Set the hash function. 
- */ -static int -__ham_set_h_hash(dbp, func) - DB *dbp; - u_int32_t (*func) __P((DB *, const void *, u_int32_t)); -{ - HASH *hashp; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_h_hash"); - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - hashp = dbp->h_internal; - hashp->h_hash = func; - return (0); -} - -/* - * __ham_set_h_compare -- - * Set the comparison function. - * - * PUBLIC: int __ham_set_h_compare - * PUBLIC: __P((DB *, int (*)(DB *, const DBT *, const DBT *))); - */ -int -__ham_set_h_compare(dbp, func) - DB *dbp; - int (*func) __P((DB *, const DBT *, const DBT *)); -{ - HASH *t; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_h_compare"); - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - t = dbp->h_internal; - - t->h_compare = func; - - return (0); -} - -/* - * __db_get_h_nelem -- - * - * PUBLIC: int __ham_get_h_nelem __P((DB *, u_int32_t *)); - */ -int -__ham_get_h_nelem(dbp, h_nelemp) - DB *dbp; - u_int32_t *h_nelemp; -{ - HASH *hashp; - - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - hashp = dbp->h_internal; - *h_nelemp = hashp->h_nelem; - return (0); -} - -/* - * __ham_set_h_nelem -- - * Set the table size. - */ -static int -__ham_set_h_nelem(dbp, h_nelem) - DB *dbp; - u_int32_t h_nelem; -{ - HASH *hashp; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_h_nelem"); - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - hashp = dbp->h_internal; - hashp->h_nelem = h_nelem; - return (0); -} diff --git a/db/hash/hash_open.c b/db/hash/hash_open.c deleted file mode 100644 index 532091e40..000000000 --- a/db/hash/hash_open.c +++ /dev/null @@ -1,547 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $Id: hash_open.c,v 12.26 2007/06/14 14:54:37 bostic Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/log.h" -#include "dbinc/lock.h" -#include "dbinc/mp.h" -#include "dbinc/btree.h" -#include "dbinc/fop.h" - -static db_pgno_t __ham_init_meta __P((DB *, HMETA *, db_pgno_t, DB_LSN *)); - -/* - * __ham_open -- - * - * PUBLIC: int __ham_open __P((DB *, - * PUBLIC: DB_TXN *, const char * name, db_pgno_t, u_int32_t)); - */ -int -__ham_open(dbp, txn, name, base_pgno, flags) - DB *dbp; - DB_TXN *txn; - const char *name; - db_pgno_t base_pgno; - u_int32_t flags; -{ - DB_ENV *dbenv; - DBC *dbc; - HASH_CURSOR *hcp; - HASH *hashp; - int ret, t_ret; - - dbenv = dbp->dbenv; - dbc = NULL; - - /* - * Get a cursor. If DB_CREATE is specified, we may be creating - * pages, and to do that safely in CDB we need a write cursor. - * In STD_LOCKING mode, we'll synchronize using the meta page - * lock instead. - */ - if ((ret = __db_cursor(dbp, - txn, &dbc, LF_ISSET(DB_CREATE) && CDB_LOCKING(dbenv) ? - DB_WRITECURSOR : 0)) != 0) - return (ret); - - hcp = (HASH_CURSOR *)dbc->internal; - hashp = dbp->h_internal; - hashp->meta_pgno = base_pgno; - if ((ret = __ham_get_meta(dbc)) != 0) - goto err; - - /* Initialize the hdr structure. */ - if (hcp->hdr->dbmeta.magic == DB_HASHMAGIC) { - /* File exists, verify the data in the header. */ - if (hashp->h_hash == NULL) - hashp->h_hash = hcp->hdr->dbmeta.version < 5 - ? 
__ham_func4 : __ham_func5; - hashp->h_nelem = hcp->hdr->nelem; - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUP)) - F_SET(dbp, DB_AM_DUP); - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_DUPSORT)) - F_SET(dbp, DB_AM_DUPSORT); - if (F_ISSET(&hcp->hdr->dbmeta, DB_HASH_SUBDB)) - F_SET(dbp, DB_AM_SUBDB); - } else if (!IS_RECOVERING(dbenv) && !F_ISSET(dbp, DB_AM_RECOVER)) { - __db_errx(dbenv, - "%s: Invalid hash meta page %lu", name, (u_long)base_pgno); - ret = EINVAL; - } - - /* Release the meta data page */ - if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; -err: if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __ham_metachk -- - * - * PUBLIC: int __ham_metachk __P((DB *, const char *, HMETA *)); - */ -int -__ham_metachk(dbp, name, hashm) - DB *dbp; - const char *name; - HMETA *hashm; -{ - DB_ENV *dbenv; - u_int32_t vers; - int ret; - - dbenv = dbp->dbenv; - - /* - * At this point, all we know is that the magic number is for a Hash. - * Check the version, the database may be out of date. - */ - vers = hashm->dbmeta.version; - if (F_ISSET(dbp, DB_AM_SWAP)) - M_32_SWAP(vers); - switch (vers) { - case 4: - case 5: - case 6: - __db_errx(dbenv, - "%s: hash version %lu requires a version upgrade", - name, (u_long)vers); - return (DB_OLD_VERSION); - case 7: - case 8: - case 9: - break; - default: - __db_errx(dbenv, - "%s: unsupported hash version: %lu", name, (u_long)vers); - return (EINVAL); - } - - /* Swap the page if we need to. */ - if (F_ISSET(dbp, DB_AM_SWAP) && (ret = __ham_mswap((PAGE *)hashm)) != 0) - return (ret); - - /* Check the type. */ - if (dbp->type != DB_HASH && dbp->type != DB_UNKNOWN) - return (EINVAL); - dbp->type = DB_HASH; - DB_ILLEGAL_METHOD(dbp, DB_OK_HASH); - - /* - * Check application info against metadata info, and set info, flags, - * and type based on metadata info. 
- */ - if ((ret = __db_fchk(dbenv, - "DB->open", hashm->dbmeta.flags, - DB_HASH_DUP | DB_HASH_SUBDB | DB_HASH_DUPSORT)) != 0) - return (ret); - - if (F_ISSET(&hashm->dbmeta, DB_HASH_DUP)) - F_SET(dbp, DB_AM_DUP); - else - if (F_ISSET(dbp, DB_AM_DUP)) { - __db_errx(dbenv, - "%s: DB_DUP specified to open method but not set in database", - name); - return (EINVAL); - } - - if (F_ISSET(&hashm->dbmeta, DB_HASH_SUBDB)) - F_SET(dbp, DB_AM_SUBDB); - else - if (F_ISSET(dbp, DB_AM_SUBDB)) { - __db_errx(dbenv, - "%s: multiple databases specified but not supported in file", - name); - return (EINVAL); - } - - if (F_ISSET(&hashm->dbmeta, DB_HASH_DUPSORT)) { - if (dbp->dup_compare == NULL) - dbp->dup_compare = __bam_defcmp; - } else - if (dbp->dup_compare != NULL) { - __db_errx(dbenv, - "%s: duplicate sort function specified but not set in database", - name); - return (EINVAL); - } - - /* Set the page size. */ - dbp->pgsize = hashm->dbmeta.pagesize; - - /* Copy the file's ID. */ - memcpy(dbp->fileid, hashm->dbmeta.uid, DB_FILE_ID_LEN); - - return (0); -} - -/* - * __ham_init_meta -- - * - * Initialize a hash meta-data page. We assume that the meta-data page is - * contiguous with the initial buckets that we create. If that turns out - * to be false, we'll fix it up later. Return the initial number of buckets - * allocated. - */ -static db_pgno_t -__ham_init_meta(dbp, meta, pgno, lsnp) - DB *dbp; - HMETA *meta; - db_pgno_t pgno; - DB_LSN *lsnp; -{ - DB_ENV *dbenv; - HASH *hashp; - db_pgno_t nbuckets; - u_int i, l2; - - dbenv = dbp->dbenv; - hashp = dbp->h_internal; - - if (hashp->h_hash == NULL) - hashp->h_hash = DB_HASHVERSION < 5 ? __ham_func4 : __ham_func5; - - if (hashp->h_nelem != 0 && hashp->h_ffactor != 0) { - hashp->h_nelem = (hashp->h_nelem - 1) / hashp->h_ffactor + 1; - l2 = __db_log2(hashp->h_nelem > 2 ? 
hashp->h_nelem : 2); - } else - l2 = 1; - nbuckets = (db_pgno_t)(1 << l2); - - memset(meta, 0, sizeof(HMETA)); - meta->dbmeta.lsn = *lsnp; - meta->dbmeta.pgno = pgno; - meta->dbmeta.magic = DB_HASHMAGIC; - meta->dbmeta.version = DB_HASHVERSION; - meta->dbmeta.pagesize = dbp->pgsize; - if (F_ISSET(dbp, DB_AM_CHKSUM)) - FLD_SET(meta->dbmeta.metaflags, DBMETA_CHKSUM); - if (F_ISSET(dbp, DB_AM_ENCRYPT)) { - meta->dbmeta.encrypt_alg = - ((DB_CIPHER *)dbenv->crypto_handle)->alg; - DB_ASSERT(dbenv, meta->dbmeta.encrypt_alg != 0); - meta->crypto_magic = meta->dbmeta.magic; - } - meta->dbmeta.type = P_HASHMETA; - meta->dbmeta.free = PGNO_INVALID; - meta->dbmeta.last_pgno = pgno; - meta->max_bucket = nbuckets - 1; - meta->high_mask = nbuckets - 1; - meta->low_mask = (nbuckets >> 1) - 1; - meta->ffactor = hashp->h_ffactor; - meta->nelem = hashp->h_nelem; - meta->h_charkey = hashp->h_hash(dbp, CHARKEY, sizeof(CHARKEY)); - memcpy(meta->dbmeta.uid, dbp->fileid, DB_FILE_ID_LEN); - - if (F_ISSET(dbp, DB_AM_DUP)) - F_SET(&meta->dbmeta, DB_HASH_DUP); - if (F_ISSET(dbp, DB_AM_SUBDB)) - F_SET(&meta->dbmeta, DB_HASH_SUBDB); - if (dbp->dup_compare != NULL) - F_SET(&meta->dbmeta, DB_HASH_DUPSORT); - - /* - * Create the first and second buckets pages so that we have the - * page numbers for them and we can store that page number in the - * meta-data header (spares[0]). - */ - meta->spares[0] = pgno + 1; - - /* Fill in the last fields of the meta data page. */ - for (i = 1; i <= l2; i++) - meta->spares[i] = meta->spares[0]; - for (; i < NCACHED; i++) - meta->spares[i] = PGNO_INVALID; - - return (nbuckets); -} - -/* - * __ham_new_file -- - * Create the necessary pages to begin a new database file. If name - * is NULL, then this is an unnamed file, the mpf has been set in the dbp - * and we simply create the pages using mpool. In this case, we don't log - * because we never have to redo an unnamed create and the undo simply - * frees resources. 
- * - * This code appears more complex than it is because of the two cases (named - * and unnamed). The way to read the code is that for each page being created, - * there are three parts: 1) a "get page" chunk (which either uses malloc'd - * memory or calls __memp_fget), 2) the initialization, and 3) the "put page" - * chunk which either does a fop write or an __memp_fput. - * - * PUBLIC: int __ham_new_file __P((DB *, DB_TXN *, DB_FH *, const char *)); - */ -int -__ham_new_file(dbp, txn, fhp, name) - DB *dbp; - DB_TXN *txn; - DB_FH *fhp; - const char *name; -{ - DB_ENV *dbenv; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - DB_PGINFO pginfo; - DBT pdbt; - HMETA *meta; - PAGE *page; - int ret; - db_pgno_t lpgno; - void *buf; - - dbenv = dbp->dbenv; - mpf = dbp->mpf; - meta = NULL; - page = NULL; - buf = NULL; - - if (F_ISSET(dbp, DB_AM_INMEM)) { - /* Build meta-data page. */ - lpgno = PGNO_BASE_MD; - if ((ret = __memp_fget(mpf, &lpgno, txn, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &meta)) != 0) - return (ret); - LSN_NOT_LOGGED(lsn); - lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); - meta->dbmeta.last_pgno = lpgno; - if ((ret = __db_log_page(dbp, - txn, &lsn, meta->dbmeta.pgno, (PAGE *)meta)) != 0) - goto err; - ret = __memp_fput(mpf, meta, dbp->priority); - meta = NULL; - if (ret != 0) - goto err; - - /* Allocate the final hash bucket. */ - if ((ret = __memp_fget(mpf, &lpgno, txn, - DB_MPOOL_CREATE, &page)) != 0) - goto err; - P_INIT(page, - dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - LSN_NOT_LOGGED(page->lsn); - if ((ret = - __db_log_page(dbp, txn, &page->lsn, lpgno, page)) != 0) - goto err; - ret = __memp_fput(mpf, page, dbp->priority); - page = NULL; - if (ret != 0) - goto err; - } else { - memset(&pdbt, 0, sizeof(pdbt)); - - /* Build meta-data page. 
*/ - pginfo.db_pagesize = dbp->pgsize; - pginfo.type = dbp->type; - pginfo.flags = - F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); - pdbt.data = &pginfo; - pdbt.size = sizeof(pginfo); - if ((ret = __os_calloc(dbp->dbenv, 1, dbp->pgsize, &buf)) != 0) - return (ret); - meta = (HMETA *)buf; - LSN_NOT_LOGGED(lsn); - lpgno = __ham_init_meta(dbp, meta, PGNO_BASE_MD, &lsn); - meta->dbmeta.last_pgno = lpgno; - if ((ret = __db_pgout(dbenv, PGNO_BASE_MD, meta, &pdbt)) != 0) - goto err; - if ((ret = __fop_write(dbenv, txn, name, DB_APP_DATA, fhp, - dbp->pgsize, 0, 0, buf, dbp->pgsize, 1, F_ISSET( - dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) - goto err; - meta = NULL; - - /* Allocate the final hash bucket. */ -#ifdef DIAGNOSTIC - memset(buf, 0, dbp->pgsize); -#endif - page = (PAGE *)buf; - P_INIT(page, - dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - LSN_NOT_LOGGED(page->lsn); - if ((ret = __db_pgout(dbenv, lpgno, buf, &pdbt)) != 0) - goto err; - if ((ret = __fop_write(dbenv, txn, name, DB_APP_DATA, fhp, - dbp->pgsize, lpgno, 0, buf, dbp->pgsize, 1, F_ISSET( - dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0)) != 0) - goto err; - page = NULL; - } - -err: if (buf != NULL) - __os_free(dbenv, buf); - else { - if (meta != NULL) - (void)__memp_fput(mpf, meta, dbp->priority); - if (page != NULL) - (void)__memp_fput(mpf, page, dbp->priority); - } - return (ret); -} - -/* - * __ham_new_subdb -- - * Create the necessary pages to begin a new subdatabase. 
- * - * PUBLIC: int __ham_new_subdb __P((DB *, DB *, DB_TXN *)); - */ -int -__ham_new_subdb(mdbp, dbp, txn) - DB *mdbp, *dbp; - DB_TXN *txn; -{ - DBC *dbc; - DB_ENV *dbenv; - DB_LOCK metalock, mmlock; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - DBMETA *mmeta; - HMETA *meta; - PAGE *h; - int i, ret, t_ret; - db_pgno_t lpgno, mpgno; - - dbenv = mdbp->dbenv; - mpf = mdbp->mpf; - dbc = NULL; - meta = NULL; - mmeta = NULL; - LOCK_INIT(metalock); - LOCK_INIT(mmlock); - - if ((ret = __db_cursor(mdbp, txn, - &dbc, CDB_LOCKING(dbenv) ? DB_WRITECURSOR : 0)) != 0) - return (ret); - - /* Get and lock the new meta data page. */ - if ((ret = __db_lget(dbc, - 0, dbp->meta_pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &dbp->meta_pgno, dbc->txn, - DB_MPOOL_CREATE, &meta)) != 0) - goto err; - - /* Initialize the new meta-data page. */ - lsn = meta->dbmeta.lsn; - lpgno = __ham_init_meta(dbp, meta, dbp->meta_pgno, &lsn); - - /* - * We are about to allocate a set of contiguous buckets (lpgno - * worth). We need to get the master meta-data page to figure - * out where these pages are and to allocate them. So, lock and - * get the master meta data page. - */ - mpgno = PGNO_BASE_MD; - if ((ret = __db_lget(dbc, 0, mpgno, DB_LOCK_WRITE, 0, &mmlock)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &mpgno, dbc->txn, - DB_MPOOL_DIRTY, &mmeta)) != 0) - goto err; - - /* - * Now update the hash meta-data page to reflect where the first - * set of buckets are actually located. - */ - meta->spares[0] = mmeta->last_pgno + 1; - for (i = 0; i < NCACHED && meta->spares[i] != PGNO_INVALID; i++) - meta->spares[i] = meta->spares[0]; - - /* The new meta data page is now complete; log it. */ - if ((ret = __db_log_page(mdbp, - txn, &meta->dbmeta.lsn, dbp->meta_pgno, (PAGE *)meta)) != 0) - goto err; - - /* Reflect the group allocation. 
*/ - if (DBENV_LOGGING(dbenv) -#if !defined(DEBUG_WOP) - && txn != NULL -#endif - ) - if ((ret = __ham_groupalloc_log(mdbp, txn, - &LSN(mmeta), 0, &LSN(mmeta), meta->spares[0], - meta->max_bucket + 1, 0, mmeta->last_pgno)) != 0) - goto err; - - /* Release the new meta-data page. */ - if ((ret = __memp_fput(mpf, meta, dbc->priority)) != 0) - goto err; - meta = NULL; - - lpgno += mmeta->last_pgno; - - /* Now allocate the final hash bucket. */ - if ((ret = __memp_fget(mpf, &lpgno, dbc->txn, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &h)) != 0) - goto err; - - mmeta->last_pgno = lpgno; - P_INIT(h, dbp->pgsize, lpgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - LSN(h) = LSN(mmeta); - if ((ret = __memp_fput(mpf, h, dbc->priority)) != 0) - goto err; - -err: /* Now put the master-metadata page back. */ - if (mmeta != NULL && (t_ret = __memp_fput(mpf, - mmeta, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __LPUT(dbc, mmlock)) != 0 && ret == 0) - ret = t_ret; - if (meta != NULL && - (t_ret = __memp_fput(mpf, meta, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) - ret = t_ret; - if (dbc != NULL) - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} diff --git a/db/hash/hash_page.c b/db/hash/hash_page.c deleted file mode 100644 index 9ee48c44d..000000000 --- a/db/hash/hash_page.c +++ /dev/null @@ -1,2633 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. 
Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id: hash_page.c,v 12.43 2007/05/18 13:58:31 bostic Exp $ - */ - -/* - * PACKAGE: hashing - * - * DESCRIPTION: - * Page manipulation for hashing package. 
- */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/mp.h" - -static int __hamc_delpg - __P((DBC *, db_pgno_t, db_pgno_t, u_int32_t, db_ham_mode, u_int32_t *)); -static int __ham_getindex_sorted __P((DB *, - DB_TXN *, PAGE *, const DBT *, int, int *, db_indx_t *)); -static int __ham_getindex_unsorted __P((DB *, - DB_TXN *, PAGE *, const DBT *, int *, db_indx_t *)); -static int __ham_sort_page_cursor __P((DBC *, PAGE *)); - -/* - * PUBLIC: int __ham_item __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - DB *dbp; - HASH_CURSOR *hcp; - db_pgno_t next_pgno; - int ret; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - - if (F_ISSET(hcp, H_DELETED)) { - __db_errx(dbp->dbenv, "Attempt to return a deleted item"); - return (EINVAL); - } - F_CLR(hcp, H_OK | H_NOMORE); - - /* Check if we need to get a page for this cursor. */ - if ((ret = __ham_get_cpage(dbc, mode)) != 0) - return (ret); - -recheck: - /* Check if we are looking for space in which to insert an item. */ - if (hcp->seek_size != 0 && hcp->seek_found_page == PGNO_INVALID && - hcp->seek_size < P_FREESPACE(dbp, hcp->page)) { - hcp->seek_found_page = hcp->pgno; - hcp->seek_found_indx = NDX_INVALID; - } - - /* Check for off-page duplicates. */ - if (hcp->indx < NUM_ENT(hcp->page) && - HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == H_OFFDUP) { - memcpy(pgnop, - HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), - sizeof(db_pgno_t)); - F_SET(hcp, H_OK); - return (0); - } - - /* Check if we need to go on to the next page. */ - if (F_ISSET(hcp, H_ISDUP)) - /* - * ISDUP is set, and offset is at the beginning of the datum. - * We need to grab the length of the datum, then set the datum - * pointer to be the beginning of the datum. 
- */ - memcpy(&hcp->dup_len, - HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)) + - hcp->dup_off, sizeof(db_indx_t)); - - if (hcp->indx >= (db_indx_t)NUM_ENT(hcp->page)) { - /* Fetch next page. */ - if (NEXT_PGNO(hcp->page) == PGNO_INVALID) { - F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); - } - next_pgno = NEXT_PGNO(hcp->page); - hcp->indx = 0; - if ((ret = __ham_next_cpage(dbc, next_pgno)) != 0) - return (ret); - goto recheck; - } - - F_SET(hcp, H_OK); - return (0); -} - -/* - * PUBLIC: int __ham_item_reset __P((DBC *)); - */ -int -__ham_item_reset(dbc) - DBC *dbc; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - int ret, t_ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - ret = 0; - if (hcp->page != NULL) - ret = __memp_fput(mpf, hcp->page, dbc->priority); - - if ((t_ret = __ham_item_init(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * PUBLIC: int __ham_item_init __P((DBC *)); - */ -int -__ham_item_init(dbc) - DBC *dbc; -{ - HASH_CURSOR *hcp; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - - /* - * If this cursor still holds any locks, we must release them if - * we are not running with transactions. - */ - ret = __TLPUT(dbc, hcp->lock); - - /* - * The following fields must *not* be initialized here because they - * may have meaning across inits. - * hlock, hdr, split_buf, stats - */ - hcp->bucket = BUCKET_INVALID; - hcp->lbucket = BUCKET_INVALID; - LOCK_INIT(hcp->lock); - hcp->lock_mode = DB_LOCK_NG; - hcp->dup_off = 0; - hcp->dup_len = 0; - hcp->dup_tlen = 0; - hcp->seek_size = 0; - hcp->seek_found_page = PGNO_INVALID; - hcp->seek_found_indx = NDX_INVALID; - hcp->flags = 0; - - hcp->pgno = PGNO_INVALID; - hcp->indx = NDX_INVALID; - hcp->page = NULL; - - return (ret); -} - -/* - * Returns the last item in a bucket. 
- * - * PUBLIC: int __ham_item_last __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item_last(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - HASH_CURSOR *hcp; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_item_reset(dbc)) != 0) - return (ret); - - hcp->bucket = hcp->hdr->max_bucket; - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - F_SET(hcp, H_OK); - return (__ham_item_prev(dbc, mode, pgnop)); -} - -/* - * PUBLIC: int __ham_item_first __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item_first(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - HASH_CURSOR *hcp; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_item_reset(dbc)) != 0) - return (ret); - F_SET(hcp, H_OK); - hcp->bucket = 0; - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - return (__ham_item_next(dbc, mode, pgnop)); -} - -/* - * __ham_item_prev -- - * Returns a pointer to key/data pair on a page. In the case of - * bigkeys, just returns the page number and index of the bigkey - * pointer pair. - * - * PUBLIC: int __ham_item_prev __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item_prev(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - DB *dbp; - HASH_CURSOR *hcp; - db_pgno_t next_pgno; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - dbp = dbc->dbp; - - /* - * There are 5 cases for backing up in a hash file. - * Case 1: In the middle of a page, no duplicates, just dec the index. - * Case 2: In the middle of a duplicate set, back up one. - * Case 3: At the beginning of a duplicate set, get out of set and - * back up to next key. - * Case 4: At the beginning of a page; go to previous page. - * Case 5: At the beginning of a bucket; go to prev bucket. - */ - F_CLR(hcp, H_OK | H_NOMORE | H_DELETED); - - if ((ret = __ham_get_cpage(dbc, mode)) != 0) - return (ret); - - /* - * First handle the duplicates. 
Either you'll get the key here - * or you'll exit the duplicate set and drop into the code below - * to handle backing up through keys. - */ - if (!F_ISSET(hcp, H_NEXT_NODUP) && F_ISSET(hcp, H_ISDUP)) { - if (HPAGE_TYPE(dbp, hcp->page, H_DATAINDEX(hcp->indx)) == - H_OFFDUP) { - memcpy(pgnop, - HOFFDUP_PGNO(H_PAIRDATA(dbp, hcp->page, hcp->indx)), - sizeof(db_pgno_t)); - F_SET(hcp, H_OK); - return (0); - } - - /* Duplicates are on-page. */ - if (hcp->dup_off != 0) { - memcpy(&hcp->dup_len, HKEYDATA_DATA( - H_PAIRDATA(dbp, hcp->page, hcp->indx)) - + hcp->dup_off - sizeof(db_indx_t), - sizeof(db_indx_t)); - hcp->dup_off -= - DUP_SIZE(hcp->dup_len); - return (__ham_item(dbc, mode, pgnop)); - } - } - - /* - * If we get here, we are not in a duplicate set, and just need - * to back up the cursor. There are still three cases: - * midpage, beginning of page, beginning of bucket. - */ - - if (F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } else - /* - * We are no longer in a dup set; flag this so the dup code - * will reinitialize should we stumble upon another one. - */ - F_CLR(hcp, H_ISDUP); - - if (hcp->indx == 0) { /* Beginning of page. */ - hcp->pgno = PREV_PGNO(hcp->page); - if (hcp->pgno == PGNO_INVALID) { - /* Beginning of bucket. */ - F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); - } else if ((ret = - __ham_next_cpage(dbc, hcp->pgno)) != 0) - return (ret); - else - hcp->indx = NUM_ENT(hcp->page); - } - - /* - * Either we've got the cursor set up to be decremented, or we - * have to find the end of a bucket. - */ - if (hcp->indx == NDX_INVALID) { - DB_ASSERT(dbp->dbenv, hcp->page != NULL); - - hcp->indx = NUM_ENT(hcp->page); - for (next_pgno = NEXT_PGNO(hcp->page); - next_pgno != PGNO_INVALID; - next_pgno = NEXT_PGNO(hcp->page)) { - if ((ret = __ham_next_cpage(dbc, next_pgno)) != 0) - return (ret); - hcp->indx = NUM_ENT(hcp->page); - } - - if (hcp->indx == 0) { - /* Bucket was empty. 
*/ - F_SET(hcp, H_NOMORE); - return (DB_NOTFOUND); - } - } - - hcp->indx -= 2; - - return (__ham_item(dbc, mode, pgnop)); -} - -/* - * Sets the cursor to the next key/data pair on a page. - * - * PUBLIC: int __ham_item_next __P((DBC *, db_lockmode_t, db_pgno_t *)); - */ -int -__ham_item_next(dbc, mode, pgnop) - DBC *dbc; - db_lockmode_t mode; - db_pgno_t *pgnop; -{ - HASH_CURSOR *hcp; - int ret; - - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __ham_get_cpage(dbc, mode)) != 0) - return (ret); - - /* - * Deleted on-page duplicates are a weird case. If we delete the last - * one, then our cursor is at the very end of a duplicate set and - * we actually need to go on to the next key. - */ - if (F_ISSET(hcp, H_DELETED)) { - if (hcp->indx != NDX_INVALID && - F_ISSET(hcp, H_ISDUP) && - HPAGE_TYPE(dbc->dbp, hcp->page, H_DATAINDEX(hcp->indx)) - == H_DUPLICATE && hcp->dup_tlen == hcp->dup_off) { - if (F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } else { - F_CLR(hcp, H_ISDUP); - hcp->indx += 2; - } - } else if (!F_ISSET(hcp, H_ISDUP) && F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } else if (F_ISSET(hcp, H_ISDUP) && - F_ISSET(hcp, H_NEXT_NODUP)) { - F_CLR(hcp, H_ISDUP); - hcp->indx += 2; - } - F_CLR(hcp, H_DELETED); - } else if (hcp->indx == NDX_INVALID) { - hcp->indx = 0; - F_CLR(hcp, H_ISDUP); - } else if (F_ISSET(hcp, H_NEXT_NODUP)) { - hcp->indx += 2; - F_CLR(hcp, H_ISDUP); - } else if (F_ISSET(hcp, H_ISDUP) && hcp->dup_tlen != 0) { - if (hcp->dup_off + DUP_SIZE(hcp->dup_len) >= - hcp->dup_tlen && F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } - hcp->dup_off += DUP_SIZE(hcp->dup_len); - if (hcp->dup_off >= hcp->dup_tlen) { - F_CLR(hcp, H_ISDUP); - hcp->indx += 2; - } - } else if (F_ISSET(hcp, H_DUPONLY)) { - F_CLR(hcp, H_OK); - F_SET(hcp, H_NOMORE); - return (0); - } else { - hcp->indx += 2; - F_CLR(hcp, H_ISDUP); - } - - return 
(__ham_item(dbc, mode, pgnop)); -} - -/* - * __ham_insertpair -- - * - * Used for adding a pair of elements to a sorted page. We are guaranteed that - * the pair will fit on this page. - * - * If an index is provided, then use it, otherwise lookup the index using - * __ham_getindex. This saves a getindex call when inserting using a cursor. - * - * We're overloading the meaning of the H_OFFPAGE type here, which is a little - * bit sleazy. When we recover deletes, we have the entire entry instead of - * having only the DBT, so we'll pass type H_OFFPAGE to mean "copy the whole - * entry" as opposed to constructing an H_KEYDATA around it. In the recovery - * case it is assumed that a valid index is passed in, since a lookup using - * the overloaded H_OFFPAGE key will be incorrect. - * - * PUBLIC: int __ham_insertpair __P((DB *, DB_TXN *, - * PUBLIC: PAGE *p, db_indx_t *indxp, const DBT *, const DBT *, int, int)); - */ -int -__ham_insertpair(dbp, txn, p, indxp, key_dbt, data_dbt, key_type, data_type) - DB *dbp; - DB_TXN *txn; - PAGE *p; - db_indx_t *indxp; - const DBT *key_dbt, *data_dbt; - int key_type, data_type; -{ - u_int16_t n, indx; - db_indx_t *inp; - u_int32_t ksize, dsize, increase, distance; - u_int8_t *offset; - int i, match, ret; - - n = NUM_ENT(p); - inp = P_INP(dbp, p); - ksize = (key_type == H_OFFPAGE) ? - key_dbt->size : HKEYDATA_SIZE(key_dbt->size); - dsize = (data_type == H_OFFPAGE) ? 
- data_dbt->size : HKEYDATA_SIZE(data_dbt->size); - increase = ksize + dsize; - - if (indxp != NULL && *indxp != NDX_INVALID) - indx = *indxp; - else { - if ((ret = __ham_getindex(dbp, txn, p, key_dbt, - key_type, &match, &indx)) != 0) - return (ret); - /* Save the index for the caller */ - if (indxp != NULL) - *indxp = indx; - /* It is an error to insert a duplicate key */ - DB_ASSERT(dbp->dbenv, match != 0); - } - - /* Special case if the page is empty or inserting at end of page.*/ - if (n == 0 || indx == n) { - inp[indx] = HOFFSET(p) - ksize; - inp[indx+1] = HOFFSET(p) - increase; - } else { - /* - * Shuffle the data elements. - * - * For example, inserting an element that sorts between items - * 2 and 3 on a page: - * The copy starts from the beginning of the second item. - * - * --------------------------- - * |pgheader.. - * |__________________________ - * ||1|2|3|4|... - * |-------------------------- - * | - * |__________________________ - * | ...|4|3|2|1| - * |-------------------------- - * --------------------------- - * - * Becomes: - * - * --------------------------- - * |pgheader.. - * |__________________________ - * ||1|2|2a|3|4|... - * |-------------------------- - * | - * |__________________________ - * | ...|4|3|2a|2|1| - * |-------------------------- - * --------------------------- - * - * Index's 3,4 etc move down the page. - * The data for 3,4,etc moves up the page by sizeof(2a) - * The index pointers in 3,4 etc are updated to point at the - * relocated data. - * It is necessary to move the data (not just adjust the index) - * since the hash format uses consecutive data items to - * dynamically calculate the item size. - * An item in this example is a key/data pair. 
- */ - offset = (u_int8_t *)p + HOFFSET(p); - if (indx == 0) - distance = dbp->pgsize - HOFFSET(p); - else - distance = (u_int32_t) - (P_ENTRY(dbp, p, indx - 1) - offset); - memmove(offset - increase, offset, distance); - - /* Shuffle the index array */ - memmove(&inp[indx + 2], &inp[indx], - (n - indx) * sizeof(db_indx_t)); - - /* update the index array */ - for (i = indx + 2; i < n + 2; i++) - inp[i] -= increase; - - /* set the new index elements. */ - inp[indx] = (HOFFSET(p) - increase) + distance + dsize; - inp[indx + 1] = (HOFFSET(p) - increase) + distance; - } - - HOFFSET(p) -= increase; - /* insert the new elements */ - if (key_type == H_OFFPAGE) - memcpy(P_ENTRY(dbp, p, indx), key_dbt->data, key_dbt->size); - else - PUT_HKEYDATA(P_ENTRY(dbp, p, indx), key_dbt->data, - key_dbt->size, key_type); - if (data_type == H_OFFPAGE) - memcpy(P_ENTRY(dbp, p, indx+1), data_dbt->data, - data_dbt->size); - else - PUT_HKEYDATA(P_ENTRY(dbp, p, indx+1), data_dbt->data, - data_dbt->size, data_type); - NUM_ENT(p) += 2; - - /* - * If debugging a sorted hash page problem, this is a good place to - * insert a call to __ham_verify_sorted_page. - * It used to be called when diagnostic mode was enabled, but that - * causes problems in recovery if a custom comparator was used. - */ - return (0); -} - -/* - * __hame_getindex -- - * - * The key_type parameter overloads the entry type to allow for comparison of - * a key DBT that contains off-page data. A key that is not of type H_OFFPAGE - * might contain data larger than the page size, since this routine can be - * called with user-provided DBTs. - * - * PUBLIC: int __ham_getindex __P((DB *, - * PUBLIC: DB_TXN *, PAGE *, const DBT *, int, int *, db_indx_t *)); - */ -int -__ham_getindex(dbp, txn, p, key, key_type, match, indx) - DB *dbp; - DB_TXN *txn; - PAGE *p; - const DBT *key; - int key_type, *match; - db_indx_t *indx; -{ - /* Since all entries are key/data pairs. 
*/ - DB_ASSERT(dbp->dbenv, NUM_ENT(p)%2 == 0 ); - - /* Support pre 4.6 unsorted hash pages. */ - if (p->type == P_HASH_UNSORTED) - return (__ham_getindex_unsorted( - dbp, txn, p, key, match, indx)); - else - return (__ham_getindex_sorted( - dbp, txn, p, key, key_type, match, indx)); -} - -#undef min -#define min(a, b) (((a) < (b)) ? (a) : (b)) - -/* - * Perform a linear search of an unsorted (pre 4.6 format) hash page. - * - * This routine is never used to generate an index for insertion, because any - * unsorted page is sorted before we insert. - * - * Returns 0 if an exact match is found, with indx set to requested elem. - * Returns 1 if the item did not exist, indx is set to the last element on the - * page. - */ -static int -__ham_getindex_unsorted(dbp, txn, p, key, match, indx) - DB *dbp; - DB_TXN *txn; - PAGE *p; - const DBT *key; - int *match; - db_indx_t *indx; -{ - DBT pg_dbt; - HASH *t; - db_pgno_t pgno; - int i, n, res, ret; - u_int32_t tlen; - u_int8_t *hk; - - n = NUM_ENT(p); - t = dbp->h_internal; - res = 1; - - /* Do a linear search over the page looking for an exact match */ - for (i = 0; i < n; i+=2) { - hk = H_PAIRKEY(dbp, p, i); - switch (HPAGE_PTYPE(hk)) { - case H_OFFPAGE: - /* extract item length from possibly unaligned DBT */ - memcpy(&tlen, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - if (tlen == key->size) { - memcpy(&pgno, - HOFFPAGE_PGNO(hk), sizeof(db_pgno_t)); - if ((ret = __db_moff(dbp, txn, key, - pgno, tlen, t->h_compare, &res)) != 0) - return (ret); - } - break; - case H_KEYDATA: - if (t->h_compare != NULL) { - DB_INIT_DBT(pg_dbt, - HKEYDATA_DATA(hk), key->size); - if (t->h_compare( - dbp, key, &pg_dbt) != 0) - break; - } else if (key->size == - LEN_HKEY(dbp, p, dbp->pgsize, i)) - res = memcmp(key->data, HKEYDATA_DATA(hk), - key->size); - break; - case H_DUPLICATE: - case H_OFFDUP: - /* - * These are errors because keys are never duplicated. 
- */ - /* FALLTHROUGH */ - default: - return (__db_pgfmt(dbp->dbenv, PGNO(p))); - } - if (res == 0) - break; - } - *indx = i; - *match = (res == 0 ? 0 : 1); - return (0); -} - -/* - * Perform a binary search of a sorted hash page for a key. - * Return 0 if an exact match is found, with indx set to requested elem. - * Return 1 if the item did not exist, indx will be set to the first element - * greater than the requested item. - */ -static int -__ham_getindex_sorted(dbp, txn, p, key, key_type, match, indxp) - DB *dbp; - DB_TXN *txn; - PAGE *p; - const DBT *key; - int key_type, *match; - db_indx_t *indxp; -{ - DBT tmp_dbt; - HASH *t; - HOFFPAGE *offp; - db_indx_t indx; - db_pgno_t off_pgno, koff_pgno; - u_int32_t base, itemlen, lim, off_len; - u_int8_t *entry; - int res, ret; - void *data; - - DB_ASSERT(dbp->dbenv, p->type == P_HASH ); - - t = dbp->h_internal; - /* Initialize so the return params are correct for empty pages. */ - res = indx = 0; - - /* Do a binary search for the element. */ - DB_BINARY_SEARCH_FOR(base, lim, p, 2) { - DB_BINARY_SEARCH_INCR(indx, base, lim, 2); - data = HKEYDATA_DATA(H_PAIRKEY(dbp, p, indx)); - /* - * There are 4 cases here: - * 1) Off page key, off page match - * 2) Off page key, on page match - * 3) On page key, off page match - * 4) On page key, on page match - */ - entry = P_ENTRY(dbp, p, indx); - if (*entry == H_OFFPAGE) { - offp = (HOFFPAGE*)P_ENTRY(dbp, p, indx); - memcpy(&itemlen, HOFFPAGE_TLEN(offp), - sizeof(u_int32_t)); - if (key_type == H_OFFPAGE) { - /* - * Case 1. - * - * If both key and cmp DBTs refer to different - * offpage items, it is necessary to compare - * the content of the entries, in order to be - * able to maintain a valid lexicographic sort - * order. 
- */ - memcpy(&koff_pgno, HOFFPAGE_PGNO(key->data), - sizeof(db_pgno_t)); - memcpy(&off_pgno, HOFFPAGE_PGNO(offp), - sizeof(db_pgno_t)); - if (koff_pgno == off_pgno) - res = 0; - else { - memset(&tmp_dbt, 0, sizeof(tmp_dbt)); - tmp_dbt.size = HOFFPAGE_SIZE; - tmp_dbt.data = offp; - if ((ret = __db_coff(dbp, txn, key, - &tmp_dbt, t->h_compare, &res)) != 0) - return (ret); - } - } else { - /* Case 2 */ - memcpy(&off_pgno, HOFFPAGE_PGNO(offp), - sizeof(db_pgno_t)); - if ((ret = __db_moff(dbp, txn, key, off_pgno, - itemlen, t->h_compare, &res)) != 0) - return (ret); - } - } else { - itemlen = LEN_HKEYDATA(dbp, p, dbp->pgsize, indx); - if (key_type == H_OFFPAGE) { - /* Case 3 */ - tmp_dbt.data = data; - tmp_dbt.size = itemlen; - offp = (HOFFPAGE *)key->data; - memcpy(&off_pgno, HOFFPAGE_PGNO(offp), - sizeof(db_pgno_t)); - memcpy(&off_len, HOFFPAGE_TLEN(offp), - sizeof(u_int32_t)); - if ((ret = __db_moff(dbp, txn, &tmp_dbt, - off_pgno, off_len, t->h_compare, - &res)) != 0) - return (ret); - /* - * Since we switched the key/match parameters - * in the __db_moff call, the result needs to - * be inverted. - */ - res = -res; - } else if (t->h_compare != NULL) { - /* Case 4, with a user comparison func */ - DB_INIT_DBT(tmp_dbt, data, itemlen); - res = t->h_compare(dbp, key, &tmp_dbt); - } else { - /* Case 4, without a user comparison func */ - if ((res = memcmp(key->data, data, - min(key->size, itemlen))) == 0) - res = itemlen > key->size ? 1 : - (itemlen < key->size ? -1 : 0); - } - } - if (res == 0) { - /* Found a match */ - *indxp = indx; - *match = 0; - return (0); - } else if (res > 0) - DB_BINARY_SEARCH_SHIFT_BASE(indx, base, lim, 2); - } - /* - * If no match was found, and the comparison indicates that the - * closest match was lexicographically less than the input key adjust - * the insertion index to be after the index of the closest match. 
- */ - if (res > 0) - indx += 2; - *indxp = indx; - *match = 1; - return (0); -} - -/* - * PUBLIC: int __ham_verify_sorted_page __P((DB *, DB_TXN *, PAGE *)); - * - * The__ham_verify_sorted_page function is used to determine the correctness - * of sorted hash pages. The checks are used by verification, they are - * implemented in the hash code because they are also useful debugging aids. - */ -int -__ham_verify_sorted_page (dbp, txn, p) - DB *dbp; - DB_TXN *txn; - PAGE *p; -{ - DB_ENV *dbenv; - DBT prev_dbt, curr_dbt; - HASH *t; - db_pgno_t tpgno; - u_int32_t curr_len, prev_len, tlen; - u_int16_t *indxp; - db_indx_t i, n; - int res, ret; - char *prev, *curr; - - /* Validate that next, prev pointers are OK */ - n = NUM_ENT(p); - DB_ASSERT(dbp->dbenv, n%2 == 0 ); - - dbenv = dbp->dbenv; - t = dbp->h_internal; - - /* Disable verification if a custom comparator is supplied */ - if (t->h_compare != NULL) - return (0); - - /* Iterate through page, ensuring order */ - prev = (char *)HKEYDATA_DATA(H_PAIRKEY(dbp, p, 0)); - prev_len = LEN_HKEYDATA(dbp, p, dbp->pgsize, 0); - for (i = 2; i < n; i+=2) { - curr = (char *)HKEYDATA_DATA(H_PAIRKEY(dbp, p, i)); - curr_len = LEN_HKEYDATA(dbp, p, dbp->pgsize, i); - - if (HPAGE_TYPE(dbp, p, i-2) == H_OFFPAGE && - HPAGE_TYPE(dbp, p, i) == H_OFFPAGE) { - memset(&prev_dbt, 0, sizeof(prev_dbt)); - memset(&curr_dbt, 0, sizeof(curr_dbt)); - prev_dbt.size = curr_dbt.size = HOFFPAGE_SIZE; - prev_dbt.data = H_PAIRKEY(dbp, p, i-2); - curr_dbt.data = H_PAIRKEY(dbp, p, i); - if ((ret = __db_coff(dbp, txn, - &prev_dbt, &curr_dbt, t->h_compare, &res)) != 0) - return (ret); - } else if (HPAGE_TYPE(dbp, p, i-2) == H_OFFPAGE) { - memset(&curr_dbt, 0, sizeof(curr_dbt)); - curr_dbt.size = curr_len; - curr_dbt.data = H_PAIRKEY(dbp, p, i); - memcpy(&tlen, HOFFPAGE_TLEN(H_PAIRKEY(dbp, p, i-2)), - sizeof(u_int32_t)); - memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i-2)), - sizeof(db_pgno_t)); - if ((ret = __db_moff(dbp, txn, - &curr_dbt, tpgno, tlen, 
t->h_compare, &res)) != 0) - return (ret); - } else if (HPAGE_TYPE(dbp, p, i) == H_OFFPAGE) { - memset(&prev_dbt, 0, sizeof(prev_dbt)); - prev_dbt.size = prev_len; - prev_dbt.data = H_PAIRKEY(dbp, p, i); - memcpy(&tlen, HOFFPAGE_TLEN(H_PAIRKEY(dbp, p, i)), - sizeof(u_int32_t)); - memcpy(&tpgno, HOFFPAGE_PGNO(H_PAIRKEY(dbp, p, i)), - sizeof(db_pgno_t)); - if ((ret = __db_moff(dbp, txn, - &prev_dbt, tpgno, tlen, t->h_compare, &res)) != 0) - return (ret); - } else - res = memcmp(prev, curr, min(curr_len, prev_len)); - - if (res == 0 && curr_len > prev_len) - res = 1; - else if (res == 0 && curr_len < prev_len) - res = -1; - - if (res >= 0) { - __db_msg(dbenv, "key1: %s, key2: %s, len: %lu\n", - (char *)prev, (char *)curr, - (u_long)min(curr_len, prev_len)); - __db_msg(dbenv, "curroffset %lu\n", (u_long)i); - __db_msg(dbenv, "indexes: "); - for (i = 0; i < n; i++) { - indxp = P_INP(dbp, p) + i; - __db_msg(dbenv, "%04X, ", *indxp); - } - __db_msg(dbenv, "\n"); -#ifdef HAVE_STATISTICS - if ((ret = __db_prpage(dbp, p, DB_PR_PAGE)) != 0) - return (ret); -#endif - DB_ASSERT(dbp->dbenv, res < 0); - } - - prev = curr; - prev_len = curr_len; - } - return (0); -} - -/* - * A wrapper for the __ham_sort_page function. Implements logging and cursor - * adjustments associated with sorting a page outside of recovery/upgrade. - */ -static int -__ham_sort_page_cursor(dbc, page) - DBC *dbc; - PAGE *page; -{ - DB *dbp; - DB_LSN new_lsn; - DBT page_dbt; - HASH_CURSOR *hcp; - int ret; - - dbp = dbc->dbp; - hcp = (HASH_CURSOR *)dbc->internal; - - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = page; - if ((ret = __ham_splitdata_log(dbp, dbc->txn, - &new_lsn, 0, SORTPAGE, PGNO(page), - &page_dbt, &LSN(page))) != 0) - return (ret); - } else - LSN_NOT_LOGGED(new_lsn); - /* Move lsn onto page. */ - LSN(page) = new_lsn; /* Structure assignment. */ - - /* - * Invalidate the saved index, it needs to be retrieved - * again once the page is sorted. 
- */ - hcp->seek_found_indx = NDX_INVALID; - hcp->seek_found_page = PGNO_INVALID; - - return (__ham_sort_page( - dbp, dbc->txn, &hcp->split_buf, page)); -} - -/* - * PUBLIC: int __ham_sort_page __P((DB *, DB_TXN *, PAGE **, PAGE *)); - * - * Convert a page from P_HASH_UNSORTED into the sorted format P_HASH. - * - * All locking and logging is carried out be the caller. A user buffer can - * optionally be passed in to save allocating a page size buffer for sorting. - * This is allows callers to re-use the buffer pre-allocated for page splits - * in the hash cursor. The buffer is optional since no cursor exists when in - * the recovery or upgrade code paths. - */ -int -__ham_sort_page(dbp, txn, tmp_buf, page) - DB *dbp; - DB_TXN *txn; - PAGE **tmp_buf; - PAGE *page; -{ - PAGE *temp_pagep; - db_indx_t i; - int ret; - - DB_ASSERT(dbp->dbenv, page->type == P_HASH_UNSORTED); - - ret = 0; - if (tmp_buf != NULL) - temp_pagep = *tmp_buf; - else if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &temp_pagep)) != 0) - return (ret); - - memcpy(temp_pagep, page, dbp->pgsize); - - /* Re-initialize the page. 
*/ - P_INIT(page, dbp->pgsize, - page->pgno, page->prev_pgno, page->next_pgno, 0, P_HASH); - - for (i = 0; i < NUM_ENT(temp_pagep); i += 2) - if ((ret = - __ham_copypair(dbp, txn, temp_pagep, i, page, NULL)) != 0) - break; - - if (tmp_buf == NULL) - __os_free(dbp->dbenv, temp_pagep); - - return (ret); -} - -/* - * PUBLIC: int __ham_del_pair __P((DBC *, int)); - */ -int -__ham_del_pair(dbc, flags) - DBC *dbc; - int flags; -{ - DB *dbp; - DBT data_dbt, key_dbt; - DB_LSN new_lsn, *n_lsn, tmp_lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - PAGE *n_pagep, *nn_pagep, *p, *p_pagep; - db_ham_mode op; - db_indx_t ndx; - db_pgno_t chg_pgno, pgno, tmp_pgno; - u_int32_t order; - int ret, t_ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - n_pagep = p_pagep = nn_pagep = NULL; - ndx = hcp->indx; - - if (hcp->page == NULL && - (ret = __memp_fget(mpf, &hcp->pgno, dbc->txn, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &hcp->page)) != 0) - return (ret); - p = hcp->page; - - /* - * We optimize for the normal case which is when neither the key nor - * the data are large. In this case, we write a single log record - * and do the delete. If either is large, we'll call __big_delete - * to remove the big item and then update the page to remove the - * entry referring to the big item. - */ - if (HPAGE_PTYPE(H_PAIRKEY(dbp, p, ndx)) == H_OFFPAGE) { - memcpy(&pgno, HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_KEYINDEX(ndx))), - sizeof(db_pgno_t)); - ret = __db_doff(dbc, pgno); - } else - ret = 0; - - if (ret == 0) - switch (HPAGE_PTYPE(H_PAIRDATA(dbp, p, ndx))) { - case H_OFFPAGE: - memcpy(&pgno, - HOFFPAGE_PGNO(P_ENTRY(dbp, p, H_DATAINDEX(ndx))), - sizeof(db_pgno_t)); - ret = __db_doff(dbc, pgno); - break; - case H_OFFDUP: - case H_DUPLICATE: - /* - * If we delete a pair that is/was a duplicate, then - * we had better clear the flag so that we update the - * cursor appropriately. 
- */ - F_CLR(hcp, H_ISDUP); - break; - default: - /* No-op */ - break; - } - - if (ret) - return (ret); - - /* Now log the delete off this page. */ - if (DBC_LOGGING(dbc)) { - key_dbt.data = P_ENTRY(dbp, p, H_KEYINDEX(ndx)); - key_dbt.size = LEN_HITEM(dbp, p, dbp->pgsize, H_KEYINDEX(ndx)); - data_dbt.data = P_ENTRY(dbp, p, H_DATAINDEX(ndx)); - data_dbt.size = - LEN_HITEM(dbp, p, dbp->pgsize, H_DATAINDEX(ndx)); - - if ((ret = __ham_insdel_log(dbp, - dbc->txn, &new_lsn, 0, DELPAIR, PGNO(p), (u_int32_t)ndx, - &LSN(p), &key_dbt, &data_dbt)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(p) = new_lsn; - /* Do the delete. */ - __ham_dpair(dbp, p, ndx); - - /* - * Mark item deleted so that we don't try to return it, and - * so that we update the cursor correctly on the next call - * to next. - */ - F_SET(hcp, H_DELETED); - F_CLR(hcp, H_OK); - - /* - * If we are locking, we will not maintain this, because it is - * a hot spot. - * - * XXX - * Perhaps we can retain incremental numbers and apply them later. - */ - if (!STD_LOCKING(dbc)) { - if ((ret = __ham_dirty_meta(dbc, 0)) != 0) - return (ret); - --hcp->hdr->nelem; - } - - /* The HAM_DEL_NO_CURSOR flag implies HAM_DEL_NO_RECLAIM. */ - if (LF_ISSET(HAM_DEL_NO_CURSOR)) - return (0); - /* - * Update cursors that are on the page where the delete happened. - */ - if ((ret = __hamc_update(dbc, 0, DB_HAM_CURADJ_DEL, 0)) != 0) - return (ret); - - /* - * If we need to reclaim the page, then check if the page is empty. - * There are two cases. If it's empty and it's not the first page - * in the bucket (i.e., the bucket page) then we can simply remove - * it. If it is the first chain in the bucket, then we need to copy - * the second page into it and remove the second page. - * If its the only page in the bucket we leave it alone. 
- */ - if (LF_ISSET(HAM_DEL_NO_RECLAIM) || - NUM_ENT(p) != 0 || - (PREV_PGNO(p) == PGNO_INVALID && NEXT_PGNO(p) == PGNO_INVALID)) - return (0); - - if (PREV_PGNO(p) == PGNO_INVALID) { - /* - * First page in chain is empty and we know that there - * are more pages in the chain. - */ - if ((ret = __memp_fget(mpf, &NEXT_PGNO(p), dbc->txn, - DB_MPOOL_DIRTY, &n_pagep)) != 0) - return (ret); - - if (NEXT_PGNO(n_pagep) != PGNO_INVALID && - (ret = __memp_fget(mpf, &NEXT_PGNO(n_pagep), dbc->txn, - DB_MPOOL_DIRTY, &nn_pagep)) != 0) - goto err; - - if (DBC_LOGGING(dbc)) { - key_dbt.data = n_pagep; - key_dbt.size = dbp->pgsize; - if ((ret = __ham_copypage_log(dbp, - dbc->txn, &new_lsn, 0, PGNO(p), - &LSN(p), PGNO(n_pagep), &LSN(n_pagep), - NEXT_PGNO(n_pagep), - nn_pagep == NULL ? NULL : &LSN(nn_pagep), - &key_dbt)) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(p) = new_lsn; /* Structure assignment. */ - LSN(n_pagep) = new_lsn; - if (NEXT_PGNO(n_pagep) != PGNO_INVALID) - LSN(nn_pagep) = new_lsn; - - if (nn_pagep != NULL) { - PREV_PGNO(nn_pagep) = PGNO(p); - if ((ret = __memp_fput(mpf, - nn_pagep, dbc->priority)) != 0) { - nn_pagep = NULL; - goto err; - } - } - - tmp_pgno = PGNO(p); - tmp_lsn = LSN(p); - memcpy(p, n_pagep, dbp->pgsize); - PGNO(p) = tmp_pgno; - LSN(p) = tmp_lsn; - PREV_PGNO(p) = PGNO_INVALID; - - /* - * Update cursors to reflect the fact that records - * on the second page have moved to the first page. - */ - if ((ret = __hamc_delpg(dbc, PGNO(n_pagep), - PGNO(p), 0, DB_HAM_DELFIRSTPG, &order)) != 0) - goto err; - - /* - * Update the cursor to reflect its new position. 
- */ - hcp->indx = 0; - hcp->pgno = PGNO(p); - hcp->order += order; - - if ((ret = __db_free(dbc, n_pagep)) != 0) { - n_pagep = NULL; - goto err; - } - } else { - if ((ret = __memp_fget(mpf, &PREV_PGNO(p), dbc->txn, - DB_MPOOL_DIRTY, &p_pagep)) != 0) - goto err; - - if (NEXT_PGNO(p) != PGNO_INVALID) { - if ((ret = __memp_fget(mpf, &NEXT_PGNO(p), dbc->txn, - DB_MPOOL_DIRTY, &n_pagep)) != 0) - goto err; - n_lsn = &LSN(n_pagep); - } else { - n_pagep = NULL; - n_lsn = NULL; - } - - NEXT_PGNO(p_pagep) = NEXT_PGNO(p); - if (n_pagep != NULL) - PREV_PGNO(n_pagep) = PGNO(p_pagep); - - if (DBC_LOGGING(dbc)) { - if ((ret = __ham_newpage_log(dbp, dbc->txn, - &new_lsn, 0, DELOVFL, PREV_PGNO(p), &LSN(p_pagep), - PGNO(p), &LSN(p), NEXT_PGNO(p), n_lsn)) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(p_pagep) = new_lsn; /* Structure assignment. */ - if (n_pagep) - LSN(n_pagep) = new_lsn; - LSN(p) = new_lsn; - - if (NEXT_PGNO(p) == PGNO_INVALID) { - /* - * There is no next page; put the cursor on the - * previous page as if we'd deleted the last item - * on that page, with index after the last valid - * entry. - * - * The deleted flag was set up above. - */ - hcp->pgno = PGNO(p_pagep); - hcp->indx = NUM_ENT(p_pagep); - op = DB_HAM_DELLASTPG; - } else { - /* - * There is a next page, so put the cursor at - * the beginning of it. - */ - hcp->pgno = NEXT_PGNO(p); - hcp->indx = 0; - op = DB_HAM_DELMIDPG; - } - - /* - * Since we are about to delete the cursor page and we have - * just moved the cursor, we need to make sure that the - * old page pointer isn't left hanging around in the cursor. 
- */ - hcp->page = NULL; - chg_pgno = PGNO(p); - ret = __db_free(dbc, p); - if ((t_ret = __memp_fput(mpf, - p_pagep, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if (n_pagep != NULL && (t_ret = __memp_fput(mpf, - n_pagep, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if (ret != 0) - return (ret); - if ((ret = __hamc_delpg(dbc, - chg_pgno, hcp->pgno, hcp->indx, op, &order)) != 0) - return (ret); - hcp->order += order; - } - return (ret); - -err: /* Clean up any pages. */ - if (n_pagep != NULL) - (void)__memp_fput(mpf, n_pagep, dbc->priority); - if (nn_pagep != NULL) - (void)__memp_fput(mpf, nn_pagep, dbc->priority); - if (p_pagep != NULL) - (void)__memp_fput(mpf, p_pagep, dbc->priority); - return (ret); -} - -/* - * __ham_replpair -- - * Given the key data indicated by the cursor, replace part/all of it - * according to the fields in the dbt. - * - * PUBLIC: int __ham_replpair __P((DBC *, DBT *, u_int32_t)); - */ -int -__ham_replpair(dbc, dbt, make_dup) - DBC *dbc; - DBT *dbt; - u_int32_t make_dup; -{ - DB *dbp; - DBC **carray, *dbc_n; - DBT old_dbt, tdata, tmp, *new_dbt; - DB_ENV *dbenv; - DB_LSN new_lsn; - HASH_CURSOR *hcp, *cp; - db_indx_t orig_indx; - db_pgno_t orig_pgno; - u_int32_t change; - u_int32_t dup_flag, len, memsize, newlen; - int beyond_eor, is_big, is_plus, ret, type, i, found, t_ret; - u_int8_t *beg, *dest, *end, *hk, *src; - void *memp; - - /* - * Items that were already offpage (ISBIG) were handled before - * we get in here. So, we need only handle cases where the old - * key is on a regular page. That leaves us 6 cases: - * 1. Original data onpage; new data is smaller - * 2. Original data onpage; new data is the same size - * 3. Original data onpage; new data is bigger, but not ISBIG, - * fits on page - * 4. Original data onpage; new data is bigger, but not ISBIG, - * does not fit on page - * 5. Original data onpage; New data is an off-page item. - * 6. Original data was offpage; new item is smaller. 
- * - * Cases 1-3 are essentially the same (and should be the common case). - * We handle 4-6 as delete and add. - */ - dbp = dbc->dbp; - dbenv = dbp->dbenv; - hcp = (HASH_CURSOR *)dbc->internal; - found = 0; - dbc_n = memp = NULL; - carray = NULL; - - /* - * We need to compute the number of bytes that we are adding or - * removing from the entry. Normally, we can simply substract - * the number of bytes we are replacing (dbt->dlen) from the - * number of bytes we are inserting (dbt->size). However, if - * we are doing a partial put off the end of a record, then this - * formula doesn't work, because we are essentially adding - * new bytes. - */ - if (dbt->size > dbt->dlen) { - change = dbt->size - dbt->dlen; - is_plus = 1; - } else { - change = dbt->dlen - dbt->size; - is_plus = 0; - } - - hk = H_PAIRDATA(dbp, hcp->page, hcp->indx); - is_big = HPAGE_PTYPE(hk) == H_OFFPAGE; - - if (is_big) - memcpy(&len, HOFFPAGE_TLEN(hk), sizeof(u_int32_t)); - else - len = LEN_HKEYDATA(dbp, hcp->page, - dbp->pgsize, H_DATAINDEX(hcp->indx)); - - beyond_eor = dbt->doff + dbt->dlen > len; - if (beyond_eor) { - /* - * The change is beyond the end of record. If change - * is a positive number, we can simply add the extension - * to it. However, if change is negative, then we need - * to figure out if the extension is larger than the - * negative change. - */ - if (is_plus) - change += dbt->doff + dbt->dlen - len; - else if (dbt->doff + dbt->dlen - len > change) { - /* Extension bigger than change */ - is_plus = 1; - change = (dbt->doff + dbt->dlen - len) - change; - } else /* Extension is smaller than change. */ - change -= (dbt->doff + dbt->dlen - len); - } - - newlen = (is_plus ? len + change : len - change); - if (ISBIG(hcp, newlen) || - (is_plus && change > P_FREESPACE(dbp, hcp->page)) || - beyond_eor || is_big) { - /* - * If we are in cases 4 or 5 then is_plus will be true. 
- * If we don't have a transaction then we cannot roll back, - * make sure there is enough room for the new page. - */ - if (is_plus && dbc->txn == NULL && - dbp->mpf->mfp->maxpgno != 0 && - dbp->mpf->mfp->maxpgno == dbp->mpf->mfp->last_pgno) - return (__db_space_err(dbp)); - /* - * Cases 4-6 -- two subcases. - * A. This is not really a partial operation, but an overwrite. - * Simple del and add works. - * B. This is a partial and we need to construct the data that - * we are really inserting (yuck). - * In both cases, we need to grab the key off the page (in - * some cases we could do this outside of this routine; for - * cleanliness we do it here. If you happen to be on a big - * key, this could be a performance hit). - */ - memset(&tmp, 0, sizeof(tmp)); - if ((ret = __db_ret(dbp, dbc->txn, - hcp->page, H_KEYINDEX(hcp->indx), &tmp, - &dbc->my_rkey.data, &dbc->my_rkey.ulen)) != 0) - return (ret); - - /* - * In cases 4-6, a delete and insert works, but we need to - * track and update any cursors pointing to the item being - * moved. - */ - orig_pgno = PGNO(hcp->page); - orig_indx = hcp->indx; - if ((ret = __ham_get_clist(dbp, - orig_pgno, orig_indx, &carray)) != 0) - goto err; - - /* Preserve duplicate info. */ - dup_flag = F_ISSET(hcp, H_ISDUP); - if (dbt->doff == 0 && dbt->dlen == len) { - type = (dup_flag ? H_DUPLICATE : H_KEYDATA); - new_dbt = dbt; - } else { /* Case B */ - type = HPAGE_PTYPE(hk) != H_OFFPAGE ? - HPAGE_PTYPE(hk) : H_KEYDATA; - memset(&tdata, 0, sizeof(tdata)); - memsize = 0; - if ((ret = __db_ret(dbp, dbc->txn, hcp->page, - H_DATAINDEX(hcp->indx), &tdata, &memp, &memsize)) - != 0) - goto err; - - /* Now shift old data around to make room for new. 
*/ - if (is_plus) { - if ((ret = __os_realloc(dbenv, - tdata.size + change, &tdata.data)) != 0) - return (ret); - memp = tdata.data; - memsize = tdata.size + change; - memset((u_int8_t *)tdata.data + tdata.size, - 0, change); - } - end = (u_int8_t *)tdata.data + tdata.size; - - src = (u_int8_t *)tdata.data + dbt->doff + dbt->dlen; - if (src < end && tdata.size > dbt->doff + dbt->dlen) { - len = tdata.size - (dbt->doff + dbt->dlen); - if (is_plus) - dest = src + change; - else - dest = src - change; - memmove(dest, src, len); - } - memcpy((u_int8_t *)tdata.data + dbt->doff, - dbt->data, dbt->size); - if (is_plus) - tdata.size += change; - else - tdata.size -= change; - new_dbt = &tdata; - } - if ((ret = __ham_del_pair(dbc, HAM_DEL_NO_CURSOR)) != 0) - goto err; - /* - * Save the state of the cursor after the delete, so that we - * can adjust any cursors impacted by the delete. Don't just - * update the cursors now, to avoid ambiguity in reversing the - * adjustments during abort. - */ - if ((ret = __dbc_dup(dbc, &dbc_n, DB_POSITION)) != 0) - goto err; - if ((ret = __ham_add_el(dbc, &tmp, new_dbt, type)) != 0) - goto err; - F_SET(hcp, dup_flag); - - /* - * If the delete/insert pair caused the item to be moved - * to another location (which is possible for duplicate sets - * that are moved onto another page in the bucket), then update - * any impacted cursors. - */ - if (((HASH_CURSOR*)dbc_n->internal)->pgno != hcp->pgno || - ((HASH_CURSOR*)dbc_n->internal)->indx != hcp->indx) { - /* - * Set any cursors pointing to items in the moved - * duplicate set to the destination location and reset - * the deleted flag. This can't be done earlier, since - * the insert location is not computed until the actual - * __ham_add_el call is made. 
- */ - if (carray != NULL) { - for (i = 0; carray[i] != NULL; i++) { - cp = (HASH_CURSOR*)carray[i]->internal; - cp->pgno = hcp->pgno; - cp->indx = hcp->indx; - F_CLR(cp, H_DELETED); - found = 1; - } - /* - * Only log the update once, since the recovery - * code iterates through all open cursors and - * applies the change to all matching cursors. - */ - if (found && DBC_LOGGING(dbc) && - IS_SUBTRANSACTION(dbc->txn)) { - if ((ret = - __ham_chgpg_log(dbp, - dbc->txn, &new_lsn, 0, - DB_HAM_CHGPG, orig_pgno, hcp->pgno, - orig_indx, hcp->indx)) != 0) - goto err; - } - } - /* - * Update any cursors impacted by the delete. Do this - * after chgpg log so that recovery does not re-bump - * cursors pointing to the deleted item. - */ - ret = __hamc_update(dbc_n, 0, DB_HAM_CURADJ_DEL, 0); - } - -err: if (dbc_n != NULL && (t_ret = __dbc_close(dbc_n)) != 0 && - ret == 0) - ret = t_ret; - if (carray != NULL) - __os_free(dbenv, carray); - if (memp != NULL) - __os_free(dbenv, memp); - return (ret); - } - - /* - * Set up pointer into existing data. Do it before the log - * message so we can use it inside of the log setup. - */ - beg = HKEYDATA_DATA(H_PAIRDATA(dbp, hcp->page, hcp->indx)); - beg += dbt->doff; - - /* - * If we are going to have to move bytes at all, figure out - * all the parameters here. Then log the call before moving - * anything around. - */ - if (DBC_LOGGING(dbc)) { - old_dbt.data = beg; - old_dbt.size = dbt->dlen; - if ((ret = __ham_replace_log(dbp, - dbc->txn, &new_lsn, 0, PGNO(hcp->page), - (u_int32_t)H_DATAINDEX(hcp->indx), &LSN(hcp->page), - (int32_t)dbt->doff, &old_dbt, dbt, make_dup)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(new_lsn); - - LSN(hcp->page) = new_lsn; /* Structure assignment. */ - - __ham_onpage_replace(dbp, hcp->page, (u_int32_t)H_DATAINDEX(hcp->indx), - (int32_t)dbt->doff, change, is_plus, dbt); - - return (0); -} - -/* - * Replace data on a page with new data, possibly growing or shrinking what's - * there. 
This is called on two different occasions. On one (from replpair) - * we are interested in changing only the data. On the other (from recovery) - * we are replacing the entire data (header and all) with a new element. In - * the latter case, the off argument is negative. - * pagep: the page that we're changing - * ndx: page index of the element that is growing/shrinking. - * off: Offset at which we are beginning the replacement. - * change: the number of bytes (+ or -) that the element is growing/shrinking. - * dbt: the new data that gets written at beg. - * - * PUBLIC: void __ham_onpage_replace __P((DB *, PAGE *, u_int32_t, - * PUBLIC: int32_t, u_int32_t, int, DBT *)); - */ -void -__ham_onpage_replace(dbp, pagep, ndx, off, change, is_plus, dbt) - DB *dbp; - PAGE *pagep; - u_int32_t ndx; - int32_t off; - u_int32_t change; - int is_plus; - DBT *dbt; -{ - db_indx_t i, *inp; - int32_t len; - size_t pgsize; - u_int8_t *src, *dest; - int zero_me; - - pgsize = dbp->pgsize; - inp = P_INP(dbp, pagep); - if (change != 0) { - zero_me = 0; - src = (u_int8_t *)(pagep) + HOFFSET(pagep); - if (off < 0) - len = inp[ndx] - HOFFSET(pagep); - else if ((u_int32_t)off >= - LEN_HKEYDATA(dbp, pagep, pgsize, ndx)) { - len = (int32_t)(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) - + LEN_HKEYDATA(dbp, pagep, pgsize, ndx) - src); - zero_me = 1; - } else - len = (int32_t)( - (HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off) - - src); - if (is_plus) - dest = src - change; - else - dest = src + change; - memmove(dest, src, (size_t)len); - if (zero_me) - memset(dest + len, 0, change); - - /* Now update the indices. 
*/ - for (i = ndx; i < NUM_ENT(pagep); i++) { - if (is_plus) - inp[i] -= change; - else - inp[i] += change; - } - if (is_plus) - HOFFSET(pagep) -= change; - else - HOFFSET(pagep) += change; - } - if (off >= 0) - memcpy(HKEYDATA_DATA(P_ENTRY(dbp, pagep, ndx)) + off, - dbt->data, dbt->size); - else - memcpy(P_ENTRY(dbp, pagep, ndx), dbt->data, dbt->size); -} - -/* - * PUBLIC: int __ham_split_page __P((DBC *, u_int32_t, u_int32_t)); - */ -int -__ham_split_page(dbc, obucket, nbucket) - DBC *dbc; - u_int32_t obucket, nbucket; -{ - DB *dbp; - DBC **carray, *tmp_dbc; - DBT key, page_dbt; - DB_ENV *dbenv; - DB_LOCK block; - DB_LSN new_lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp, *cp; - PAGE **pp, *old_pagep, *temp_pagep, *new_pagep; - db_indx_t n, dest_indx; - db_pgno_t bucket_pgno, npgno, next_pgno; - u_int32_t big_len, len; - int found, i, ret, t_ret; - void *big_buf; - - dbp = dbc->dbp; - carray = NULL; - dbenv = dbp->dbenv; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - temp_pagep = old_pagep = new_pagep = NULL; - npgno = PGNO_INVALID; - LOCK_INIT(block); - - bucket_pgno = BUCKET_TO_PAGE(hcp, obucket); - if ((ret = __db_lget(dbc, - 0, bucket_pgno, DB_LOCK_WRITE, 0, &block)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &bucket_pgno, dbc->txn, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &old_pagep)) != 0) - goto err; - - /* Sort any unsorted pages before doing a hash split. */ - if (old_pagep->type == P_HASH_UNSORTED) - if ((ret = __ham_sort_page_cursor(dbc, old_pagep)) != 0) - return (ret); - - /* Properly initialize the new bucket page. 
*/ - npgno = BUCKET_TO_PAGE(hcp, nbucket); - if ((ret = __memp_fget(mpf, &npgno, dbc->txn, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &new_pagep)) != 0) - goto err; - P_INIT(new_pagep, - dbp->pgsize, npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - - temp_pagep = hcp->split_buf; - memcpy(temp_pagep, old_pagep, dbp->pgsize); - - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = old_pagep; - if ((ret = __ham_splitdata_log(dbp, - dbc->txn, &new_lsn, 0, SPLITOLD, - PGNO(old_pagep), &page_dbt, &LSN(old_pagep))) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - - LSN(old_pagep) = new_lsn; /* Structure assignment. */ - - P_INIT(old_pagep, dbp->pgsize, PGNO(old_pagep), PGNO_INVALID, - PGNO_INVALID, 0, P_HASH); - - big_len = 0; - big_buf = NULL; - memset(&key, 0, sizeof(key)); - while (temp_pagep != NULL) { - if ((ret = __ham_get_clist(dbp, - PGNO(temp_pagep), NDX_INVALID, &carray)) != 0) - goto err; - - for (n = 0; n < (db_indx_t)NUM_ENT(temp_pagep); n += 2) { - if ((ret = __db_ret(dbp, dbc->txn, temp_pagep, - H_KEYINDEX(n), &key, &big_buf, &big_len)) != 0) - goto err; - - if (__ham_call_hash(dbc, key.data, key.size) == obucket) - pp = &old_pagep; - else - pp = &new_pagep; - - /* - * Figure out how many bytes we need on the new - * page to store the key/data pair. 
- */ - len = LEN_HITEM(dbp, temp_pagep, dbp->pgsize, - H_DATAINDEX(n)) + - LEN_HITEM(dbp, temp_pagep, dbp->pgsize, - H_KEYINDEX(n)) + - 2 * sizeof(db_indx_t); - - if (P_FREESPACE(dbp, *pp) < len) { - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = *pp; - if ((ret = __ham_splitdata_log(dbp, - dbc->txn, &new_lsn, 0, - SPLITNEW, PGNO(*pp), &page_dbt, - &LSN(*pp))) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - LSN(*pp) = new_lsn; - if ((ret = - __ham_add_ovflpage(dbc, *pp, 1, pp)) != 0) - goto err; - } - - dest_indx = NDX_INVALID; - if ((ret = __ham_copypair(dbp, dbc->txn, temp_pagep, - H_KEYINDEX(n), *pp, &dest_indx)) != 0) - goto err; - - /* - * Update any cursors that were pointing to items - * shuffled because of this insert. - * Use __hamc_update, since the cursor adjustments are - * the same as those required for an insert. The - * overhead of creating a cursor is worthwhile to save - * replicating the adjustment functionality. - * Adjusting shuffled cursors needs to be done prior to - * adjusting any cursors that were pointing to the - * moved item. - * All pages in a bucket are sorted, but the items are - * not sorted across pages within a bucket. This means - * that splitting the first page in a bucket into two - * new buckets won't require any cursor shuffling, - * since all inserts will be appends. Splitting of the - * second etc page from the initial bucket could - * cause an item to be inserted at any location on a - * page (since items already inserted from page 1 of - * the initial bucket may overlap), so only adjust - * cursors for the second etc pages within a bucket. 
- */ - if (PGNO(temp_pagep) != bucket_pgno) { - if ((ret = __db_cursor( - dbp, dbc->txn, &tmp_dbc, 0)) != 0) - goto err; - hcp = (HASH_CURSOR*)tmp_dbc->internal; - hcp->pgno = PGNO(*pp); - hcp->indx = dest_indx; - hcp->dup_off = 0; - hcp->order = 0; - if ((ret = __hamc_update( - tmp_dbc, len, DB_HAM_CURADJ_ADD, 0)) != 0) - goto err; - if ((ret = __dbc_close(tmp_dbc)) != 0) - goto err; - } - /* Update any cursors pointing at the moved item. */ - if (carray != NULL) { - found = 0; - for (i = 0; carray[i] != NULL; i++) { - cp = - (HASH_CURSOR *)carray[i]->internal; - if (cp->pgno == PGNO(temp_pagep) && - cp->indx == n) { - cp->pgno = PGNO(*pp); - cp->indx = dest_indx; - found = 1; - } - } - /* - * Only log the update once, since the recovery - * code iterates through all open cursors and - * applies the change to all matching cursors. - */ - if (found && DBC_LOGGING(dbc) && - IS_SUBTRANSACTION(dbc->txn)) { - if ((ret = - __ham_chgpg_log(dbp, - dbc->txn, &new_lsn, 0, - DB_HAM_SPLIT, PGNO(temp_pagep), - PGNO(*pp), n, dest_indx)) != 0) - goto err; - } - } - } - next_pgno = NEXT_PGNO(temp_pagep); - - /* Clear temp_page; if it's a link overflow page, free it. */ - if (PGNO(temp_pagep) != bucket_pgno && (ret = - __db_free(dbc, temp_pagep)) != 0) { - temp_pagep = NULL; - goto err; - } - - if (next_pgno == PGNO_INVALID) - temp_pagep = NULL; - else if ((ret = __memp_fget(mpf, &next_pgno, dbc->txn, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &temp_pagep)) != 0) - goto err; - - if (temp_pagep != NULL) { - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = temp_pagep; - if ((ret = __ham_splitdata_log(dbp, - dbc->txn, &new_lsn, 0, - SPLITOLD, PGNO(temp_pagep), - &page_dbt, &LSN(temp_pagep))) != 0) - goto err; - } else - LSN_NOT_LOGGED(new_lsn); - LSN(temp_pagep) = new_lsn; - } - - if (carray != NULL) /* We never knew its size. 
*/ - __os_free(dbenv, carray); - carray = NULL; - } - if (big_buf != NULL) - __os_free(dbenv, big_buf); - - /* - * If the original bucket spanned multiple pages, then we've got - * a pointer to a page that used to be on the bucket chain. It - * should be deleted. - */ - if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno && - (ret = __db_free(dbc, temp_pagep)) != 0) { - temp_pagep = NULL; - goto err; - } - - /* - * Write new buckets out. - */ - if (DBC_LOGGING(dbc)) { - page_dbt.size = dbp->pgsize; - page_dbt.data = old_pagep; - if ((ret = __ham_splitdata_log(dbp, dbc->txn, - &new_lsn, 0, SPLITNEW, PGNO(old_pagep), &page_dbt, - &LSN(old_pagep))) != 0) - goto err; - LSN(old_pagep) = new_lsn; - - page_dbt.data = new_pagep; - if ((ret = __ham_splitdata_log(dbp, dbc->txn, &new_lsn, 0, - SPLITNEW, PGNO(new_pagep), &page_dbt, - &LSN(new_pagep))) != 0) - goto err; - LSN(new_pagep) = new_lsn; - } else { - LSN_NOT_LOGGED(LSN(old_pagep)); - LSN_NOT_LOGGED(LSN(new_pagep)); - } - - ret = __memp_fput(mpf, old_pagep, dbc->priority); - if ((t_ret = - __memp_fput(mpf, new_pagep, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - - if (0) { -err: if (old_pagep != NULL) - (void)__memp_fput(mpf, old_pagep, dbc->priority); - if (new_pagep != NULL) { - P_INIT(new_pagep, dbp->pgsize, - npgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - (void)__memp_fput(mpf, new_pagep, dbc->priority); - } - if (temp_pagep != NULL && PGNO(temp_pagep) != bucket_pgno) - (void)__memp_fput(mpf, temp_pagep, dbc->priority); - } - if ((t_ret = __TLPUT(dbc, block)) != 0 && ret == 0) - ret = t_ret; - if (carray != NULL) /* We never knew its size. */ - __os_free(dbenv, carray); - return (ret); -} - -/* - * Add the given pair to the page. The page in question may already be - * held (i.e. it was already gotten). If it is, then the page is passed - * in via the pagep parameter. On return, pagep will contain the page - * to which we just added something. 
This allows us to link overflow - * pages and return the new page having correctly put the last page. - * - * PUBLIC: int __ham_add_el __P((DBC *, const DBT *, const DBT *, int)); - */ -int -__ham_add_el(dbc, key, val, type) - DBC *dbc; - const DBT *key, *val; - int type; -{ - const DBT *pkey, *pdata; - DB *dbp; - DBT key_dbt, data_dbt; - DB_LSN new_lsn; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HOFFPAGE doff, koff; - db_pgno_t next_pgno, pgno; - u_int32_t data_size, key_size; - u_int32_t pages, pagespace, pairsize, rectype; - int do_expand, data_type, is_keybig, is_databig, key_type, match, ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - do_expand = 0; - - pgno = hcp->seek_found_page != PGNO_INVALID ? - hcp->seek_found_page : hcp->pgno; - if (hcp->page == NULL && (ret = __memp_fget(mpf, &pgno, dbc->txn, - DB_MPOOL_CREATE, &hcp->page)) != 0) - return (ret); - - key_size = HKEYDATA_PSIZE(key->size); - data_size = HKEYDATA_PSIZE(val->size); - is_keybig = ISBIG(hcp, key->size); - is_databig = ISBIG(hcp, val->size); - if (is_keybig) - key_size = HOFFPAGE_PSIZE; - if (is_databig) - data_size = HOFFPAGE_PSIZE; - - pairsize = key_size + data_size; - - /* Advance to first page in chain with room for item. */ - while (H_NUMPAIRS(hcp->page) && NEXT_PGNO(hcp->page) != PGNO_INVALID) { - /* - * This may not be the end of the chain, but the pair may fit - * anyway. Check if it's a bigpair that fits or a regular - * pair that fits. - */ - if (P_FREESPACE(dbp, hcp->page) >= pairsize) - break; - next_pgno = NEXT_PGNO(hcp->page); - if ((ret = __ham_next_cpage(dbc, next_pgno)) != 0) - return (ret); - } - - /* - * Check if we need to allocate a new page. 
- */ - if (P_FREESPACE(dbp, hcp->page) < pairsize) { - do_expand = 1; - if ((ret = __memp_dirty(mpf, - &hcp->page, dbc->txn, dbc->priority, 0)) != 0) - return (ret); - if ((ret = __ham_add_ovflpage(dbc, - (PAGE *)hcp->page, 1, (PAGE **)&hcp->page)) != 0) - return (ret); - hcp->pgno = PGNO(hcp->page); - } - - /* - * If we don't have a transaction then make sure we will not - * run out of file space before updating the key or data. - */ - if (dbc->txn == NULL && - dbp->mpf->mfp->maxpgno != 0 && (is_keybig || is_databig)) { - pagespace = P_MAXSPACE(dbp, dbp->pgsize); - pages = 0; - if (is_databig) - pages = ((data_size - 1) / pagespace) + 1; - if (is_keybig) { - pages += ((key->size - 1) / pagespace) + 1; - if (pages > - (dbp->mpf->mfp->maxpgno - dbp->mpf->mfp->last_pgno)) - return (__db_space_err(dbp)); - } - } - - if ((ret = __memp_dirty(mpf, - &hcp->page, dbc->txn, dbc->priority, 0)) != 0) - return (ret); - - /* - * Update cursor. - */ - hcp->indx = hcp->seek_found_indx; - F_CLR(hcp, H_DELETED); - if (is_keybig) { - koff.type = H_OFFPAGE; - UMRW_SET(koff.unused[0]); - UMRW_SET(koff.unused[1]); - UMRW_SET(koff.unused[2]); - if ((ret = __db_poff(dbc, key, &koff.pgno)) != 0) - return (ret); - koff.tlen = key->size; - key_dbt.data = &koff; - key_dbt.size = sizeof(koff); - pkey = &key_dbt; - key_type = H_OFFPAGE; - } else { - pkey = key; - key_type = H_KEYDATA; - } - - if (is_databig) { - doff.type = H_OFFPAGE; - UMRW_SET(doff.unused[0]); - UMRW_SET(doff.unused[1]); - UMRW_SET(doff.unused[2]); - if ((ret = __db_poff(dbc, val, &doff.pgno)) != 0) - return (ret); - doff.tlen = val->size; - data_dbt.data = &doff; - data_dbt.size = sizeof(doff); - pdata = &data_dbt; - data_type = H_OFFPAGE; - } else { - pdata = val; - data_type = type; - } - - /* Sort any unsorted pages before doing the insert. 
*/ - if (((PAGE *)hcp->page)->type == P_HASH_UNSORTED) - if ((ret = __ham_sort_page_cursor(dbc, hcp->page)) != 0) - return (ret); - - /* - * If inserting on the page found initially, then use the saved index. - * If inserting on a different page resolve the index now so it can be - * logged. - * The page might be different, if P_FREESPACE constraint failed (due - * to a partial put that increases the data size). - */ - if (PGNO(hcp->page) != hcp->seek_found_page) { - if ((ret = __ham_getindex(dbp, dbc->txn, hcp->page, pkey, - key_type, &match, &hcp->seek_found_indx)) != 0) - return (ret); - hcp->seek_found_page = PGNO(hcp->page); - - DB_ASSERT(dbp->dbenv, hcp->seek_found_indx <= NUM_ENT(hcp->page)); - } - - if (DBC_LOGGING(dbc)) { - rectype = PUTPAIR; - if (is_databig) - rectype |= PAIR_DATAMASK; - if (is_keybig) - rectype |= PAIR_KEYMASK; - if (type == H_DUPLICATE) - rectype |= PAIR_DUPMASK; - - if ((ret = __ham_insdel_log(dbp, dbc->txn, &new_lsn, 0, - rectype, PGNO(hcp->page), (u_int32_t)hcp->seek_found_indx, - &LSN(hcp->page), pkey, pdata)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(hcp->page) = new_lsn; /* Structure assignment. */ - - if ((ret = __ham_insertpair(dbp, dbc->txn, hcp->page, - &hcp->seek_found_indx, pkey, pdata, key_type, data_type)) != 0) - return (ret); - - /* - * Adjust any cursors that were pointing at items whose indices were - * shuffled due to the insert. - */ - if ((ret = __hamc_update(dbc, pairsize, DB_HAM_CURADJ_ADD, 0)) != 0) - return (ret); - - /* - * For splits, we are going to update item_info's page number - * field, so that we can easily return to the same page the - * next time we come in here. For other operations, this doesn't - * matter, since this is the last thing that happens before we return - * to the user program. 
- */ - hcp->pgno = PGNO(hcp->page); - /* - * When moving an item from one page in a bucket to another, due to an - * expanding on page duplicate set, or a partial put that increases the - * size of an item. The destination index needs to be saved so that the - * __ham_replpair code can update any cursors impacted by the move. For - * other operations, this does not matter, since this is the last thing - * that happens before we return to the user program. - */ - hcp->indx = hcp->seek_found_indx; - - /* - * XXX - * Maybe keep incremental numbers here. - */ - if (!STD_LOCKING(dbc)) { - if ((ret = __ham_dirty_meta(dbc, 0)) != 0) - return (ret); - hcp->hdr->nelem++; - } - - if (do_expand || (hcp->hdr->ffactor != 0 && - (u_int32_t)H_NUMPAIRS(hcp->page) > hcp->hdr->ffactor)) - F_SET(hcp, H_EXPAND); - return (0); -} - -/* - * Special insert pair call -- copies a key/data pair from one page to - * another. Works for all types of hash entries (H_OFFPAGE, H_KEYDATA, - * H_DUPLICATE, H_OFFDUP). Since we log splits at a high level, we - * do not need to do any logging here. 
- * - * dest_indx is an optional parameter, it serves several purposes: - * * ignored if NULL - * * Used as an insert index if non-null and not NDX_INVALID - * * Populated with the insert index if non-null and NDX_INVALID - * - * PUBLIC: int __ham_copypair __P((DB *, DB_TXN *, PAGE *, u_int32_t, - * PUBLIC: PAGE *, db_indx_t *)); - */ -int -__ham_copypair(dbp, txn, src_page, src_ndx, dest_page, dest_indx) - DB *dbp; - DB_TXN *txn; - PAGE *src_page; - u_int32_t src_ndx; - PAGE *dest_page; - db_indx_t *dest_indx; -{ - DBT tkey, tdata; - db_indx_t kindx, dindx; - int ktype, dtype, ret; - - ret = 0; - memset(&tkey, 0, sizeof(tkey)); - memset(&tdata, 0, sizeof(tdata)); - - ktype = HPAGE_TYPE(dbp, src_page, H_KEYINDEX(src_ndx)); - dtype = HPAGE_TYPE(dbp, src_page, H_DATAINDEX(src_ndx)); - kindx = H_KEYINDEX(src_ndx); - dindx = H_DATAINDEX(src_ndx); - if (ktype == H_OFFPAGE) { - tkey.data = P_ENTRY(dbp, src_page, kindx); - tkey.size = LEN_HITEM(dbp, src_page, dbp->pgsize, kindx); - } else { - tkey.data = HKEYDATA_DATA(P_ENTRY(dbp, src_page, kindx)); - tkey.size = LEN_HKEYDATA(dbp, src_page, dbp->pgsize, kindx); - } - if (dtype == H_OFFPAGE) { - tdata.data = P_ENTRY(dbp, src_page, dindx); - tdata.size = LEN_HITEM(dbp, src_page, dbp->pgsize, dindx); - } else { - tdata.data = HKEYDATA_DATA(P_ENTRY(dbp, src_page, dindx)); - tdata.size = LEN_HKEYDATA(dbp, src_page, dbp->pgsize, dindx); - } - if ((ret = __ham_insertpair(dbp, txn, dest_page, dest_indx, - &tkey, &tdata, ktype, dtype)) != 0) - return (ret); - - return (ret); -} - -/* - * __ham_add_ovflpage -- - * - * Returns: - * 0 on success: pp points to new page; !0 on error, pp not valid. 
- * - * PUBLIC: int __ham_add_ovflpage __P((DBC *, PAGE *, int, PAGE **)); - */ -int -__ham_add_ovflpage(dbc, pagep, release, pp) - DBC *dbc; - PAGE *pagep; - int release; - PAGE **pp; -{ - DB *dbp; - DB_LSN new_lsn; - DB_MPOOLFILE *mpf; - PAGE *new_pagep; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - - DB_ASSERT(dbp->dbenv, IS_DIRTY(pagep)); - - if ((ret = __db_new(dbc, P_HASH, &new_pagep)) != 0) - return (ret); - - if (DBC_LOGGING(dbc)) { - if ((ret = __ham_newpage_log(dbp, dbc->txn, &new_lsn, 0, - PUTOVFL, PGNO(pagep), &LSN(pagep), PGNO(new_pagep), - &LSN(new_pagep), PGNO_INVALID, NULL)) != 0) { - (void)__memp_fput(mpf, pagep, dbc->priority); - return (ret); - } - } else - LSN_NOT_LOGGED(new_lsn); - - /* Move lsn onto page. */ - LSN(pagep) = LSN(new_pagep) = new_lsn; - NEXT_PGNO(pagep) = PGNO(new_pagep); - - PREV_PGNO(new_pagep) = PGNO(pagep); - - if (release) - ret = __memp_fput(mpf, pagep, dbc->priority); - - *pp = new_pagep; - return (ret); -} - -/* - * PUBLIC: int __ham_get_cpage __P((DBC *, db_lockmode_t)); - */ -int -__ham_get_cpage(dbc, mode) - DBC *dbc; - db_lockmode_t mode; -{ - DB *dbp; - DB_LOCK tmp_lock; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - ret = 0; - - /* - * There are four cases with respect to buckets and locks. - * 1. If there is no lock held, then if we are locking, we should - * get the lock. - * 2. If there is a lock held, it's for the current bucket, and it's - * for the right mode, we don't need to do anything. - * 3. If there is a lock held for the current bucket but it's not - * strong enough, we need to upgrade. - * 4. If there is a lock, but it's for a different bucket, then we need - * to release the existing lock and get a new lock. 
- */ - LOCK_INIT(tmp_lock); - if (STD_LOCKING(dbc)) { - if (hcp->lbucket != hcp->bucket) { /* Case 4 */ - if ((ret = __TLPUT(dbc, hcp->lock)) != 0) - return (ret); - LOCK_INIT(hcp->lock); - } - - /* - * See if we have the right lock. If we are doing - * dirty reads we assume the write lock has been downgraded. - */ - if ((LOCK_ISSET(hcp->lock) && - ((hcp->lock_mode == DB_LOCK_READ || - F_ISSET(dbp, DB_AM_READ_UNCOMMITTED)) && - mode == DB_LOCK_WRITE))) { - /* Case 3. */ - tmp_lock = hcp->lock; - LOCK_INIT(hcp->lock); - } - - /* Acquire the lock. */ - if (!LOCK_ISSET(hcp->lock)) - /* Cases 1, 3, and 4. */ - if ((ret = __ham_lock_bucket(dbc, mode)) != 0) - return (ret); - - if (ret == 0) { - hcp->lock_mode = mode; - hcp->lbucket = hcp->bucket; - /* Case 3: release the original lock. */ - if ((ret = __ENV_LPUT(dbp->dbenv, tmp_lock)) != 0) - return (ret); - } else if (LOCK_ISSET(tmp_lock)) - hcp->lock = tmp_lock; - } - - if (ret == 0 && hcp->page == NULL) { - if (hcp->pgno == PGNO_INVALID) - hcp->pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if ((ret = __memp_fget(mpf, &hcp->pgno, dbc->txn, - DB_MPOOL_CREATE, &hcp->page)) != 0) - return (ret); - } - return (0); -} - -/* - * Get a new page at the cursor, putting the last page if necessary. - * If the flag is set to H_ISDUP, then we are talking about the - * duplicate page, not the main page. 
- * - * PUBLIC: int __ham_next_cpage __P((DBC *, db_pgno_t)); - */ -int -__ham_next_cpage(dbc, pgno) - DBC *dbc; - db_pgno_t pgno; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - PAGE *p; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - - if (hcp->page != NULL && - (ret = __memp_fput(mpf, hcp->page, dbc->priority)) != 0) - return (ret); - hcp->page = NULL; - - if ((ret = __memp_fget(mpf, &pgno, dbc->txn, - DB_MPOOL_CREATE, &p)) != 0) - return (ret); - - hcp->page = p; - hcp->pgno = pgno; - hcp->indx = 0; - - return (0); -} - -/* - * __ham_lock_bucket -- - * Get the lock on a particular bucket. - * - * PUBLIC: int __ham_lock_bucket __P((DBC *, db_lockmode_t)); - */ -int -__ham_lock_bucket(dbc, mode) - DBC *dbc; - db_lockmode_t mode; -{ - HASH_CURSOR *hcp; - db_pgno_t pgno; - int gotmeta, ret; - - hcp = (HASH_CURSOR *)dbc->internal; - gotmeta = hcp->hdr == NULL ? 1 : 0; - if (gotmeta) - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - pgno = BUCKET_TO_PAGE(hcp, hcp->bucket); - if (gotmeta) - if ((ret = __ham_release_meta(dbc)) != 0) - return (ret); - - ret = __db_lget(dbc, 0, pgno, mode, 0, &hcp->lock); - - hcp->lock_mode = mode; - return (ret); -} - -/* - * __ham_dpair -- - * Delete a pair on a page, paying no attention to what the pair - * represents. The caller is responsible for freeing up duplicates - * or offpage entries that might be referenced by this pair. - * - * Recovery assumes that this may be called without the metadata - * page pinned. - * - * PUBLIC: void __ham_dpair __P((DB *, PAGE *, u_int32_t)); - */ -void -__ham_dpair(dbp, p, indx) - DB *dbp; - PAGE *p; - u_int32_t indx; -{ - db_indx_t delta, n, *inp; - u_int8_t *dest, *src; - - inp = P_INP(dbp, p); - /* - * Compute "delta", the amount we have to shift all of the - * offsets. To find the delta, we just need to calculate - * the size of the pair of elements we are removing. 
- */ - delta = H_PAIRSIZE(dbp, p, dbp->pgsize, indx); - - /* - * The hard case: we want to remove something other than - * the last item on the page. We need to shift data and - * offsets down. - */ - if ((db_indx_t)indx != NUM_ENT(p) - 2) { - /* - * Move the data: src is the first occupied byte on - * the page. (Length is delta.) - */ - src = (u_int8_t *)p + HOFFSET(p); - - /* - * Destination is delta bytes beyond src. This might - * be an overlapping copy, so we have to use memmove. - */ - dest = src + delta; - memmove(dest, src, inp[H_DATAINDEX(indx)] - HOFFSET(p)); - } - - /* Adjust page metadata. */ - HOFFSET(p) = HOFFSET(p) + delta; - NUM_ENT(p) = NUM_ENT(p) - 2; - - /* Adjust the offsets. */ - for (n = (db_indx_t)indx; n < (db_indx_t)(NUM_ENT(p)); n++) - inp[n] = inp[n + 2] + delta; - -} - -/* - * __hamc_delpg -- - * - * Adjust the cursors after we've emptied a page in a bucket, taking - * care that when we move cursors pointing to deleted items, their - * orders don't collide with the orders of cursors on the page we move - * them to (since after this function is called, cursors with the same - * index on the two pages will be otherwise indistinguishable--they'll - * all have pgno new_pgno). There are three cases: - * - * 1) The emptied page is the first page in the bucket. In this - * case, we've copied all the items from the second page into the - * first page, so the first page is new_pgno and the second page is - * old_pgno. new_pgno is empty, but can have deleted cursors - * pointing at indx 0, so we need to be careful of the orders - * there. This is DB_HAM_DELFIRSTPG. - * - * 2) The page is somewhere in the middle of a bucket. Our caller - * can just delete such a page, so it's old_pgno. old_pgno is - * empty, but may have deleted cursors pointing at indx 0, so we - * need to be careful of indx 0 when we move those cursors to - * new_pgno. This is DB_HAM_DELMIDPG. - * - * 3) The page is the last in a bucket. 
Again the empty page is - * old_pgno, and again it should only have cursors that are deleted - * and at indx == 0. This time, though, there's no next page to - * move them to, so we set them to indx == num_ent on the previous - * page--and indx == num_ent is the index whose cursors we need to - * be careful of. This is DB_HAM_DELLASTPG. - */ -static int -__hamc_delpg(dbc, old_pgno, new_pgno, num_ent, op, orderp) - DBC *dbc; - db_pgno_t old_pgno, new_pgno; - u_int32_t num_ent; - db_ham_mode op; - u_int32_t *orderp; -{ - DB *dbp, *ldbp; - DBC *cp; - DB_ENV *dbenv; - DB_LSN lsn; - DB_TXN *my_txn; - HASH_CURSOR *hcp; - db_indx_t indx; - u_int32_t order; - int found, ret; - - /* Which is the worrisome index? */ - indx = (op == DB_HAM_DELLASTPG) ? num_ent : 0; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - - my_txn = IS_SUBTRANSACTION(dbc->txn) ? dbc->txn : NULL; - MUTEX_LOCK(dbenv, dbenv->mtx_dblist); - /* - * Find the highest order of any cursor our movement - * may collide with. - */ - FIND_FIRST_DB_MATCH(dbenv, dbp, ldbp); - for (order = 1; - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { - MUTEX_LOCK(dbenv, dbp->mutex); - TAILQ_FOREACH(cp, &ldbp->active_queue, links) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - hcp = (HASH_CURSOR *)cp->internal; - if (hcp->pgno == new_pgno && - !MVCC_SKIP_CURADJ(cp, new_pgno)) { - if (hcp->indx == indx && - F_ISSET(hcp, H_DELETED) && - hcp->order >= order) - order = hcp->order + 1; - DB_ASSERT(dbenv, op != DB_HAM_DELFIRSTPG || - hcp->indx == NDX_INVALID || - (hcp->indx == 0 && - F_ISSET(hcp, H_DELETED))); - } - } - MUTEX_UNLOCK(dbenv, dbp->mutex); - } - - FIND_FIRST_DB_MATCH(dbenv, dbp, ldbp); - for (found = 0; - ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { - MUTEX_LOCK(dbenv, dbp->mutex); - TAILQ_FOREACH(cp, &ldbp->active_queue, links) { - if (cp == dbc || cp->dbtype != DB_HASH) - continue; - - hcp = (HASH_CURSOR 
*)cp->internal; - - if (hcp->pgno == old_pgno && - !MVCC_SKIP_CURADJ(cp, old_pgno)) { - switch (op) { - case DB_HAM_DELFIRSTPG: - /* - * We're moving all items, - * regardless of index. - */ - hcp->pgno = new_pgno; - - /* - * But we have to be careful of - * the order values. - */ - if (hcp->indx == indx) - hcp->order += order; - break; - case DB_HAM_DELMIDPG: - hcp->pgno = new_pgno; - DB_ASSERT(dbenv, hcp->indx == 0 && - F_ISSET(hcp, H_DELETED)); - hcp->order += order; - break; - case DB_HAM_DELLASTPG: - hcp->pgno = new_pgno; - DB_ASSERT(dbenv, hcp->indx == 0 && - F_ISSET(hcp, H_DELETED)); - hcp->indx = indx; - hcp->order += order; - break; - default: - return (__db_unknown_path( - dbenv, "__hamc_delpg")); - } - if (my_txn != NULL && cp->txn != my_txn) - found = 1; - } - } - MUTEX_UNLOCK(dbenv, dbp->mutex); - } - MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist); - - if (found != 0 && DBC_LOGGING(dbc)) { - if ((ret = __ham_chgpg_log(dbp, my_txn, &lsn, 0, op, - old_pgno, new_pgno, indx, order)) != 0) - return (ret); - } - *orderp = order; - return (0); -} diff --git a/db/hash/hash_rec.c b/db/hash/hash_rec.c deleted file mode 100644 index fdfbb8086..000000000 --- a/db/hash/hash_rec.c +++ /dev/null @@ -1,1581 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1995, 1996 - * Margo Seltzer. All rights reserved. - */ -/* - * Copyright (c) 1995, 1996 - * The President and Fellows of Harvard University. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - $Id: hash_rec.c,v 12.37 2007/07/04 11:19:01 alexg Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" - -static int __ham_alloc_pages __P((DB *, __ham_groupalloc_args *, DB_LSN *)); -static int __ham_alloc_pages_42 - __P((DB *, __ham_groupalloc_42_args *, DB_LSN *)); - -/* - * __ham_insdel_recover -- - * Recovery function for insdel records: depending on the logged opcode - * and on whether we are redoing or undoing, either re-inserts the logged - * key/data pair into the page or deletes the pair at the logged index. - * - * PUBLIC: int __ham_insdel_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_insdel_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_insdel_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_indx_t dindx; - u_int32_t opcode; - int cmp_n, cmp_p, dtype, ktype, ret; - - pagep = NULL; - - REC_PRINT(__ham_insdel_print); - REC_INTRO(__ham_insdel_read, 1, 0); - - if ((ret = __memp_fget(mpf, &argp->pgno, NULL, - 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - if (ret == DB_PAGE_NOTFOUND) - goto done; - else { - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - } -#ifdef HAVE_FTRUNCATE - /* If the page is not here then it was later truncated. */ - if (!IS_ZERO_LSN(argp->pagelsn)) - goto done; -#endif - /* - * This page was created by a group allocation and - * the file may not have been extended yet. - * Create the page if necessary. - */ - if ((ret = __memp_fget(mpf, &argp->pgno, NULL, - DB_MPOOL_CREATE, &pagep)) != 0) { - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - } - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn); - - /* - * Two possible things going on: - * redo a delete/undo a put: delete the item from the page. - * redo a put/undo a delete: add the item to the page. 
- * If we are undoing a delete, then the information logged is the - * entire entry off the page, not just the data of a dbt. In - * this case, we want to copy it back onto the page verbatim. - * We do this by calling __insertpair with the type H_OFFPAGE instead - * of H_KEYDATA. - */ - opcode = OPCODE_OF(argp->opcode); - if ((opcode == DELPAIR && cmp_n == 0 && DB_UNDO(op)) || - (opcode == PUTPAIR && cmp_p == 0 && DB_REDO(op))) { - /* - * Need to redo a PUT or undo a delete. - */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - ktype = DB_UNDO(op) || PAIR_ISKEYBIG(argp->opcode) ? - H_OFFPAGE : H_KEYDATA; - if (PAIR_ISDATADUP(argp->opcode)) - dtype = H_DUPLICATE; - else if (DB_UNDO(op) || PAIR_ISDATABIG(argp->opcode)) - dtype = H_OFFPAGE; - else - dtype = H_KEYDATA; - dindx = (db_indx_t)argp->ndx; - if ((ret = __ham_insertpair(file_dbp, NULL, pagep, &dindx, - &argp->key, &argp->data, ktype, dtype)) != 0) - goto out; - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; - } else if ((opcode == DELPAIR && cmp_p == 0 && DB_REDO(op)) || - (opcode == PUTPAIR && cmp_n == 0 && DB_UNDO(op))) { - /* Need to undo a put or redo a delete. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - __ham_dpair(file_dbp, pagep, argp->ndx); - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; - } - - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - - /* Return the previous LSN. */ -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, pagep, file_dbp->priority); - REC_CLOSE; -} - -/* - * __ham_newpage_recover -- - * This log message is used when we add/remove overflow pages. This - * message takes care of the pointer chains, not the data on the pages. 
- * - PUBLIC: int __ham_newpage_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_newpage_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_newpage_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int change, cmp_n, cmp_p, ret; - - pagep = NULL; - - REC_PRINT(__ham_newpage_print); - REC_INTRO(__ham_newpage_read, 1, 0); - - REC_FGET(mpf, argp->new_pgno, &pagep, ppage); - change = 0; - - /* - * There are potentially three pages we need to check: the one - * that we created/deleted, the one before it and the one after - * it. - */ - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn); - - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { - /* Redo a create new page or undo a delete new page. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, argp->new_pgno, - argp->prev_pgno, argp->next_pgno, 0, P_HASH); - change = 1; - } else if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DELOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { - /* - * Redo a delete or undo a create new page. All we - * really need to do is change the LSN. - */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - change = 1; - } - - if (change) - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; - - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - - /* Now do the prev page. 
*/ -ppage: if (argp->prev_pgno != PGNO_INVALID) { - REC_FGET(mpf, argp->prev_pgno, &pagep, npage); - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->prevlsn); - change = 0; - - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { - /* Redo a create new page or undo a delete new page. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - pagep->next_pgno = argp->new_pgno; - change = 1; - } else if ((cmp_p == 0 && - DB_REDO(op) && argp->opcode == DELOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { - /* Redo a delete or undo a create new page. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - pagep->next_pgno = argp->next_pgno; - change = 1; - } - - if (change) - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn; - - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - } - - /* Now time to do the next page */ -npage: if (argp->next_pgno != PGNO_INVALID) { - REC_FGET(mpf, argp->next_pgno, &pagep, done); - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->nextlsn); - change = 0; - - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == PUTOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DELOVFL)) { - /* Redo a create new page or undo a delete new page. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - pagep->prev_pgno = argp->new_pgno; - change = 1; - } else if ((cmp_p == 0 && - DB_REDO(op) && argp->opcode == DELOVFL) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == PUTOVFL)) { - /* Redo a delete or undo a create new page. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - pagep->prev_pgno = argp->prev_pgno; - change = 1; - } - - if (change) - LSN(pagep) = DB_REDO(op) ? 
*lsnp : argp->nextlsn; - - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - } -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, pagep, file_dbp->priority); - REC_CLOSE; -} - -/* - * __ham_replace_recover -- - * This log message refers to partial puts that are local to a single - * page. You can think of them as special cases of the more general - * insdel log message. - * - * PUBLIC: int __ham_replace_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_replace_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_replace_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - DBT dbt; - PAGE *pagep; - u_int32_t change; - int cmp_n, cmp_p, is_plus, modified, ret; - u_int8_t *hk; - - pagep = NULL; - - REC_PRINT(__ham_replace_print); - REC_INTRO(__ham_replace_read, 1, 0); - - REC_FGET(mpf, argp->pgno, &pagep, done); - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn); - - memset(&dbt, 0, sizeof(dbt)); - modified = 0; - - /* - * Before we know the direction of the transformation we will - * determine the size differential; then once we know if we are - * redoing or undoing, we'll adjust the sign (is_plus) appropriately. - */ - if (argp->newitem.size > argp->olditem.size) { - change = argp->newitem.size - argp->olditem.size; - is_plus = 1; - } else { - change = argp->olditem.size - argp->newitem.size; - is_plus = 0; - } - if (cmp_p == 0 && DB_REDO(op)) { - /* Reapply the change as specified. */ - dbt.data = argp->newitem.data; - dbt.size = argp->newitem.size; - REC_DIRTY(mpf, file_dbp->priority, &pagep); - LSN(pagep) = *lsnp; - /* - * The is_plus flag is set properly to reflect - * newitem.size - olditem.size. 
- */ - modified = 1; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Undo the already applied change. */ - dbt.data = argp->olditem.data; - dbt.size = argp->olditem.size; - /* - * Invert is_plus to reflect sign of - * olditem.size - newitem.size. - */ - is_plus = !is_plus; - REC_DIRTY(mpf, file_dbp->priority, &pagep); - LSN(pagep) = argp->pagelsn; - modified = 1; - } - - if (modified) { - __ham_onpage_replace(file_dbp, pagep, - argp->ndx, argp->off, change, is_plus, &dbt); - if (argp->makedup) { - hk = P_ENTRY(file_dbp, pagep, argp->ndx); - if (DB_REDO(op)) - HPAGE_PTYPE(hk) = H_DUPLICATE; - else - HPAGE_PTYPE(hk) = H_KEYDATA; - } - } - - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, pagep, file_dbp->priority); - REC_CLOSE; -} - -/* - * __ham_splitdata_recover -- - * Recovery function for splitdata records, which carry a page image - * for a page split (SPLITOLD, SPLITNEW) or a page sort (SORTPAGE). - * - * PUBLIC: int __ham_splitdata_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_splitdata_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_splitdata_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, ret; - - pagep = NULL; - - REC_PRINT(__ham_splitdata_print); - REC_INTRO(__ham_splitdata_read, 1, 0); - - if ((ret = __memp_fget(mpf, &argp->pgno, NULL, - 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - if (ret == DB_PAGE_NOTFOUND) - goto done; - else { - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - } -#ifdef HAVE_FTRUNCATE - /* If the page is not here then it was later truncated. */ - if (!IS_ZERO_LSN(argp->pagelsn)) - goto done; -#endif - /* - * This page was created by a group allocation and - * the file may not have been extended yet. - * Create the page if necessary. 
- */ - if ((ret = __memp_fget(mpf, &argp->pgno, - NULL, DB_MPOOL_CREATE, &pagep)) != 0) { - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - } - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn); - - /* - * There are three types of log messages here. Two are related - * to an actual page split operation, one for the old page - * and one for the new pages created. The original image in the - * SPLITOLD record is used for undo. The image in the SPLITNEW - * is used for redo. We should never have a case where there is - * a redo operation and the SPLITOLD record is on disk, but not - * the SPLITNEW record. Therefore, we only have work to do when - * redo NEW messages and undo OLD messages, but we have to update - * LSNs in both cases. - * - * The third message is generated when a page is sorted (SORTPAGE). In - * an undo the original image in the SORTPAGE is used. In a redo we - * recreate the sort operation by calling __ham_sort_page. - */ - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, file_dbp->priority, &pagep); - if (argp->opcode == SPLITNEW) - /* Need to redo the split described. */ - memcpy(pagep, argp->pageimage.data, - argp->pageimage.size); - else if (argp->opcode == SORTPAGE) { - if ((ret = __ham_sort_page(file_dbp, - NULL, NULL, pagep)) != 0) - goto out; - } - LSN(pagep) = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - REC_DIRTY(mpf, file_dbp->priority, &pagep); - if (argp->opcode == SPLITOLD || argp->opcode == SORTPAGE) { - /* Put back the old image. 
*/ - memcpy(pagep, argp->pageimage.data, - argp->pageimage.size); - } else - P_INIT(pagep, file_dbp->pgsize, argp->pgno, - PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - LSN(pagep) = argp->pagelsn; - } - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, pagep, file_dbp->priority); - REC_CLOSE; -} - -/* - * __ham_copypage_recover -- - * Recovery function for copypage. - * - * PUBLIC: int __ham_copypage_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_copypage_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_copypage_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, ret; - - pagep = NULL; - - REC_PRINT(__ham_copypage_print); - REC_INTRO(__ham_copypage_read, 1, 0); - - /* This is the bucket page. */ - REC_FGET(mpf, argp->pgno, &pagep, donext); - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn); - - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo update described. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - memcpy(pagep, argp->page.data, argp->page.size); - PGNO(pagep) = argp->pgno; - PREV_PGNO(pagep) = PGNO_INVALID; - LSN(pagep) = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, argp->pgno, PGNO_INVALID, - argp->next_pgno, 0, P_HASH); - LSN(pagep) = argp->pagelsn; - } - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - -donext: /* Now fix up the "next" page. */ - REC_FGET(mpf, argp->next_pgno, &pagep, do_nn); - - /* For REDO just update the LSN. For UNDO copy page back. 
*/ - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->nextlsn); - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, file_dbp->priority, &pagep); - LSN(pagep) = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - memcpy(pagep, argp->page.data, argp->page.size); - } - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - - /* Now fix up the next's next page. */ -do_nn: if (argp->nnext_pgno == PGNO_INVALID) - goto done; - - REC_FGET(mpf, argp->nnext_pgno, &pagep, done); - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nnextlsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->nnextlsn); - - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo update described. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - PREV_PGNO(pagep) = argp->pgno; - LSN(pagep) = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - PREV_PGNO(pagep) = argp->next_pgno; - LSN(pagep) = argp->nnextlsn; - } - if ((ret = __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, pagep, file_dbp->priority); - REC_CLOSE; -} - -/* - * __ham_metagroup_recover -- - * Recovery function for metagroup. 
- * - PUBLIC: int __ham_metagroup_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_metagroup_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_metagroup_args *argp; - HASH_CURSOR *hcp; - DB *file_dbp; - DBMETA *mmeta; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - u_int32_t flags; - int cmp_n, cmp_p, did_alloc, groupgrow, ret; - - did_alloc = 0; - mmeta = NULL; - REC_PRINT(__ham_metagroup_print); - REC_INTRO(__ham_metagroup_read, 1, 1); - - /* - * This logs the virtual create of pages pgno to pgno + bucket - * If HAVE_FTRUNCATE is not supported the mpool page-allocation is not - * transaction protected, we can never undo it. Even in an abort, - * we have to allocate these pages to the hash table if they - * were actually created. In particular, during disaster - * recovery the metapage may be before this point if we - * are rolling backward. If the file has not been extended - * then the metapage could not have been updated. - * The log record contains: - * bucket: old maximum bucket - * pgno: page number of the new bucket. - * We round up on log calculations, so we can figure out if we are - * about to double the hash table if argp->bucket+1 is a power of 2. - * If it is, then we are allocating an entire doubling of pages, - * otherwise, we are simply allocating one new page. - */ - groupgrow = - (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1; - pgno = argp->pgno; - if (argp->newalloc) - pgno += argp->bucket; - - flags = 0; - pagep = NULL; -#ifndef HAVE_FTRUNCATE - LF_SET(DB_MPOOL_CREATE); -#endif - ret = __memp_fget(mpf, &pgno, NULL, flags, &pagep); - -#ifdef HAVE_FTRUNCATE - /* If we are undoing, then we don't want to create the page. 
*/ - if (ret != 0 && DB_REDO(op)) - ret = __memp_fget(mpf, - &pgno, NULL, DB_MPOOL_CREATE, &pagep); - else if (ret == DB_PAGE_NOTFOUND) - goto do_meta; -#endif - if (ret != 0) { - if (ret != ENOSPC) - goto out; - pgno = 0; - goto do_meta; - } - - /* - * When we get here then either we did not grow the file - * (groupgrow == 0) or we did grow the file and the allocation - * of those new pages succeeded. - */ - did_alloc = groupgrow; - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn); - - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, dbc->priority, &pagep); - pagep->lsn = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { -#ifdef HAVE_FTRUNCATE - /* If this record allocated the pages give them back. */ - if (argp->newalloc) { - if (pagep != NULL && (ret = __memp_fput(mpf, - pagep, DB_PRIORITY_VERY_LOW)) != 0) - goto out; - pagep = NULL; - if ((ret = __memp_ftruncate(mpf, argp->pgno, 0)) != 0) - goto out; - } else -#endif - { - /* - * Otherwise just roll the page back to its - * previous state. - */ - REC_DIRTY(mpf, dbc->priority, &pagep); - pagep->lsn = argp->pagelsn; - } - } - if (pagep != NULL && - (ret = __memp_fput(mpf, pagep, dbc->priority)) != 0) - goto out; - -do_meta: - /* Now we have to update the meta-data page. */ - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - cmp_n = LOG_COMPARE(lsnp, &hcp->hdr->dbmeta.lsn); - cmp_p = LOG_COMPARE(&hcp->hdr->dbmeta.lsn, &argp->metalsn); - CHECK_LSN(dbenv, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn); - if (cmp_p == 0 && DB_REDO(op)) { - /* Redo the actual updating of bucket counts. 
*/ - REC_DIRTY(mpf, dbc->priority, &hcp->hdr); - ++hcp->hdr->max_bucket; - if (groupgrow) { - hcp->hdr->low_mask = hcp->hdr->high_mask; - hcp->hdr->high_mask = - (argp->bucket + 1) | hcp->hdr->low_mask; - } - hcp->hdr->dbmeta.lsn = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Undo the actual updating of bucket counts. */ - REC_DIRTY(mpf, dbc->priority, &hcp->hdr); - hcp->hdr->max_bucket = argp->bucket; - if (groupgrow) { - hcp->hdr->high_mask = argp->bucket; - hcp->hdr->low_mask = hcp->hdr->high_mask >> 1; - } - hcp->hdr->dbmeta.lsn = argp->metalsn; - } - - /* - * Now we need to fix up the spares array. Each entry in the - * spares array indicates the beginning page number for the - * indicated doubling. We need to fill this in whenever the - * spares array is invalid, if we never reclaim pages then - * we have to allocate the pages to the spares array in both - * the redo and undo cases. - */ - if (did_alloc && -#ifdef HAVE_FTRUNCATE - !DB_UNDO(op) && -#endif - hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) { - REC_DIRTY(mpf, dbc->priority, &hcp->hdr); - hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] = - (argp->pgno - argp->bucket) - 1; - } -#ifdef HAVE_FTRUNCATE - if (cmp_n == 0 && groupgrow && DB_UNDO(op)) { - REC_DIRTY(mpf, dbc->priority, &hcp->hdr); - hcp->hdr->spares[ - __db_log2(argp->bucket + 1) + 1] = PGNO_INVALID; - } -#endif - - /* - * Finally, we need to potentially fix up the last_pgno field - * in the master meta-data page (which may or may not be the - * same as the hash header page). 
- */ - if (argp->mmpgno != argp->mpgno) { - if ((ret = __memp_fget(mpf, &argp->mmpgno, NULL, - DB_MPOOL_EDIT, &mmeta)) != 0) { - /* - * NOTE(review): this jumps to the out label, so even - * when ret is reset to 0 the *lsnp update at the done - * label is skipped -- confirm this is intended. - */ - if (DB_UNDO(op) && ret == DB_PAGE_NOTFOUND) - ret = 0; - goto out; - } - cmp_n = LOG_COMPARE(lsnp, &mmeta->lsn); - cmp_p = LOG_COMPARE(&mmeta->lsn, &argp->mmetalsn); - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, dbc->priority, &mmeta); - mmeta->lsn = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - REC_DIRTY(mpf, dbc->priority, &mmeta); - mmeta->lsn = argp->mmetalsn; - } - } else { - mmeta = (DBMETA *)hcp->hdr; - REC_DIRTY(mpf, dbc->priority, &mmeta); - } - -#ifdef HAVE_FTRUNCATE - if (cmp_n == 0 && DB_UNDO(op)) - mmeta->last_pgno = argp->last_pgno; - else if (DB_REDO(op)) -#endif - if (mmeta->last_pgno < pgno) - mmeta->last_pgno = pgno; - - if (argp->mmpgno != argp->mpgno && - (ret = __memp_fput(mpf, mmeta, dbc->priority)) != 0) - goto out; - mmeta = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (mmeta != NULL) - (void)__memp_fput(mpf, mmeta, dbc->priority); - if (dbc != NULL) - (void)__ham_release_meta(dbc); - if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC) - ret = 0; - - REC_CLOSE; -} - -/* - * __ham_groupalloc_recover -- - * Recover the batch creation of a set of pages for a new database. 
- * - * PUBLIC: int __ham_groupalloc_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_groupalloc_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_groupalloc_args *argp; - DBMETA *mmeta; - DB_MPOOLFILE *mpf; - DB *file_dbp; - DBC *dbc; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, ret; - - mmeta = NULL; - REC_PRINT(__ham_groupalloc_print); - REC_INTRO(__ham_groupalloc_read, 0, 0); - - pgno = PGNO_BASE_MD; - if ((ret = __memp_fget(mpf, &pgno, NULL, - 0, &mmeta)) != 0) { - if (DB_REDO(op)) { - ret = __db_pgerr(file_dbp, pgno, ret); - goto out; - } else - goto done; - } - - cmp_n = LOG_COMPARE(lsnp, &LSN(mmeta)); - cmp_p = LOG_COMPARE(&LSN(mmeta), &argp->meta_lsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(mmeta), &argp->meta_lsn); - - /* - * Basically, we used mpool to allocate a chunk of pages. - * We need to either add those to a free list (in the undo - * case) or initialize them (in the redo case). - * - * If we are redoing and this is a hash subdatabase, it's possible - * that the pages were never allocated, so we'd better check for - * that and handle it here. - */ - pgno = argp->start_pgno + argp->num - 1; - if (DB_REDO(op)) { - if ((ret = __ham_alloc_pages(file_dbp, argp, lsnp)) != 0) - goto out; - if (cmp_p == 0) { - REC_DIRTY(mpf, file_dbp->priority, &mmeta); - LSN(mmeta) = *lsnp; - } - } else if (DB_UNDO(op)) { - /* - * Fetch the last page and determine if it is in - * the post allocation state. - */ - pagep = NULL; - if ((ret = __memp_fget(mpf, &pgno, NULL, - DB_MPOOL_EDIT, &pagep)) == 0) { - if (LOG_COMPARE(&pagep->lsn, lsnp) != 0) { - if ((ret = __memp_fput(mpf, pagep, - DB_PRIORITY_VERY_LOW)) != 0) - goto out; - pagep = NULL; - } - } else if (ret != DB_PAGE_NOTFOUND) - goto out; -#ifdef HAVE_FTRUNCATE - /* - * If the last page was allocated then truncate back - * to the first page. 
- */ - if (pagep != NULL) { - if ((ret = __memp_fput(mpf, - pagep, DB_PRIORITY_VERY_LOW)) != 0) - goto out; - if ((ret = - __memp_ftruncate(mpf, argp->start_pgno, 0)) != 0) - goto out; - } - - /* - * If we are rolling back the metapage, then make - * sure it reflects the the correct last_pgno. - */ - if (cmp_n == 0) { - REC_DIRTY(mpf, file_dbp->priority, &mmeta); - mmeta->last_pgno = argp->last_pgno; - } - pgno = 0; -#else - /* - * Reset the last page back to its preallocation state. - */ - if (pagep != NULL) { - REC_DIRTY(mpf, file_dbp->priority, &pagep); - if (LOG_COMPARE(&pagep->lsn, lsnp) == 0) - ZERO_LSN(pagep->lsn); - - if ((ret = - __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - } - /* - * Put the pages into the limbo list and free them later. - */ - if ((ret = __db_add_limbo(dbenv, - info, argp->fileid, argp->start_pgno, argp->num)) != 0) - goto out; -#endif - if (cmp_n == 0) { - REC_DIRTY(mpf, file_dbp->priority, &mmeta); - LSN(mmeta) = argp->meta_lsn; - } - } - - /* - * In both REDO and UNDO, we have grown the file and need to make - * sure that last_pgno is correct. If we HAVE_FTRUNCATE pgno - * will only be valid on REDO. - */ - if (pgno > mmeta->last_pgno) { - REC_DIRTY(mpf, file_dbp->priority, &mmeta); - mmeta->last_pgno = pgno; - } - -done: if (ret == 0) - *lsnp = argp->prev_lsn; - ret = 0; - -out: if (mmeta != NULL) - (void)__memp_fput(mpf, mmeta, file_dbp->priority); - - if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC) - ret = 0; - REC_CLOSE; -} - -/* - * __ham_alloc_pages -- - * - * Called during redo of a file create. We create new pages in the file - * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a - * __crdel_metasub message. If we manage to crash without the newly written - * pages getting to disk (I'm not sure this can happen anywhere except our - * test suite?!), then we need to go through a recreate the final pages. - * Hash normally has holes in its files and handles them appropriately. 
- */ -static int -__ham_alloc_pages(file_dbp, argp, lsnp) - DB *file_dbp; - __ham_groupalloc_args *argp; - DB_LSN *lsnp; -{ - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int ret; - - mpf = file_dbp->mpf; - - /* Read the last page of the allocation. */ - pgno = argp->start_pgno + argp->num - 1; - - /* If the page exists, and it has been initialized, then we're done. */ - if ((ret = - __memp_fget(mpf, &pgno, NULL, 0, &pagep)) == 0) { - if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn)) - goto reinit_page; - return (__memp_fput(mpf, pagep, file_dbp->priority)); - } - - /* Had to create the page. */ - if ((ret = __memp_fget( - mpf, &pgno, NULL, DB_MPOOL_CREATE, &pagep)) != 0) - return (__db_pgerr(file_dbp, pgno, ret)); - -reinit_page: - /* Initialize the newly allocated page. */ - REC_DIRTY(mpf, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, - pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - pagep->lsn = *lsnp; - -out: return (__memp_fput(mpf, pagep, file_dbp->priority)); -} - -/* - * __ham_curadj_recover -- - * Undo cursor adjustments if a subtransaction fails. - * - * PUBLIC: int __ham_curadj_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_curadj_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_curadj_args *argp; - DB_MPOOLFILE *mpf; - DB *file_dbp; - DBC *dbc; - int ret; - HASH_CURSOR *hcp; - db_ham_curadj mode, hamc_mode; - - REC_PRINT(__ham_curadj_print); - REC_INTRO(__ham_curadj_read, 0, 1); - - if (op != DB_TXN_ABORT) - goto done; - - mode = (db_ham_curadj)argp->add; - - /* - * Reverse the logged operation, so that the consequences are reversed - * by the __hamc_update code. 
- */ - switch (mode) { - case DB_HAM_CURADJ_DEL: - hamc_mode = DB_HAM_CURADJ_ADD; - break; - case DB_HAM_CURADJ_ADD: - hamc_mode = DB_HAM_CURADJ_DEL; - break; - case DB_HAM_CURADJ_ADDMOD: - hamc_mode = DB_HAM_CURADJ_DELMOD; - break; - case DB_HAM_CURADJ_DELMOD: - hamc_mode = DB_HAM_CURADJ_ADDMOD; - break; - default: - __db_errx(dbenv, - "Invalid flag in __ham_curadj_recover"); - ret = EINVAL; - goto out; - } - - /* - * Undo the adjustment by reinitializing the the cursor to look like - * the one that was used to do the adjustment, then we invert the - * add so that undo the adjustment. - */ - hcp = (HASH_CURSOR *)dbc->internal; - hcp->pgno = argp->pgno; - hcp->indx = argp->indx; - hcp->dup_off = argp->dup_off; - hcp->order = argp->order; - if (mode == DB_HAM_CURADJ_DEL) - F_SET(hcp, H_DELETED); - (void)__hamc_update(dbc, argp->len, hamc_mode, argp->is_dup); - -done: *lsnp = argp->prev_lsn; -out: REC_CLOSE; -} - -/* - * __ham_chgpg_recover -- - * Undo cursor adjustments if a subtransaction fails. 
- * - * PUBLIC: int __ham_chgpg_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_chgpg_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_chgpg_args *argp; - BTREE_CURSOR *opdcp; - DB_MPOOLFILE *mpf; - DB *file_dbp, *ldbp; - DBC *dbc; - int ret; - DBC *cp; - HASH_CURSOR *lcp; - u_int32_t order, indx; - - REC_PRINT(__ham_chgpg_print); - REC_INTRO(__ham_chgpg_read, 0, 0); - - if (op != DB_TXN_ABORT) - goto done; - - /* Overloaded fields for DB_HAM_DEL*PG */ - indx = argp->old_indx; - order = argp->new_indx; - - MUTEX_LOCK(dbenv, dbenv->mtx_dblist); - FIND_FIRST_DB_MATCH(dbenv, file_dbp, ldbp); - for (; - ldbp != NULL && ldbp->adj_fileid == file_dbp->adj_fileid; - ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { - MUTEX_LOCK(dbenv, file_dbp->mutex); - TAILQ_FOREACH(cp, &ldbp->active_queue, links) { - lcp = (HASH_CURSOR *)cp->internal; - - switch (argp->mode) { - case DB_HAM_DELFIRSTPG: - if (lcp->pgno != argp->new_pgno || - MVCC_SKIP_CURADJ(cp, lcp->pgno)) - break; - if (lcp->indx != indx || - !F_ISSET(lcp, H_DELETED) || - lcp->order >= order) { - lcp->pgno = argp->old_pgno; - if (lcp->indx == indx) - lcp->order -= order; - } - break; - case DB_HAM_DELMIDPG: - case DB_HAM_DELLASTPG: - if (lcp->pgno == argp->new_pgno && - lcp->indx == indx && - F_ISSET(lcp, H_DELETED) && - lcp->order >= order && - !MVCC_SKIP_CURADJ(cp, lcp->pgno)) { - lcp->pgno = argp->old_pgno; - lcp->order -= order; - lcp->indx = 0; - } - break; - case DB_HAM_CHGPG: - /* - * If we're doing a CHGPG, we're undoing - * the move of a non-deleted item to a - * new page. Any cursors with the deleted - * flag set do not belong to this item; - * don't touch them. 
- */ - if (F_ISSET(lcp, H_DELETED)) - break; - /* FALLTHROUGH */ - case DB_HAM_SPLIT: - if (lcp->pgno == argp->new_pgno && - lcp->indx == argp->new_indx && - !MVCC_SKIP_CURADJ(cp, lcp->pgno)) { - lcp->indx = argp->old_indx; - lcp->pgno = argp->old_pgno; - } - break; - case DB_HAM_DUP: - if (lcp->opd == NULL) - break; - opdcp = (BTREE_CURSOR *)lcp->opd->internal; - if (opdcp->pgno != argp->new_pgno || - opdcp->indx != argp->new_indx || - MVCC_SKIP_CURADJ(lcp->opd, opdcp->pgno)) - break; - - if (F_ISSET(opdcp, C_DELETED)) - F_SET(lcp, H_DELETED); - /* - * We can't close a cursor while we have the - * dbp mutex locked, since c_close reacquires - * it. It should be safe to drop the mutex - * here, though, since newly opened cursors - * are put only at the end of the tailq and - * the cursor we're adjusting can't be closed - * under us. - */ - MUTEX_UNLOCK(dbenv, file_dbp->mutex); - if ((ret = __dbc_close(lcp->opd)) != 0) - goto out; - MUTEX_LOCK(dbenv, file_dbp->mutex); - lcp->opd = NULL; - break; - } - } - MUTEX_UNLOCK(dbenv, file_dbp->mutex); - } - MUTEX_UNLOCK(dbenv, dbenv->mtx_dblist); - -done: *lsnp = argp->prev_lsn; -out: REC_CLOSE; -} - -/* - * __ham_metagroup_recover -- - * Recovery function for metagroup. 
- * - * PUBLIC: int __ham_metagroup_42_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_metagroup_42_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_metagroup_42_args *argp; - HASH_CURSOR *hcp; - DB *file_dbp; - DBMETA *mmeta; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - u_int32_t flags; - int cmp_n, cmp_p, did_alloc, groupgrow, ret; - - did_alloc = 0; - mmeta = NULL; - REC_PRINT(__ham_metagroup_42_print); - REC_INTRO(__ham_metagroup_42_read, 1, 1); - - /* - * This logs the virtual create of pages pgno to pgno + bucket - * If HAVE_FTRUNCATE is not supported the mpool page-allocation is not - * transaction protected, we can never undo it. Even in an abort, - * we have to allocate these pages to the hash table if they - * were actually created. In particular, during disaster - * recovery the metapage may be before this point if we - * are rolling backward. If the file has not been extended - * then the metapage could not have been updated. - * The log record contains: - * bucket: old maximum bucket - * pgno: page number of the new bucket. - * We round up on log calculations, so we can figure out if we are - * about to double the hash table if argp->bucket+1 is a power of 2. - * If it is, then we are allocating an entire doubling of pages, - * otherwise, we are simply allocated one new page. - */ - groupgrow = - (u_int32_t)(1 << __db_log2(argp->bucket + 1)) == argp->bucket + 1; - pgno = argp->pgno; - if (argp->newalloc) - pgno += argp->bucket; - - flags = 0; - pagep = NULL; - LF_SET(DB_MPOOL_CREATE); - ret = __memp_fget(mpf, &pgno, NULL, flags, &pagep); - - if (ret != 0) { - if (ret != ENOSPC) - goto out; - pgno = 0; - goto do_meta; - } - - /* - * When we get here then either we did not grow the file - * (groupgrow == 0) or we did grow the file and the allocation - * of those new pages succeeded. 
- */ - did_alloc = groupgrow; - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(pagep), &argp->pagelsn); - - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, dbc->priority, &pagep); - pagep->lsn = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* - * Otherwise just roll the page back to its - * previous state. - */ - REC_DIRTY(mpf, dbc->priority, &pagep); - pagep->lsn = argp->pagelsn; - } - if (pagep != NULL && - (ret = __memp_fput(mpf, pagep, dbc->priority)) != 0) - goto out; - -do_meta: - /* Now we have to update the meta-data page. */ - hcp = (HASH_CURSOR *)dbc->internal; - if ((ret = __ham_get_meta(dbc)) != 0) - goto out; - cmp_n = LOG_COMPARE(lsnp, &hcp->hdr->dbmeta.lsn); - cmp_p = LOG_COMPARE(&hcp->hdr->dbmeta.lsn, &argp->metalsn); - CHECK_LSN(dbenv, op, cmp_p, &hcp->hdr->dbmeta.lsn, &argp->metalsn); - if (cmp_p == 0 && DB_REDO(op)) { - /* Redo the actual updating of bucket counts. */ - REC_DIRTY(mpf, dbc->priority, &hcp->hdr); - ++hcp->hdr->max_bucket; - if (groupgrow) { - hcp->hdr->low_mask = hcp->hdr->high_mask; - hcp->hdr->high_mask = - (argp->bucket + 1) | hcp->hdr->low_mask; - } - hcp->hdr->dbmeta.lsn = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Undo the actual updating of bucket counts. */ - REC_DIRTY(mpf, dbc->priority, &hcp->hdr); - hcp->hdr->max_bucket = argp->bucket; - if (groupgrow) { - hcp->hdr->high_mask = argp->bucket; - hcp->hdr->low_mask = hcp->hdr->high_mask >> 1; - } - hcp->hdr->dbmeta.lsn = argp->metalsn; - } - - /* - * Now we need to fix up the spares array. Each entry in the - * spares array indicates the beginning page number for the - * indicated doubling. We need to fill this in whenever the - * spares array is invalid, if we never reclaim pages then - * we have to allocate the pages to the spares array in both - * the redo and undo cases. 
- */ - if (did_alloc && - hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] == PGNO_INVALID) { - REC_DIRTY(mpf, dbc->priority, &hcp->hdr); - hcp->hdr->spares[__db_log2(argp->bucket + 1) + 1] = - (argp->pgno - argp->bucket) - 1; - } - - /* - * Finally, we need to potentially fix up the last_pgno field - * in the master meta-data page (which may or may not be the - * same as the hash header page). - */ - if (argp->mmpgno != argp->mpgno) { - if ((ret = __memp_fget(mpf, &argp->mmpgno, NULL, - DB_MPOOL_EDIT, &mmeta)) != 0) { - if (DB_UNDO(op) && ret == DB_PAGE_NOTFOUND) - ret = 0; - goto out; - } - cmp_n = LOG_COMPARE(lsnp, &mmeta->lsn); - cmp_p = LOG_COMPARE(&mmeta->lsn, &argp->mmetalsn); - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, dbc->priority, &mmeta); - mmeta->lsn = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - REC_DIRTY(mpf, dbc->priority, &mmeta); - mmeta->lsn = argp->mmetalsn; - } - } else { - mmeta = (DBMETA *)hcp->hdr; - REC_DIRTY(mpf, dbc->priority, &mmeta); - } - - if (mmeta->last_pgno < pgno) - mmeta->last_pgno = pgno; - - if (argp->mmpgno != argp->mpgno && - (ret = __memp_fput(mpf, mmeta, dbc->priority)) != 0) - goto out; - mmeta = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (mmeta != NULL) - (void)__memp_fput(mpf, mmeta, dbc->priority); - if (dbc != NULL) - (void)__ham_release_meta(dbc); - if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC) - ret = 0; - - REC_CLOSE; -} - -/* - * __ham_groupalloc_42_recover -- - * Recover the batch creation of a set of pages for a new database. 
- * - * PUBLIC: int __ham_groupalloc_42_recover - * PUBLIC: __P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__ham_groupalloc_42_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __ham_groupalloc_42_args *argp; - DBMETA *mmeta; - DB_MPOOLFILE *mpf; - DB *file_dbp; - DBC *dbc; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, ret; - - mmeta = NULL; - REC_PRINT(__ham_groupalloc_42_print); - REC_INTRO(__ham_groupalloc_42_read, 0, 0); - - pgno = PGNO_BASE_MD; - if ((ret = __memp_fget(mpf, &pgno, NULL, 0, &mmeta)) != 0) { - if (DB_REDO(op)) { - ret = __db_pgerr(file_dbp, pgno, ret); - goto out; - } else - goto done; - } - - cmp_n = LOG_COMPARE(lsnp, &LSN(mmeta)); - cmp_p = LOG_COMPARE(&LSN(mmeta), &argp->meta_lsn); - CHECK_LSN(dbenv, op, cmp_p, &LSN(mmeta), &argp->meta_lsn); - - /* - * Basically, we used mpool to allocate a chunk of pages. - * We need to either add those to a free list (in the undo - * case) or initialize them (in the redo case). - * - * If we are redoing and this is a hash subdatabase, it's possible - * that the pages were never allocated, so we'd better check for - * that and handle it here. - */ - pgno = argp->start_pgno + argp->num - 1; - if (DB_REDO(op)) { - if ((ret = __ham_alloc_pages_42(file_dbp, argp, lsnp)) != 0) - goto out; - if (cmp_p == 0) { - REC_DIRTY(mpf, file_dbp->priority, &mmeta); - LSN(mmeta) = *lsnp; - } - } else if (DB_UNDO(op)) { - /* - * Fetch the last page and determine if it is in - * the post allocation state. - */ - pagep = NULL; - if ((ret = __memp_fget(mpf, &pgno, NULL, 0, &pagep)) == 0) { - if (LOG_COMPARE(&pagep->lsn, lsnp) != 0) { - if ((ret = __memp_fput(mpf, pagep, - DB_PRIORITY_VERY_LOW)) != 0) - goto out; - pagep = NULL; - } - } else if (ret != DB_PAGE_NOTFOUND) - goto out; - /* - * Reset the last page back to its preallocation state. 
- */ - if (pagep != NULL) { - if (LOG_COMPARE(&pagep->lsn, lsnp) == 0) { - REC_DIRTY(mpf, file_dbp->priority, &pagep); - ZERO_LSN(pagep->lsn); - } - - if ((ret = - __memp_fput(mpf, pagep, file_dbp->priority)) != 0) - goto out; - } - /* - * Put the pages into the limbo list and free them later. - */ - if ((ret = __db_add_limbo(dbenv, - info, argp->fileid, argp->start_pgno, argp->num)) != 0) - goto out; - if (cmp_n == 0) { - REC_DIRTY(mpf, file_dbp->priority, &mmeta); - LSN(mmeta) = argp->meta_lsn; - } - } - - /* - * In both REDO and UNDO, we have grown the file and need to make - * sure that last_pgno is correct. If we HAVE_FTRUNCATE pgno - * will only be valid on REDO. - */ - if (pgno > mmeta->last_pgno) { - REC_DIRTY(mpf, file_dbp->priority, &mmeta); - mmeta->last_pgno = pgno; - } - -done: if (ret == 0) - *lsnp = argp->prev_lsn; - ret = 0; - -out: if (mmeta != NULL) - (void)__memp_fput(mpf, mmeta, file_dbp->priority); - - if (ret == ENOENT && op == DB_TXN_BACKWARD_ALLOC) - ret = 0; - REC_CLOSE; -} - -/* - * __ham_alloc_pages_42 -- - * - * Called during redo of a file create. We create new pages in the file - * using the MPOOL_NEW_GROUP flag. We then log the meta-data page with a - * __crdel_metasub message. If we manage to crash without the newly written - * pages getting to disk (I'm not sure this can happen anywhere except our - * test suite?!), then we need to go through a recreate the final pages. - * Hash normally has holes in its files and handles them appropriately. - */ -static int -__ham_alloc_pages_42(dbp, argp, lsnp) - DB *dbp; - __ham_groupalloc_42_args *argp; - DB_LSN *lsnp; -{ - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int ret; - - mpf = dbp->mpf; - - /* Read the last page of the allocation. */ - pgno = argp->start_pgno + argp->num - 1; - - /* If the page exists, and it has been initialized, then we're done. 
*/ - if ((ret = __memp_fget(mpf, &pgno, NULL, 0, &pagep)) == 0) { - if (NUM_ENT(pagep) == 0 && IS_ZERO_LSN(pagep->lsn)) - goto reinit_page; - if ((ret = __memp_fput(mpf, pagep, dbp->priority)) != 0) - return (ret); - return (0); - } - - /* Had to create the page. */ - if ((ret = __memp_fget(mpf, &pgno, NULL, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &pagep)) != 0) - return (__db_pgerr(dbp, pgno, ret)); - -reinit_page: - /* Initialize the newly allocated page. */ - P_INIT(pagep, dbp->pgsize, pgno, PGNO_INVALID, PGNO_INVALID, 0, P_HASH); - pagep->lsn = *lsnp; - - if ((ret = __memp_fput(mpf, pagep, dbp->priority)) != 0) - return (ret); - - return (0); -} diff --git a/db/hash/hash_reclaim.c b/db/hash/hash_reclaim.c deleted file mode 100644 index 8a692645c..000000000 --- a/db/hash/hash_reclaim.c +++ /dev/null @@ -1,96 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - * - * $Id: hash_reclaim.c,v 12.9 2007/05/17 15:15:38 bostic Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" - -/* - * __ham_reclaim -- - * Reclaim the pages from a subdatabase and return them to the - * parent free list. For now, we link each freed page on the list - * separately. If people really store hash databases in subdatabases - * and do a lot of creates and deletes, this is going to be a problem, - * because hash needs chunks of contiguous storage. We may eventually - * need to go to a model where we maintain the free list with chunks of - * contiguous pages as well. - * - * PUBLIC: int __ham_reclaim __P((DB *, DB_TXN *txn)); - */ -int -__ham_reclaim(dbp, txn) - DB *dbp; - DB_TXN *txn; -{ - DBC *dbc; - HASH_CURSOR *hcp; - int ret; - - /* Open up a cursor that we'll use for traversing. 
*/ - if ((ret = __db_cursor(dbp, txn, &dbc, 0)) != 0) - return (ret); - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __ham_get_meta(dbc)) != 0) - goto err; - - /* Write lock the metapage for deallocations. */ - if ((ret = __ham_dirty_meta(dbc, 0)) != 0) - goto err; - - /* Avoid locking every page, we have the handle locked exclusive. */ - F_SET(dbc, DBC_DONTLOCK); - - if ((ret = __ham_traverse(dbc, - DB_LOCK_WRITE, __db_reclaim_callback, dbc, 1)) != 0) - goto err; - if ((ret = __dbc_close(dbc)) != 0) - goto err; - if ((ret = __ham_release_meta(dbc)) != 0) - goto err; - return (0); - -err: if (hcp->hdr != NULL) - (void)__ham_release_meta(dbc); - (void)__dbc_close(dbc); - return (ret); -} - -/* - * __ham_truncate -- - * Reclaim the pages from a subdatabase and return them to the - * parent free list. - * - * PUBLIC: int __ham_truncate __P((DBC *, u_int32_t *)); - */ -int -__ham_truncate(dbc, countp) - DBC *dbc; - u_int32_t *countp; -{ - db_trunc_param trunc; - int ret, t_ret; - - if ((ret = __ham_get_meta(dbc)) != 0) - return (ret); - - trunc.count = 0; - trunc.dbc = dbc; - - ret = __ham_traverse(dbc, - DB_LOCK_WRITE, __db_truncate_callback, &trunc, 1); - - if ((t_ret = __ham_release_meta(dbc)) != 0 && ret == 0) - ret = t_ret; - - if (countp != NULL) - *countp = trunc.count; - return (ret); -} diff --git a/db/hash/hash_stat.c b/db/hash/hash_stat.c deleted file mode 100644 index f079f1885..000000000 --- a/db/hash/hash_stat.c +++ /dev/null @@ -1,514 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. 
- * - * $Id: hash_stat.c,v 12.17 2007/07/02 16:58:02 alexg Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/mp.h" - -#ifdef HAVE_STATISTICS -static int __ham_stat_callback __P((DB *, PAGE *, void *, int *)); - -/* - * __ham_stat -- - * Gather/print the hash statistics - * - * PUBLIC: int __ham_stat __P((DBC *, void *, u_int32_t)); - */ -int -__ham_stat(dbc, spp, flags) - DBC *dbc; - void *spp; - u_int32_t flags; -{ - DB *dbp; - DB_ENV *dbenv; - DB_HASH_STAT *sp; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - PAGE *h; - db_pgno_t pgno; - int ret; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - - mpf = dbp->mpf; - sp = NULL; - - hcp = (HASH_CURSOR *)dbc->internal; - - if ((ret = __ham_get_meta(dbc)) != 0) - goto err; - - /* Allocate and clear the structure. */ - if ((ret = __os_umalloc(dbenv, sizeof(*sp), &sp)) != 0) - goto err; - memset(sp, 0, sizeof(*sp)); - /* Copy the fields that we have. */ - sp->hash_nkeys = hcp->hdr->dbmeta.key_count; - sp->hash_ndata = hcp->hdr->dbmeta.record_count; - /* - * Don't take the page number from the meta-data page -- that value is - * only maintained in the primary database, we may have been called on - * a subdatabase. - */ - if ((ret = __memp_get_last_pgno(dbp->mpf, &pgno)) != 0) - goto err; - sp->hash_pagecnt = pgno + 1; - sp->hash_pagesize = dbp->pgsize; - sp->hash_buckets = hcp->hdr->max_bucket + 1; - sp->hash_magic = hcp->hdr->dbmeta.magic; - sp->hash_version = hcp->hdr->dbmeta.version; - sp->hash_metaflags = hcp->hdr->dbmeta.flags; - sp->hash_ffactor = hcp->hdr->ffactor; - - if (flags == DB_FAST_STAT) - goto done; - - /* Walk the free list, counting pages. 
*/ - for (sp->hash_free = 0, pgno = hcp->hdr->dbmeta.free; - pgno != PGNO_INVALID;) { - ++sp->hash_free; - - if ((ret = __memp_fget(mpf, &pgno, dbc->txn, 0, &h)) != 0) - goto err; - - pgno = h->next_pgno; - (void)__memp_fput(mpf, h, dbc->priority); - } - - /* Now traverse the rest of the table. */ - sp->hash_nkeys = 0; - sp->hash_ndata = 0; - if ((ret = __ham_traverse(dbc, - DB_LOCK_READ, __ham_stat_callback, sp, 0)) != 0) - goto err; - - if (!F_ISSET(dbp, DB_AM_RDONLY)) { - /* - * A transaction is not required for DB->stat, so this update - * can't safely make a copy of the meta page. We have to - * update in place. - */ - if ((ret = __ham_dirty_meta(dbc, - (dbc->txn == NULL) ? DB_MPOOL_EDIT : 0)) != 0) - goto err; - hcp->hdr->dbmeta.key_count = sp->hash_nkeys; - hcp->hdr->dbmeta.record_count = sp->hash_ndata; - } - -done: if ((ret = __ham_release_meta(dbc)) != 0) - goto err; - - *(DB_HASH_STAT **)spp = sp; - return (0); - -err: if (sp != NULL) - __os_ufree(dbenv, sp); - - if (hcp->hdr != NULL) - (void)__ham_release_meta(dbc); - - return (ret); -} - -/* - * __ham_stat_print -- - * Display hash statistics. 
- * - * PUBLIC: int __ham_stat_print __P((DBC *, u_int32_t)); - */ -int -__ham_stat_print(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - static const FN fn[] = { - { DB_HASH_DUP, "duplicates" }, - { DB_HASH_SUBDB, "multiple-databases" }, - { DB_HASH_DUPSORT, "sorted duplicates" }, - { 0, NULL } - }; - DB *dbp; - DB_ENV *dbenv; - DB_HASH_STAT *sp; - int lorder, ret; - const char *s; - - dbp = dbc->dbp; - dbenv = dbp->dbenv; - - if ((ret = __ham_stat(dbc, &sp, LF_ISSET(DB_FAST_STAT))) != 0) - return (ret); - - if (LF_ISSET(DB_STAT_ALL)) { - __db_msg(dbenv, "%s", DB_GLOBAL(db_line)); - __db_msg(dbenv, "Default Hash database information:"); - } - __db_msg(dbenv, "%lx\tHash magic number", (u_long)sp->hash_magic); - __db_msg(dbenv, - "%lu\tHash version number", (u_long)sp->hash_version); - (void)__db_get_lorder(dbp, &lorder); - switch (lorder) { - case 1234: - s = "Little-endian"; - break; - case 4321: - s = "Big-endian"; - break; - default: - s = "Unrecognized byte order"; - break; - } - __db_msg(dbenv, "%s\tByte order", s); - __db_prflags(dbenv, NULL, sp->hash_metaflags, fn, NULL, "\tFlags"); - __db_dl(dbenv, - "Underlying database page size", (u_long)sp->hash_pagesize); - __db_dl(dbenv, "Specified fill factor", (u_long)sp->hash_ffactor); - __db_dl(dbenv, - "Number of keys in the database", (u_long)sp->hash_nkeys); - __db_dl(dbenv, - "Number of data items in the database", (u_long)sp->hash_ndata); - - __db_dl(dbenv, "Number of hash buckets", (u_long)sp->hash_buckets); - __db_dl_pct(dbenv, "Number of bytes free on bucket pages", - (u_long)sp->hash_bfree, DB_PCT_PG( - sp->hash_bfree, sp->hash_buckets, sp->hash_pagesize), "ff"); - - __db_dl(dbenv, - "Number of overflow pages", (u_long)sp->hash_bigpages); - __db_dl_pct(dbenv, "Number of bytes free in overflow pages", - (u_long)sp->hash_big_bfree, DB_PCT_PG( - sp->hash_big_bfree, sp->hash_bigpages, sp->hash_pagesize), "ff"); - - __db_dl(dbenv, - "Number of bucket overflow pages", (u_long)sp->hash_overflows); - 
__db_dl_pct(dbenv, - "Number of bytes free in bucket overflow pages", - (u_long)sp->hash_ovfl_free, DB_PCT_PG( - sp->hash_ovfl_free, sp->hash_overflows, sp->hash_pagesize), "ff"); - - __db_dl(dbenv, "Number of duplicate pages", (u_long)sp->hash_dup); - __db_dl_pct(dbenv, "Number of bytes free in duplicate pages", - (u_long)sp->hash_dup_free, DB_PCT_PG( - sp->hash_dup_free, sp->hash_dup, sp->hash_pagesize), "ff"); - - __db_dl(dbenv, - "Number of pages on the free list", (u_long)sp->hash_free); - - __os_ufree(dbenv, sp); - - return (0); -} - -static int -__ham_stat_callback(dbp, pagep, cookie, putp) - DB *dbp; - PAGE *pagep; - void *cookie; - int *putp; -{ - DB_HASH_STAT *sp; - DB_BTREE_STAT bstat; - db_indx_t indx, len, off, tlen, top; - u_int8_t *hk; - int ret; - - *putp = 0; - sp = cookie; - - switch (pagep->type) { - case P_INVALID: - /* - * Hash pages may be wholly zeroed; this is not a bug. - * Obviously such pages have no data, so we can just proceed. - */ - break; - case P_HASH_UNSORTED: - case P_HASH: - /* - * We count the buckets and the overflow pages - * separately and tally their bytes separately - * as well. We need to figure out if this page - * is a bucket. - */ - if (PREV_PGNO(pagep) == PGNO_INVALID) - sp->hash_bfree += P_FREESPACE(dbp, pagep); - else { - sp->hash_overflows++; - sp->hash_ovfl_free += P_FREESPACE(dbp, pagep); - } - top = NUM_ENT(pagep); - /* Correct for on-page duplicates and deleted items. 
*/ - for (indx = 0; indx < top; indx += P_INDX) { - switch (*H_PAIRDATA(dbp, pagep, indx)) { - case H_OFFDUP: - break; - case H_OFFPAGE: - case H_KEYDATA: - sp->hash_ndata++; - break; - case H_DUPLICATE: - tlen = LEN_HDATA(dbp, pagep, 0, indx); - hk = H_PAIRDATA(dbp, pagep, indx); - for (off = 0; off < tlen; - off += len + 2 * sizeof(db_indx_t)) { - sp->hash_ndata++; - memcpy(&len, - HKEYDATA_DATA(hk) - + off, sizeof(db_indx_t)); - } - break; - default: - return (__db_pgfmt(dbp->dbenv, PGNO(pagep))); - } - } - sp->hash_nkeys += H_NUMPAIRS(pagep); - break; - case P_IBTREE: - case P_IRECNO: - case P_LBTREE: - case P_LRECNO: - case P_LDUP: - /* - * These are all btree pages; get a correct - * cookie and call them. Then add appropriate - * fields into our stat structure. - */ - memset(&bstat, 0, sizeof(bstat)); - if ((ret = __bam_stat_callback(dbp, pagep, &bstat, putp)) != 0) - return (ret); - sp->hash_dup++; - sp->hash_dup_free += bstat.bt_leaf_pgfree + - bstat.bt_dup_pgfree + bstat.bt_int_pgfree; - sp->hash_ndata += bstat.bt_ndata; - break; - case P_OVERFLOW: - sp->hash_bigpages++; - sp->hash_big_bfree += P_OVFLSPACE(dbp, dbp->pgsize, pagep); - break; - default: - return (__db_pgfmt(dbp->dbenv, PGNO(pagep))); - } - - return (0); -} - -/* - * __ham_print_cursor -- - * Display the current cursor. 
- * - * PUBLIC: void __ham_print_cursor __P((DBC *)); - */ -void -__ham_print_cursor(dbc) - DBC *dbc; -{ - static const FN fn[] = { - { H_CONTINUE, "H_CONTINUE" }, - { H_DELETED, "H_DELETED" }, - { H_DUPONLY, "H_DUPONLY" }, - { H_EXPAND, "H_EXPAND" }, - { H_ISDUP, "H_ISDUP" }, - { H_NEXT_NODUP, "H_NEXT_NODUP" }, - { H_NOMORE, "H_NOMORE" }, - { H_OK, "H_OK" }, - { 0, NULL } - }; - DB_ENV *dbenv; - HASH_CURSOR *cp; - - dbenv = dbc->dbp->dbenv; - cp = (HASH_CURSOR *)dbc->internal; - - STAT_ULONG("Bucket traversing", cp->bucket); - STAT_ULONG("Bucket locked", cp->lbucket); - STAT_ULONG("Duplicate set offset", cp->dup_off); - STAT_ULONG("Current duplicate length", cp->dup_len); - STAT_ULONG("Total duplicate set length", cp->dup_tlen); - STAT_ULONG("Bytes needed for add", cp->seek_size); - STAT_ULONG("Page on which we can insert", cp->seek_found_page); - STAT_ULONG("Order", cp->order); - __db_prflags(dbenv, NULL, cp->flags, fn, NULL, "\tInternal Flags"); -} - -#else /* !HAVE_STATISTICS */ - -int -__ham_stat(dbc, spp, flags) - DBC *dbc; - void *spp; - u_int32_t flags; -{ - COMPQUIET(spp, NULL); - COMPQUIET(flags, 0); - - return (__db_stat_not_built(dbc->dbp->dbenv)); -} -#endif - -/* - * __ham_traverse - * Traverse an entire hash table. We use the callback so that we - * can use this both for stat collection and for deallocation. 
- * - * PUBLIC: int __ham_traverse __P((DBC *, db_lockmode_t, - * PUBLIC: int (*)(DB *, PAGE *, void *, int *), void *, int)); - */ -int -__ham_traverse(dbc, mode, callback, cookie, look_past_max) - DBC *dbc; - db_lockmode_t mode; - int (*callback) __P((DB *, PAGE *, void *, int *)); - void *cookie; - int look_past_max; -{ - DB *dbp; - DBC *opd; - DB_MPOOLFILE *mpf; - HASH_CURSOR *hcp; - HKEYDATA *hk; - db_pgno_t pgno, opgno; - int did_put, i, ret, t_ret; - u_int32_t bucket, spares_entry; - - dbp = dbc->dbp; - opd = NULL; - mpf = dbp->mpf; - hcp = (HASH_CURSOR *)dbc->internal; - ret = 0; - - /* - * In a perfect world, we could simply read each page in the file - * and look at its page type to tally the information necessary. - * Unfortunately, the bucket locking that hash tables do to make - * locking easy, makes this a pain in the butt. We have to traverse - * duplicate, overflow and big pages from the bucket so that we - * don't access anything that isn't properly locked. - * - */ - for (bucket = 0;; bucket++) { - /* - * We put the loop exit condition check here, because - * it made for a really vile extended ?: that made SCO's - * compiler drop core. - * - * If look_past_max is not set, we can stop at max_bucket; - * if it is set, we need to include pages that are part of - * the current doubling but beyond the highest bucket we've - * split into, as well as pages from a "future" doubling - * that may have been created within an aborted - * transaction. To do this, keep looping (and incrementing - * bucket) until the corresponding spares array entries - * cease to be defined. 
- */ - if (look_past_max) { - spares_entry = __db_log2(bucket + 1); - if (spares_entry >= NCACHED || - hcp->hdr->spares[spares_entry] == 0) - break; - } else { - if (bucket > hcp->hdr->max_bucket) - break; - } - - hcp->bucket = bucket; - hcp->pgno = pgno = BUCKET_TO_PAGE(hcp, bucket); - for (ret = __ham_get_cpage(dbc, mode); ret == 0; - ret = __ham_next_cpage(dbc, pgno)) { - - /* - * If we are cleaning up pages past the max_bucket, - * then they may be on the free list and have their - * next pointers set, but they should be ignored. In - * fact, we really ought to just skip anybody who is - * not a valid page. - */ - if (TYPE(hcp->page) == P_INVALID) - break; - pgno = NEXT_PGNO(hcp->page); - - /* - * Go through each item on the page checking for - * duplicates (in which case we have to count the - * duplicate pages) or big key/data items (in which - * case we have to count those pages). - */ - for (i = 0; i < NUM_ENT(hcp->page); i++) { - hk = (HKEYDATA *)P_ENTRY(dbp, hcp->page, i); - switch (HPAGE_PTYPE(hk)) { - case H_OFFDUP: - memcpy(&opgno, HOFFDUP_PGNO(hk), - sizeof(db_pgno_t)); - if ((ret = __dbc_newopd(dbc, - opgno, NULL, &opd)) != 0) - return (ret); - if ((ret = __bam_traverse(opd, - DB_LOCK_READ, opgno, - callback, cookie)) - != 0) - goto err; - if ((ret = __dbc_close(opd)) != 0) - return (ret); - opd = NULL; - break; - case H_OFFPAGE: - /* - * We are about to get a big page - * which will use the same spot that - * the current page uses, so we need - * to restore the current page before - * looking at it again. - */ - memcpy(&opgno, HOFFPAGE_PGNO(hk), - sizeof(db_pgno_t)); - if ((ret = __db_traverse_big(dbp, - opgno, dbc->txn, - callback, cookie)) != 0) - goto err; - break; - case H_KEYDATA: - case H_DUPLICATE: - break; - default: - ret = __db_unknown_path( - dbp->dbenv, "__ham_traverse"); - goto err; - } - } - - /* Call the callback on main pages. 
*/ - if ((ret = callback(dbp, - hcp->page, cookie, &did_put)) != 0) - goto err; - - if (did_put) - hcp->page = NULL; - if (pgno == PGNO_INVALID) - break; - } - if (ret != 0) - goto err; - - if (hcp->page != NULL) { - if ((ret = - __memp_fput(mpf, hcp->page, dbc->priority)) != 0) - return (ret); - hcp->page = NULL; - } - - } -err: if (opd != NULL && - (t_ret = __dbc_close(opd)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} diff --git a/db/hash/hash_stub.c b/db/hash/hash_stub.c deleted file mode 100644 index 7a1f94e0a..000000000 --- a/db/hash/hash_stub.c +++ /dev/null @@ -1,450 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. All rights reserved. - * - * $Id: hash_stub.c,v 12.12 2007/05/17 15:15:38 bostic Exp $ - */ - -#ifndef HAVE_HASH -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" - -/* - * If the library wasn't compiled with the Hash access method, various - * routines aren't available. Stub them here, returning an appropriate - * error. - */ - -/* - * __db_nohasham -- - * Error when a Berkeley DB build doesn't include the access method. 
- * - * PUBLIC: int __db_no_hash_am __P((DB_ENV *)); - */ -int -__db_no_hash_am(dbenv) - DB_ENV *dbenv; -{ - __db_errx(dbenv, - "library build did not include support for the Hash access method"); - return (DB_OPNOTSUP); -} - -int -__ham_30_hashmeta(dbp, real_name, obuf) - DB *dbp; - char *real_name; - u_int8_t *obuf; -{ - COMPQUIET(real_name, NULL); - COMPQUIET(obuf, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_30_sizefix(dbp, fhp, realname, metabuf) - DB *dbp; - DB_FH *fhp; - char *realname; - u_int8_t *metabuf; -{ - COMPQUIET(fhp, NULL); - COMPQUIET(realname, NULL); - COMPQUIET(metabuf, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_31_hash(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - COMPQUIET(real_name, NULL); - COMPQUIET(flags, 0); - COMPQUIET(fhp, NULL); - COMPQUIET(h, NULL); - COMPQUIET(dirtyp, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_31_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - COMPQUIET(real_name, NULL); - COMPQUIET(flags, 0); - COMPQUIET(fhp, NULL); - COMPQUIET(h, NULL); - COMPQUIET(dirtyp, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_46_hash(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - COMPQUIET(real_name, NULL); - COMPQUIET(flags, 0); - COMPQUIET(fhp, NULL); - COMPQUIET(h, NULL); - COMPQUIET(dirtyp, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_46_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - COMPQUIET(real_name, NULL); - COMPQUIET(flags, 0); - COMPQUIET(fhp, NULL); - COMPQUIET(h, NULL); - COMPQUIET(dirtyp, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__hamc_count(dbc, recnop) - 
DBC *dbc; - db_recno_t *recnop; -{ - COMPQUIET(recnop, NULL); - return (__db_no_hash_am(dbc->dbp->dbenv)); -} - -int -__hamc_dup(orig_dbc, new_dbc) - DBC *orig_dbc, *new_dbc; -{ - COMPQUIET(new_dbc, NULL); - return (__db_no_hash_am(orig_dbc->dbp->dbenv)); -} - -int -__hamc_init(dbc) - DBC *dbc; -{ - return (__db_no_hash_am(dbc->dbp->dbenv)); -} - -int -__ham_db_close(dbp) - DB *dbp; -{ - COMPQUIET(dbp, NULL); - return (0); -} - -int -__ham_db_create(dbp) - DB *dbp; -{ - COMPQUIET(dbp, NULL); - return (0); -} - -int -__ham_init_print(dbenv, dtabp, dtabsizep) - DB_ENV *dbenv; - int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - size_t *dtabsizep; -{ - COMPQUIET(dbenv, NULL); - COMPQUIET(dtabp, NULL); - COMPQUIET(dtabsizep, NULL); - return (0); -} - -int -__ham_init_recover(dbenv, dtabp, dtabsizep) - DB_ENV *dbenv; - int (***dtabp)__P((DB_ENV *, DBT *, DB_LSN *, db_recops, void *)); - size_t *dtabsizep; -{ - COMPQUIET(dbenv, NULL); - COMPQUIET(dtabp, NULL); - COMPQUIET(dtabsizep, NULL); - return (0); -} - -int -__ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) - DB *dbp; - VRFY_DBINFO *vdp; - HMETA *hmeta; - u_int32_t flags; - DB *pgset; -{ - COMPQUIET(vdp, NULL); - COMPQUIET(hmeta, NULL); - COMPQUIET(flags, 0); - COMPQUIET(pgset, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_metachk(dbp, name, hashm) - DB *dbp; - const char *name; - HMETA *hashm; -{ - COMPQUIET(name, NULL); - COMPQUIET(hashm, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_metagroup_42_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - COMPQUIET(dbtp, NULL); - COMPQUIET(lsnp, NULL); - COMPQUIET(op, 0); - COMPQUIET(info, NULL); - return (__db_no_hash_am(dbenv)); -} - -int -__ham_groupalloc_42_recover(dbenv, dbtp, lsnp, op, info) - DB_ENV *dbenv; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - COMPQUIET(dbtp, NULL); - COMPQUIET(lsnp, NULL); - COMPQUIET(op, 0); - 
COMPQUIET(info, NULL); - return (__db_no_hash_am(dbenv)); -} - -int -__ham_new_file(dbp, txn, fhp, name) - DB *dbp; - DB_TXN *txn; - DB_FH *fhp; - const char *name; -{ - COMPQUIET(txn, NULL); - COMPQUIET(fhp, NULL); - COMPQUIET(name, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_new_subdb(mdbp, dbp, txn) - DB *mdbp, *dbp; - DB_TXN *txn; -{ - COMPQUIET(dbp, NULL); - COMPQUIET(txn, NULL); - return (__db_no_hash_am(mdbp->dbenv)); -} - -int -__ham_open(dbp, txn, name, base_pgno, flags) - DB *dbp; - DB_TXN *txn; - const char *name; - db_pgno_t base_pgno; - u_int32_t flags; -{ - COMPQUIET(txn, NULL); - COMPQUIET(name, NULL); - COMPQUIET(base_pgno, 0); - COMPQUIET(flags, 0); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_pgin(dbenv, dummydbp, pg, pp, cookie) - DB_ENV *dbenv; - DB *dummydbp; - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - COMPQUIET(dummydbp, NULL); - COMPQUIET(pg, 0); - COMPQUIET(pp, NULL); - COMPQUIET(cookie, NULL); - return (__db_no_hash_am(dbenv)); -} - -int -__ham_pgout(dbenv, dummydbp, pg, pp, cookie) - DB_ENV *dbenv; - DB *dummydbp; - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - COMPQUIET(dummydbp, NULL); - COMPQUIET(pg, 0); - COMPQUIET(pp, NULL); - COMPQUIET(cookie, NULL); - return (__db_no_hash_am(dbenv)); -} - -void -__ham_print_cursor(dbc) - DBC *dbc; -{ - (void)__db_no_hash_am(dbc->dbp->dbenv); -} - -int -__ham_quick_delete(dbc) - DBC *dbc; -{ - return (__db_no_hash_am(dbc->dbp->dbenv)); -} - -int -__ham_reclaim(dbp, txn) - DB *dbp; - DB_TXN *txn; -{ - COMPQUIET(txn, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - PAGE *h; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - COMPQUIET(vdp, NULL); - COMPQUIET(pgno, 0); - COMPQUIET(h, NULL); - COMPQUIET(handle, NULL); - COMPQUIET(callback, NULL); - COMPQUIET(flags, 0); - return (__db_no_hash_am(dbp->dbenv)); -} - -int 
-__ham_stat(dbc, spp, flags) - DBC *dbc; - void *spp; - u_int32_t flags; -{ - COMPQUIET(spp, NULL); - COMPQUIET(flags, 0); - return (__db_no_hash_am(dbc->dbp->dbenv)); -} - -int -__ham_stat_print(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - COMPQUIET(flags, 0); - return (__db_no_hash_am(dbc->dbp->dbenv)); -} - -int -__ham_truncate(dbc, countp) - DBC *dbc; - u_int32_t *countp; -{ - /* dbc is dereferenced below, so it must not be quieted to NULL. */ - COMPQUIET(countp, NULL); - return (__db_no_hash_am(dbc->dbp->dbenv)); -} - -int -__ham_vrfy(dbp, vdp, h, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - PAGE *h; - db_pgno_t pgno; - u_int32_t flags; -{ - COMPQUIET(vdp, NULL); - COMPQUIET(h, NULL); - COMPQUIET(pgno, 0); - COMPQUIET(flags, 0); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc) - DB *dbp; - u_int32_t nentries; - HMETA *m; - u_int32_t thisbucket; - db_pgno_t pgno; - u_int32_t flags; - u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); -{ - COMPQUIET(nentries, 0); - COMPQUIET(m, NULL); - COMPQUIET(thisbucket, 0); - COMPQUIET(pgno, 0); - COMPQUIET(flags, 0); - COMPQUIET(hfunc, NULL); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_vrfy_meta(dbp, vdp, m, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - HMETA *m; - db_pgno_t pgno; - u_int32_t flags; -{ - COMPQUIET(vdp, NULL); - COMPQUIET(m, NULL); - COMPQUIET(pgno, 0); - COMPQUIET(flags, 0); - return (__db_no_hash_am(dbp->dbenv)); -} - -int -__ham_vrfy_structure(dbp, vdp, meta_pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t meta_pgno; - u_int32_t flags; -{ - COMPQUIET(vdp, NULL); - COMPQUIET(meta_pgno, 0); - COMPQUIET(flags, 0); - return (__db_no_hash_am(dbp->dbenv)); -} -#endif /* !HAVE_HASH */ diff --git a/db/hash/hash_upgrade.c b/db/hash/hash_upgrade.c deleted file mode 100644 index c3b69d200..000000000 --- a/db/hash/hash_upgrade.c +++ /dev/null @@ -1,314 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996,2007 Oracle. 
All rights reserved. - * - * $Id: hash_upgrade.c,v 12.11 2007/05/17 17:18:00 bostic Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/db_upgrade.h" - -/* - * __ham_30_hashmeta -- - * Upgrade the database from version 4/5 to version 6. - * - * PUBLIC: int __ham_30_hashmeta __P((DB *, char *, u_int8_t *)); - */ -int -__ham_30_hashmeta(dbp, real_name, obuf) - DB *dbp; - char *real_name; - u_int8_t *obuf; -{ - DB_ENV *dbenv; - HASHHDR *oldmeta; - HMETA30 newmeta; - u_int32_t *o_spares, *n_spares; - u_int32_t fillf, i, maxb, max_entry, nelem; - int ret; - - dbenv = dbp->dbenv; - memset(&newmeta, 0, sizeof(newmeta)); - - oldmeta = (HASHHDR *)obuf; - - /* - * The first 32 bytes are similar. The only change is the version - * and that we removed the ovfl_point and have the page type now. - */ - - newmeta.dbmeta.lsn = oldmeta->lsn; - newmeta.dbmeta.pgno = oldmeta->pgno; - newmeta.dbmeta.magic = oldmeta->magic; - newmeta.dbmeta.version = 6; - newmeta.dbmeta.pagesize = oldmeta->pagesize; - newmeta.dbmeta.type = P_HASHMETA; - - /* Move flags */ - newmeta.dbmeta.flags = oldmeta->flags; - - /* Copy the free list, which has changed its name but works the same. */ - newmeta.dbmeta.free = oldmeta->last_freed; - - /* Copy: max_bucket, high_mask, low-mask, ffactor, nelem, h_charkey */ - newmeta.max_bucket = oldmeta->max_bucket; - newmeta.high_mask = oldmeta->high_mask; - newmeta.low_mask = oldmeta->low_mask; - newmeta.ffactor = oldmeta->ffactor; - newmeta.nelem = oldmeta->nelem; - newmeta.h_charkey = oldmeta->h_charkey; - - /* - * There was a bug in 2.X versions where the nelem could go negative. - * In general, this is considered "bad." If it does go negative - * (that is, very large and positive), we'll die trying to dump and - * load this database. So, let's see if we can fix it here. 
- */ - nelem = newmeta.nelem; - fillf = newmeta.ffactor; - maxb = newmeta.max_bucket; - - if ((fillf != 0 && fillf * maxb < 2 * nelem) || - (fillf == 0 && nelem > 0x8000000)) - newmeta.nelem = 0; - - /* - * We now have to convert the spares array. The old spares array - * contained the total number of extra pages allocated prior to - * the bucket that begins the next doubling. The new spares array - * contains the page number of the first bucket in the next doubling - * MINUS the bucket number of that bucket. - */ - o_spares = oldmeta->spares; - n_spares = newmeta.spares; - max_entry = __db_log2(maxb + 1); /* highest spares entry in use */ - n_spares[0] = 1; - for (i = 1; i < NCACHED && i <= max_entry; i++) - n_spares[i] = 1 + o_spares[i - 1]; - - /* Replace the unique ID. */ - if ((ret = __os_fileid(dbenv, real_name, 1, newmeta.dbmeta.uid)) != 0) - return (ret); - - /* Overwrite the original. */ - memcpy(oldmeta, &newmeta, sizeof(newmeta)); - - return (0); -} - -/* - * __ham_30_sizefix -- - * Make sure that all hash pages belonging to the current - * hash doubling are within the bounds of the file. - * - * PUBLIC: int __ham_30_sizefix __P((DB *, DB_FH *, char *, u_int8_t *)); - */ -int -__ham_30_sizefix(dbp, fhp, realname, metabuf) - DB *dbp; - DB_FH *fhp; - char *realname; - u_int8_t *metabuf; -{ - u_int8_t buf[DB_MAX_PGSIZE]; - DB_ENV *dbenv; - HMETA30 *meta; - db_pgno_t last_actual, last_desired; - int ret; - size_t nw; - u_int32_t pagesize; - - dbenv = dbp->dbenv; - memset(buf, 0, DB_MAX_PGSIZE); - - meta = (HMETA30 *)metabuf; - pagesize = meta->dbmeta.pagesize; - - /* - * Get the last page number. To do this, we'll need dbp->pgsize - * to be set right, so slam it into place. - */ - dbp->pgsize = pagesize; - if ((ret = __db_lastpgno(dbp, realname, fhp, &last_actual)) != 0) - return (ret); - - /* - * The last bucket in the doubling is equal to high_mask; calculate - * the page number that implies. 
- */ - last_desired = BS_TO_PAGE(meta->high_mask, meta->spares); - - /* - * If last_desired > last_actual, we need to grow the file. Write - * a zeroed page where last_desired would go. - */ - if (last_desired > last_actual) { - if ((ret = __os_seek( - dbenv, fhp, last_desired, pagesize, 0)) != 0) - return (ret); - if ((ret = __os_write(dbenv, fhp, buf, pagesize, &nw)) != 0) - return (ret); - } - - return (0); -} - -/* - * __ham_31_hashmeta -- - * Upgrade the database from version 6 to version 7. - * - * PUBLIC: int __ham_31_hashmeta - * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); - */ -int -__ham_31_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - HMETA31 *newmeta; - HMETA30 *oldmeta; - - COMPQUIET(dbp, NULL); - COMPQUIET(real_name, NULL); - COMPQUIET(fhp, NULL); - - newmeta = (HMETA31 *)h; - oldmeta = (HMETA30 *)h; - - /* - * Copy the fields down the page. - * The fields may overlap so start at the bottom and use memmove(). - */ - memmove(newmeta->spares, oldmeta->spares, sizeof(oldmeta->spares)); - newmeta->h_charkey = oldmeta->h_charkey; - newmeta->nelem = oldmeta->nelem; - newmeta->ffactor = oldmeta->ffactor; - newmeta->low_mask = oldmeta->low_mask; - newmeta->high_mask = oldmeta->high_mask; - newmeta->max_bucket = oldmeta->max_bucket; - memmove(newmeta->dbmeta.uid, - oldmeta->dbmeta.uid, sizeof(oldmeta->dbmeta.uid)); - newmeta->dbmeta.flags = oldmeta->dbmeta.flags; - newmeta->dbmeta.record_count = 0; - newmeta->dbmeta.key_count = 0; - ZERO_LSN(newmeta->dbmeta.unused3); - - /* Update the version. */ - newmeta->dbmeta.version = 7; - - /* Upgrade the flags. */ - if (LF_ISSET(DB_DUPSORT)) - F_SET(&newmeta->dbmeta, DB_HASH_DUPSORT); - - *dirtyp = 1; - return (0); -} - -/* - * __ham_31_hash -- - * Upgrade the database hash leaf pages. 
- * - * PUBLIC: int __ham_31_hash - * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); - */ -int -__ham_31_hash(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - HKEYDATA *hk; - db_pgno_t pgno, tpgno; - db_indx_t indx; - int ret; - - /* flags is consulted below via LF_ISSET, so it must not be quieted to 0. */ - - ret = 0; - for (indx = 0; indx < NUM_ENT(h); indx += 2) { - hk = (HKEYDATA *)H_PAIRDATA(dbp, h, indx); - if (HPAGE_PTYPE(hk) == H_OFFDUP) { - memcpy(&pgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t)); - tpgno = pgno; - if ((ret = __db_31_offdup(dbp, real_name, fhp, - LF_ISSET(DB_DUPSORT) ? 1 : 0, &tpgno)) != 0) - break; - if (pgno != tpgno) { - *dirtyp = 1; - memcpy(HOFFDUP_PGNO(hk), - &tpgno, sizeof(db_pgno_t)); - } - } - } - - return (ret); -} - -/* - * __ham_46_hashmeta -- - * Upgrade the database from version 8 to version 9. - * - * PUBLIC: int __ham_46_hashmeta - * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); - */ -int -__ham_46_hashmeta(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - HMETA33 *newmeta; - - COMPQUIET(dbp, NULL); - COMPQUIET(real_name, NULL); - COMPQUIET(flags, 0); - COMPQUIET(fhp, NULL); - - newmeta = (HMETA33 *)h; - /* Update the version. */ - newmeta->dbmeta.version = 9; - *dirtyp = 1; - - return (0); -} - -/* - * __ham_46_hash -- - * Upgrade the database hash leaf pages. - * From version 8 databases to version 9. - * Involves sorting leaf pages, no format change. 
- * - * PUBLIC: int __ham_46_hash - * PUBLIC: __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); - */ -int -__ham_46_hash(dbp, real_name, flags, fhp, h, dirtyp) - DB *dbp; - char *real_name; - u_int32_t flags; - DB_FH *fhp; - PAGE *h; - int *dirtyp; -{ - COMPQUIET(real_name, NULL); - COMPQUIET(flags, 0); - COMPQUIET(fhp, NULL); - - *dirtyp = 1; - return (__ham_sort_page(dbp, NULL, NULL, h)); -} diff --git a/db/hash/hash_verify.c b/db/hash/hash_verify.c deleted file mode 100644 index 1531c758d..000000000 --- a/db/hash/hash_verify.c +++ /dev/null @@ -1,1082 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1999,2007 Oracle. All rights reserved. - * - * $Id: hash_verify.c,v 12.26 2007/07/02 16:58:02 alexg Exp $ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_verify.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/mp.h" - -static int __ham_dups_unsorted __P((DB *, u_int8_t *, u_int32_t)); -static int __ham_vrfy_bucket __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, - u_int32_t)); -static int __ham_vrfy_item __P((DB *, - VRFY_DBINFO *, db_pgno_t, PAGE *, u_int32_t, u_int32_t)); - -/* - * __ham_vrfy_meta -- - * Verify the hash-specific part of a metadata page. - * - * Note that unlike btree, we don't save things off, because we - * will need most everything again to verify each page and the - * amount of state here is significant. 
- * - * PUBLIC: int __ham_vrfy_meta __P((DB *, VRFY_DBINFO *, HMETA *, - * PUBLIC: db_pgno_t, u_int32_t)); - */ -int -__ham_vrfy_meta(dbp, vdp, m, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - HMETA *m; - db_pgno_t pgno; - u_int32_t flags; -{ - DB_ENV *dbenv; - HASH *hashp; - VRFY_PAGEINFO *pip; - int i, ret, t_ret, isbad; - u_int32_t pwr, mbucket; - u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); - - dbenv = dbp->dbenv; - isbad = 0; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - hashp = dbp->h_internal; - - if (hashp != NULL && hashp->h_hash != NULL) - hfunc = hashp->h_hash; - else - hfunc = __ham_func5; - - /* - * If we haven't already checked the common fields in pagezero, - * check them. - */ - if (!F_ISSET(pip, VRFY_INCOMPLETE) && - (ret = __db_vrfy_meta(dbp, vdp, &m->dbmeta, pgno, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* h_charkey */ - if (!LF_ISSET(DB_NOORDERCHK)) - if (m->h_charkey != hfunc(dbp, CHARKEY, sizeof(CHARKEY))) { - EPRINT((dbenv, -"Page %lu: database has custom hash function; reverify with DB_NOORDERCHK set", - (u_long)pgno)); - /* - * Return immediately; this is probably a sign of user - * error rather than database corruption, so we want to - * avoid extraneous errors. - */ - isbad = 1; - goto err; - } - - /* max_bucket must be less than the last pgno. */ - if (m->max_bucket > vdp->last_pgno) { - EPRINT((dbenv, - "Page %lu: Impossible max_bucket %lu on meta page", - (u_long)pgno, (u_long)m->max_bucket)); - /* - * Most other fields depend somehow on max_bucket, so - * we just return--there will be lots of extraneous - * errors. - */ - isbad = 1; - goto err; - } - - /* - * max_bucket, high_mask and low_mask: high_mask must be one - * less than the next power of two above max_bucket, and - * low_mask must be one less than the power of two below it. - */ - pwr = (m->max_bucket == 0) ? 
1 : 1 << __db_log2(m->max_bucket + 1); - if (m->high_mask != pwr - 1) { - EPRINT((dbenv, - "Page %lu: incorrect high_mask %lu, should be %lu", - (u_long)pgno, (u_long)m->high_mask, (u_long)pwr - 1)); - isbad = 1; - } - pwr >>= 1; - if (m->low_mask != pwr - 1) { - EPRINT((dbenv, - "Page %lu: incorrect low_mask %lu, should be %lu", - (u_long)pgno, (u_long)m->low_mask, (u_long)pwr - 1)); - isbad = 1; - } - - /* ffactor: no check possible. */ - pip->h_ffactor = m->ffactor; - - /* - * nelem: just make sure it's not astronomical for now. This is the - * same check that hash_upgrade does, since there was a bug in 2.X - * which could make nelem go "negative". - */ - if (m->nelem > 0x80000000) { - EPRINT((dbenv, - "Page %lu: suspiciously high nelem of %lu", - (u_long)pgno, (u_long)m->nelem)); - isbad = 1; - pip->h_nelem = 0; - } else - pip->h_nelem = m->nelem; - - /* flags */ - if (F_ISSET(&m->dbmeta, DB_HASH_DUP)) - F_SET(pip, VRFY_HAS_DUPS); - if (F_ISSET(&m->dbmeta, DB_HASH_DUPSORT)) - F_SET(pip, VRFY_HAS_DUPSORT); - /* XXX: Why is the DB_HASH_SUBDB flag necessary? */ - - /* spares array: check the index bound before reading the entry. */ - for (i = 0; i < NCACHED && m->spares[i] != 0; i++) { - /* - * We set mbucket to the maximum bucket that would use a given - * spares entry; we want to ensure that it's always less - * than last_pgno. The shift is done unsigned so that the - * highest spares entry cannot overflow a signed int. - */ - mbucket = ((u_int32_t)1 << i) - 1; - if (BS_TO_PAGE(mbucket, m->spares) > vdp->last_pgno) { - EPRINT((dbenv, - "Page %lu: spares array entry %d is invalid", - (u_long)pgno, i)); - isbad = 1; - } - } - -err: if ((t_ret = __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - if (LF_ISSET(DB_SALVAGE) && - (t_ret = __db_salvage_markdone(vdp, pgno)) != 0 && ret == 0) - ret = t_ret; - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __ham_vrfy -- - * Verify hash page. 
- * - * PUBLIC: int __ham_vrfy __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, - * PUBLIC: u_int32_t)); - */ -int -__ham_vrfy(dbp, vdp, h, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - PAGE *h; - db_pgno_t pgno; - u_int32_t flags; -{ - DB_ENV *dbenv; - VRFY_PAGEINFO *pip; - u_int32_t ent, himark, inpend; - db_indx_t *inp; - int isbad, ret, t_ret; - - dbenv = dbp->dbenv; - isbad = 0; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - if (TYPE(h) != P_HASH && TYPE(h) != P_HASH_UNSORTED) { - ret = __db_unknown_path(dbenv, "__ham_vrfy"); - goto err; - } - - /* Verify and save off fields common to all PAGEs. */ - if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* - * Verify inp[]. Each offset from 0 to NUM_ENT(h) must be lower - * than the previous one, higher than the current end of the inp array, - * and lower than the page size. - * - * In any case, we return immediately if things are bad, as it would - * be unsafe to proceed. - */ - inp = P_INP(dbp, h); - for (ent = 0, himark = dbp->pgsize, - inpend = (u_int32_t)((u_int8_t *)inp - (u_int8_t *)h); - ent < NUM_ENT(h); ent++) - if (inp[ent] >= himark) { - EPRINT((dbenv, - "Page %lu: item %lu is out of order or nonsensical", - (u_long)pgno, (u_long)ent)); - isbad = 1; - goto err; - } else if (inpend >= himark) { - EPRINT((dbenv, - "Page %lu: entries array collided with data", - (u_long)pgno)); - isbad = 1; - goto err; - - } else { - himark = inp[ent]; - inpend += sizeof(db_indx_t); - if ((ret = __ham_vrfy_item( - dbp, vdp, pgno, h, ent, flags)) != 0) - goto err; - } - - if (!LF_ISSET(DB_NOORDERCHK) && TYPE(h) == P_HASH && - (ret = __ham_verify_sorted_page(dbp, NULL, h)) != 0) - isbad = 1; - -err: if ((t_ret = - __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - return (ret == 0 && isbad == 1 ? 
DB_VERIFY_BAD : ret); -} - -/* - * __ham_vrfy_item -- - * Given a hash page and an offset, sanity-check the item itself, - * and save off any overflow items or off-page dup children as necessary. - */ -static int -__ham_vrfy_item(dbp, vdp, pgno, h, i, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - PAGE *h; - u_int32_t i, flags; -{ - HOFFPAGE hop; - HOFFDUP hod; - VRFY_CHILDINFO child; - VRFY_PAGEINFO *pip; - db_indx_t offset, len, dlen, elen; - int ret, t_ret; - u_int8_t *databuf; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - switch (HPAGE_TYPE(dbp, h, i)) { - case H_KEYDATA: - /* Nothing to do here--everything but the type field is data */ - break; - case H_DUPLICATE: - /* Are we a datum or a key? Better be the former. */ - if (i % 2 == 0) { - EPRINT((dbp->dbenv, - "Page %lu: hash key stored as duplicate item %lu", - (u_long)pip->pgno, (u_long)i)); - } - /* - * Dups are encoded as a series within a single HKEYDATA, - * in which each dup is surrounded by a copy of its length - * on either side (so that the series can be walked in either - * direction. We loop through this series and make sure - * each dup is reasonable. - * - * Note that at this point, we've verified item i-1, so - * it's safe to use LEN_HKEYDATA (which looks at inp[i-1]). - */ - len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); - databuf = HKEYDATA_DATA(P_ENTRY(dbp, h, i)); - for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { - memcpy(&dlen, databuf + offset, sizeof(db_indx_t)); - - /* Make sure the length is plausible. */ - if (offset + DUP_SIZE(dlen) > len) { - EPRINT((dbp->dbenv, - "Page %lu: duplicate item %lu has bad length", - (u_long)pip->pgno, (u_long)i)); - ret = DB_VERIFY_BAD; - goto err; - } - - /* - * Make sure the second copy of the length is the - * same as the first. 
- */ - memcpy(&elen, - databuf + offset + dlen + sizeof(db_indx_t), - sizeof(db_indx_t)); - if (elen != dlen) { - EPRINT((dbp->dbenv, - "Page %lu: duplicate item %lu has two different lengths", - (u_long)pip->pgno, (u_long)i)); - ret = DB_VERIFY_BAD; - goto err; - } - } - F_SET(pip, VRFY_HAS_DUPS); - if (!LF_ISSET(DB_NOORDERCHK) && - __ham_dups_unsorted(dbp, databuf, len)) - F_SET(pip, VRFY_DUPS_UNSORTED); - break; - case H_OFFPAGE: - /* Offpage item. Make sure pgno is sane, save off. */ - memcpy(&hop, P_ENTRY(dbp, h, i), HOFFPAGE_SIZE); - if (!IS_VALID_PGNO(hop.pgno) || hop.pgno == pip->pgno || - hop.pgno == PGNO_INVALID) { - EPRINT((dbp->dbenv, - "Page %lu: offpage item %lu has bad pgno %lu", - (u_long)pip->pgno, (u_long)i, (u_long)hop.pgno)); - ret = DB_VERIFY_BAD; - goto err; - } - memset(&child, 0, sizeof(VRFY_CHILDINFO)); - child.pgno = hop.pgno; - child.type = V_OVERFLOW; - child.tlen = hop.tlen; /* This will get checked later. */ - if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) - goto err; - break; - case H_OFFDUP: - /* Offpage duplicate item. Same drill. */ - memcpy(&hod, P_ENTRY(dbp, h, i), HOFFDUP_SIZE); - if (!IS_VALID_PGNO(hod.pgno) || hod.pgno == pip->pgno || - hod.pgno == PGNO_INVALID) { - EPRINT((dbp->dbenv, - "Page %lu: offpage item %lu has bad page number", - (u_long)pip->pgno, (u_long)i)); - ret = DB_VERIFY_BAD; - goto err; - } - memset(&child, 0, sizeof(VRFY_CHILDINFO)); - child.pgno = hod.pgno; - child.type = V_DUPLICATE; - if ((ret = __db_vrfy_childput(vdp, pip->pgno, &child)) != 0) - goto err; - F_SET(pip, VRFY_HAS_DUPS); - break; - default: - EPRINT((dbp->dbenv, - "Page %lu: item %lu has bad type", - (u_long)pip->pgno, (u_long)i)); - ret = DB_VERIFY_BAD; - break; - } - -err: if ((t_ret = - __db_vrfy_putpageinfo(dbp->dbenv, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __ham_vrfy_structure -- - * Verify the structure of a hash database. 
- * - * PUBLIC: int __ham_vrfy_structure __P((DB *, VRFY_DBINFO *, db_pgno_t, - * PUBLIC: u_int32_t)); - */ -int -__ham_vrfy_structure(dbp, vdp, meta_pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t meta_pgno; - u_int32_t flags; -{ - DB *pgset; - DB_MPOOLFILE *mpf; - HMETA *m; - PAGE *h; - VRFY_PAGEINFO *pip; - int isbad, p, ret, t_ret; - db_pgno_t pgno; - u_int32_t bucket, spares_entry; - - mpf = dbp->mpf; - pgset = vdp->pgset; - h = NULL; - ret = isbad = 0; - - if ((ret = __db_vrfy_pgset_get(pgset, meta_pgno, &p)) != 0) - return (ret); - if (p != 0) { - EPRINT((dbp->dbenv, - "Page %lu: Hash meta page referenced twice", - (u_long)meta_pgno)); - return (DB_VERIFY_BAD); - } - if ((ret = __db_vrfy_pgset_inc(pgset, meta_pgno)) != 0) - return (ret); - - /* Get the meta page; we'll need it frequently. */ - if ((ret = __memp_fget(mpf, &meta_pgno, NULL, 0, &m)) != 0) - return (ret); - - /* Loop through bucket by bucket. */ - for (bucket = 0; bucket <= m->max_bucket; bucket++) - if ((ret = - __ham_vrfy_bucket(dbp, vdp, m, bucket, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* - * There may be unused hash pages corresponding to buckets - * that have been allocated but not yet used. These may be - * part of the current doubling above max_bucket, or they may - * correspond to buckets that were used in a transaction - * that then aborted. - * - * Loop through them, as far as the spares array defines them, - * and make sure they're all empty. - * - * Note that this should be safe, since we've already verified - * that the spares array is sane. - */ - for (bucket = m->max_bucket + 1; spares_entry = __db_log2(bucket + 1), - spares_entry < NCACHED && m->spares[spares_entry] != 0; bucket++) { - pgno = BS_TO_PAGE(bucket, m->spares); - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - goto err; - - /* It's okay if these pages are totally zeroed; unmark it. 
*/ - F_CLR(pip, VRFY_IS_ALLZEROES); - - /* It's also OK if this page is simply invalid. */ - if (pip->type == P_INVALID) { - if ((ret = __db_vrfy_putpageinfo(dbp->dbenv, - vdp, pip)) != 0) - goto err; - continue; - } - - if (pip->type != P_HASH && pip->type != P_HASH_UNSORTED) { - EPRINT((dbp->dbenv, - "Page %lu: hash bucket %lu maps to non-hash page", - (u_long)pgno, (u_long)bucket)); - isbad = 1; - } else if (pip->entries != 0) { - EPRINT((dbp->dbenv, - "Page %lu: non-empty page in unused hash bucket %lu", - (u_long)pgno, (u_long)bucket)); - isbad = 1; - } else { - if ((ret = __db_vrfy_pgset_get(pgset, pgno, &p)) != 0) - goto err; - if (p != 0) { - EPRINT((dbp->dbenv, - "Page %lu: above max_bucket referenced", - (u_long)pgno)); - isbad = 1; - } else { - if ((ret = - __db_vrfy_pgset_inc(pgset, pgno)) != 0) - goto err; - if ((ret = __db_vrfy_putpageinfo(dbp->dbenv, - vdp, pip)) != 0) - goto err; - continue; - } - } - - /* If we got here, it's an error. */ - (void)__db_vrfy_putpageinfo(dbp->dbenv, vdp, pip); - goto err; - } - -err: if ((t_ret = __memp_fput(mpf, m, dbp->priority)) != 0) - return (t_ret); - if (h != NULL && (t_ret = __memp_fput(mpf, h, dbp->priority)) != 0) - return (t_ret); - return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD: ret); -} - -/* - * __ham_vrfy_bucket -- - * Verify a given bucket. - */ -static int -__ham_vrfy_bucket(dbp, vdp, m, bucket, flags) - DB *dbp; - VRFY_DBINFO *vdp; - HMETA *m; - u_int32_t bucket, flags; -{ - DB_ENV *dbenv; - HASH *hashp; - VRFY_CHILDINFO *child; - VRFY_PAGEINFO *mip, *pip; - int ret, t_ret, isbad, p; - db_pgno_t pgno, next_pgno; - DBC *cc; - u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); - - dbenv = dbp->dbenv; - isbad = 0; - pip = NULL; - cc = NULL; - - hashp = dbp->h_internal; - if (hashp != NULL && hashp->h_hash != NULL) - hfunc = hashp->h_hash; - else - hfunc = __ham_func5; - - if ((ret = __db_vrfy_getpageinfo(vdp, PGNO(m), &mip)) != 0) - return (ret); - - /* Calculate the first pgno for this bucket. 
*/ - pgno = BS_TO_PAGE(bucket, m->spares); - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - goto err; - - /* Make sure we got a plausible page number. */ - if (pgno > vdp->last_pgno || - (pip->type != P_HASH && pip->type != P_HASH_UNSORTED)) { - EPRINT((dbenv, - "Page %lu: impossible first page in bucket %lu", - (u_long)pgno, (u_long)bucket)); - /* Unsafe to continue. */ - isbad = 1; - goto err; - } - - if (pip->prev_pgno != PGNO_INVALID) { - EPRINT((dbenv, - "Page %lu: first page in hash bucket %lu has a prev_pgno", - (u_long)pgno, (u_long)bucket)); - isbad = 1; - } - - /* - * Set flags for dups and sorted dups. - */ - flags |= F_ISSET(mip, VRFY_HAS_DUPS) ? ST_DUPOK : 0; - flags |= F_ISSET(mip, VRFY_HAS_DUPSORT) ? ST_DUPSORT : 0; - - /* Loop until we find a fatal bug, or until we run out of pages. */ - for (;;) { - /* Provide feedback on our progress to the application. */ - if (!LF_ISSET(DB_SALVAGE)) - __db_vrfy_struct_feedback(dbp, vdp); - - if ((ret = __db_vrfy_pgset_get(vdp->pgset, pgno, &p)) != 0) - goto err; - if (p != 0) { - EPRINT((dbenv, - "Page %lu: hash page referenced twice", - (u_long)pgno)); - isbad = 1; - /* Unsafe to continue. */ - goto err; - } else if ((ret = __db_vrfy_pgset_inc(vdp->pgset, pgno)) != 0) - goto err; - - /* - * Hash pages that nothing has ever hashed to may never - * have actually come into existence, and may appear to be - * entirely zeroed. This is acceptable, and since there's - * no real way for us to know whether this has actually - * occurred, we clear the "wholly zeroed" flag on every - * hash page. A wholly zeroed page, by nature, will appear - * to have no flags set and zero entries, so should - * otherwise verify correctly. - */ - F_CLR(pip, VRFY_IS_ALLZEROES); - - /* If we have dups, our meta page had better know about it. 
*/ - if (F_ISSET(pip, VRFY_HAS_DUPS) && - !F_ISSET(mip, VRFY_HAS_DUPS)) { - EPRINT((dbenv, - "Page %lu: duplicates present in non-duplicate database", - (u_long)pgno)); - isbad = 1; - } - - /* - * If the database has sorted dups, this page had better - * not have unsorted ones. - */ - if (F_ISSET(mip, VRFY_HAS_DUPSORT) && - F_ISSET(pip, VRFY_DUPS_UNSORTED)) { - EPRINT((dbenv, - "Page %lu: unsorted dups in sorted-dup database", - (u_long)pgno)); - isbad = 1; - } - - /* Walk overflow chains and offpage dup trees. */ - if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) - goto err; - for (ret = __db_vrfy_ccset(cc, pip->pgno, &child); ret == 0; - ret = __db_vrfy_ccnext(cc, &child)) - if (child->type == V_OVERFLOW) { - if ((ret = __db_vrfy_ovfl_structure(dbp, vdp, - child->pgno, child->tlen, - flags | ST_OVFL_LEAF)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - } else if (child->type == V_DUPLICATE) { - if ((ret = __db_vrfy_duptype(dbp, - vdp, child->pgno, flags)) != 0) { - isbad = 1; - continue; - } - if ((ret = __bam_vrfy_subtree(dbp, vdp, - child->pgno, NULL, NULL, - flags | ST_RECNUM | ST_DUPSET | ST_TOPLEVEL, - NULL, NULL, NULL)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - } - if ((ret = __db_vrfy_ccclose(cc)) != 0) - goto err; - cc = NULL; - - /* If it's safe to check that things hash properly, do so. */ - if (isbad == 0 && !LF_ISSET(DB_NOORDERCHK) && - (ret = __ham_vrfy_hashing(dbp, pip->entries, - m, bucket, pgno, flags, hfunc)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - next_pgno = pip->next_pgno; - ret = __db_vrfy_putpageinfo(dbenv, vdp, pip); - - pip = NULL; - if (ret != 0) - goto err; - - if (next_pgno == PGNO_INVALID) - break; /* End of the bucket. */ - - /* We already checked this, but just in case... 
*/ - if (!IS_VALID_PGNO(next_pgno)) { - EPRINT((dbenv, - "Page %lu: hash page has bad next_pgno", - (u_long)pgno)); - isbad = 1; - goto err; - } - - if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) - goto err; - - if (pip->prev_pgno != pgno) { - EPRINT((dbenv, - "Page %lu: hash page has bad prev_pgno", - (u_long)next_pgno)); - isbad = 1; - } - pgno = next_pgno; - } - -err: if (cc != NULL && ((t_ret = __db_vrfy_ccclose(cc)) != 0) && ret == 0) - ret = t_ret; - if (mip != NULL && ((t_ret = - __db_vrfy_putpageinfo(dbenv, vdp, mip)) != 0) && ret == 0) - ret = t_ret; - if (pip != NULL && ((t_ret = - __db_vrfy_putpageinfo(dbenv, vdp, pip)) != 0) && ret == 0) - ret = t_ret; - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __ham_vrfy_hashing -- - * Verify that all items on a given hash page hash correctly. - * - * PUBLIC: int __ham_vrfy_hashing __P((DB *, - * PUBLIC: u_int32_t, HMETA *, u_int32_t, db_pgno_t, u_int32_t, - * PUBLIC: u_int32_t (*) __P((DB *, const void *, u_int32_t)))); - */ -int -__ham_vrfy_hashing(dbp, nentries, m, thisbucket, pgno, flags, hfunc) - DB *dbp; - u_int32_t nentries; - HMETA *m; - u_int32_t thisbucket; - db_pgno_t pgno; - u_int32_t flags; - u_int32_t (*hfunc) __P((DB *, const void *, u_int32_t)); -{ - DBT dbt; - DB_MPOOLFILE *mpf; - PAGE *h; - db_indx_t i; - int ret, t_ret, isbad; - u_int32_t hval, bucket; - - mpf = dbp->mpf; - ret = isbad = 0; - - memset(&dbt, 0, sizeof(DBT)); - F_SET(&dbt, DB_DBT_REALLOC); - - if ((ret = __memp_fget(mpf, &pgno, NULL, 0, &h)) != 0) - return (ret); - - for (i = 0; i < nentries; i += 2) { - /* - * We've already verified the page integrity and that of any - * overflow chains linked off it; it is therefore safe to use - * __db_ret. It's also not all that much slower, since we have - * to copy every hash item to deal with alignment anyway; we - * can tweak this a bit if this proves to be a bottleneck, - * but for now, take the easy route. 
- */ - if ((ret = __db_ret(dbp, NULL, h, i, &dbt, NULL, NULL)) != 0) - goto err; - hval = hfunc(dbp, dbt.data, dbt.size); - - bucket = hval & m->high_mask; - if (bucket > m->max_bucket) - bucket = bucket & m->low_mask; - - if (bucket != thisbucket) { - EPRINT((dbp->dbenv, - "Page %lu: item %lu hashes incorrectly", - (u_long)pgno, (u_long)i)); - isbad = 1; - } - } - -err: if (dbt.data != NULL) - __os_ufree(dbp->dbenv, dbt.data); - if ((t_ret = __memp_fput(mpf, h, dbp->priority)) != 0) - return (t_ret); - - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __ham_salvage -- - * Safely dump out anything that looks like a key on an alleged - * hash page. - * - * PUBLIC: int __ham_salvage __P((DB *, VRFY_DBINFO *, db_pgno_t, PAGE *, - * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); - */ -int -__ham_salvage(dbp, vdp, pgno, h, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - PAGE *h; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - DBT dbt, unkdbt; - db_pgno_t dpgno; - int ret, err_ret, t_ret; - u_int32_t himark, i; - u_int8_t *hk, *p; - void *buf; - db_indx_t dlen, len, tlen; - - memset(&dbt, 0, sizeof(DBT)); - dbt.flags = DB_DBT_REALLOC; - - DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1); - - err_ret = 0; - - /* - * Allocate a buffer for overflow items. Start at one page; - * __db_safe_goff will realloc as needed. - */ - if ((ret = __os_malloc(dbp->dbenv, dbp->pgsize, &buf)) != 0) - return (ret); - - himark = dbp->pgsize; - for (i = 0;; i++) { - /* If we're not aggressive, break when we hit NUM_ENT(h). */ - if (!LF_ISSET(DB_AGGRESSIVE) && i >= NUM_ENT(h)) - break; - - /* Verify the current item. */ - ret = __db_vrfy_inpitem(dbp, - h, pgno, i, 0, flags, &himark, NULL); - /* If this returned a fatality, it's time to break. */ - if (ret == DB_VERIFY_FATAL) - break; - - if (ret == 0) { - /* Set len to total entry length. 
*/ - len = LEN_HITEM(dbp, h, dbp->pgsize, i); - hk = P_ENTRY(dbp, h, i); - if (len == 0 || len > dbp->pgsize || - (u_int32_t)(hk + len - (u_int8_t *)h) > - dbp->pgsize) { - /* Item is unsafely large; skip it. */ - err_ret = DB_VERIFY_BAD; - continue; - } - switch (HPAGE_PTYPE(hk)) { - default: - if (!LF_ISSET(DB_AGGRESSIVE)) - break; - err_ret = DB_VERIFY_BAD; - break; - case H_KEYDATA: - /* Update len to size of item. */ - len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); -keydata: memcpy(buf, HKEYDATA_DATA(hk), len); - dbt.size = len; - dbt.data = buf; - if ((ret = __db_vrfy_prdbt(&dbt, - 0, " ", handle, callback, 0, vdp)) != 0) - err_ret = ret; - break; - case H_OFFPAGE: - if (len < HOFFPAGE_SIZE) { - err_ret = DB_VERIFY_BAD; - continue; - } - memcpy(&dpgno, - HOFFPAGE_PGNO(hk), sizeof(dpgno)); - if ((ret = __db_safe_goff(dbp, vdp, - dpgno, &dbt, &buf, flags)) != 0) { - err_ret = ret; - (void)__db_vrfy_prdbt(&unkdbt, 0, " ", - handle, callback, 0, vdp); - break; - } - if ((ret = __db_vrfy_prdbt(&dbt, - 0, " ", handle, callback, 0, vdp)) != 0) - err_ret = ret; - break; - case H_OFFDUP: - if (len < HOFFDUP_SIZE) { - err_ret = DB_VERIFY_BAD; - continue; - } - memcpy(&dpgno, - HOFFDUP_PGNO(hk), sizeof(dpgno)); - /* UNKNOWN iff pgno is bad or we're a key. */ - if (!IS_VALID_PGNO(dpgno) || (i % 2 == 0)) { - if ((ret = - __db_vrfy_prdbt(&unkdbt, 0, " ", - handle, callback, 0, vdp)) != 0) - err_ret = ret; - } else if ((ret = __db_salvage_duptree(dbp, - vdp, dpgno, &dbt, handle, callback, - flags | SA_SKIPFIRSTKEY)) != 0) - err_ret = ret; - break; - case H_DUPLICATE: - len = LEN_HKEYDATA(dbp, h, dbp->pgsize, i); - /* - * We're a key; printing dups will seriously - * foul the output. If we're being aggressive, - * pretend this is a key and let the app. - * programmer sort out the mess. - */ - if (i % 2 == 0) { - err_ret = ret; - if (LF_ISSET(DB_AGGRESSIVE)) - goto keydata; - break; - } - - /* - * Check if too small to have any data. 
- * But first, we have to update the len to - * reflect the size of the data not the - * size of the on-page entry. - */ - if (len < - HKEYDATA_SIZE(2 * sizeof(db_indx_t))) { - err_ret = DB_VERIFY_BAD; - continue; - } - - /* Loop until we hit the total length. */ - for (tlen = 0; tlen + sizeof(db_indx_t) < len; - tlen += dlen) { - p = HKEYDATA_DATA(hk) + tlen; - tlen += sizeof(db_indx_t); - memcpy(&dlen, p, sizeof(db_indx_t)); - p += sizeof(db_indx_t); - /* - * If dlen is too long, print all the - * rest of the dup set in a chunk. - */ - if (dlen + tlen > len) - dlen = len - tlen; - memcpy(buf, p, dlen); - dbt.size = dlen; - dbt.data = buf; - if ((ret = __db_vrfy_prdbt(&dbt, 0, " ", - handle, callback, 0, vdp)) != 0) - err_ret = ret; - tlen += sizeof(db_indx_t); - } - break; - } - } - } - - __os_free(dbp->dbenv, buf); - if ((t_ret = __db_salvage_markdone(vdp, pgno)) != 0) - return (t_ret); - return ((ret == 0 && err_ret != 0) ? err_ret : ret); -} - -/* - * __ham_meta2pgset -- - * Return the set of hash pages corresponding to the given - * known-good meta page. - * - * PUBLIC: int __ham_meta2pgset __P((DB *, VRFY_DBINFO *, HMETA *, u_int32_t, - * PUBLIC: DB *)); - */ -int -__ham_meta2pgset(dbp, vdp, hmeta, flags, pgset) - DB *dbp; - VRFY_DBINFO *vdp; - HMETA *hmeta; - u_int32_t flags; - DB *pgset; -{ - DB_MPOOLFILE *mpf; - PAGE *h; - db_pgno_t pgno; - u_int32_t bucket, totpgs; - int ret, val; - - /* - * We don't really need flags, but leave them for consistency with - * __bam_meta2pgset. - */ - COMPQUIET(flags, 0); - - DB_ASSERT(dbp->dbenv, pgset != NULL); - - mpf = dbp->mpf; - totpgs = 0; - - /* - * Loop through all the buckets, pushing onto pgset the corresponding - * page(s) for each one. - */ - for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { - pgno = BS_TO_PAGE(bucket, hmeta->spares); - - /* - * We know the initial pgno is safe because the spares array has - * been verified. - * - * Safely walk the list of pages in this bucket. 
- */ - for (;;) { - if ((ret = __memp_fget(mpf, &pgno, NULL, 0, &h)) != 0) - return (ret); - if (TYPE(h) == P_HASH || TYPE(h) == P_HASH_UNSORTED) { - - /* - * Make sure we don't go past the end of - * pgset. - */ - if (++totpgs > vdp->last_pgno) { - (void)__memp_fput(mpf, - h, dbp->priority); - return (DB_VERIFY_BAD); - } - if ((ret = - __db_vrfy_pgset_inc(pgset, pgno)) != 0) { - (void)__memp_fput(mpf, - h, dbp->priority); - return (ret); - } - - pgno = NEXT_PGNO(h); - } else - pgno = PGNO_INVALID; - - if ((ret = __memp_fput(mpf, h, dbp->priority)) != 0) - return (ret); - - /* If the new pgno is wonky, go onto the next bucket. */ - if (!IS_VALID_PGNO(pgno) || - pgno == PGNO_INVALID) - break; - - /* - * If we've touched this page before, we have a cycle; - * go on to the next bucket. - */ - if ((ret = __db_vrfy_pgset_get(pgset, pgno, &val)) != 0) - return (ret); - if (val != 0) - break; - } - } - return (0); -} - -/* - * __ham_dups_unsorted -- - * Takes a known-safe hash duplicate set and its total length. - * Returns 1 if there are out-of-order duplicates in this set, - * 0 if there are not. - */ -static int -__ham_dups_unsorted(dbp, buf, len) - DB *dbp; - u_int8_t *buf; - u_int32_t len; -{ - DBT a, b; - db_indx_t offset, dlen; - int (*func) __P((DB *, const DBT *, const DBT *)); - - memset(&a, 0, sizeof(DBT)); - memset(&b, 0, sizeof(DBT)); - - func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; - - /* - * Loop through the dup set until we hit the end or we find - * a pair of dups that's out of order. b is always the current - * dup, a the one before it. - */ - for (offset = 0; offset < len; offset += DUP_SIZE(dlen)) { - memcpy(&dlen, buf + offset, sizeof(db_indx_t)); - b.data = buf + offset + sizeof(db_indx_t); - b.size = dlen; - - if (a.data != NULL && func(dbp, &a, &b) > 0) - return (1); - - a.data = b.data; - a.size = b.size; - } - - return (0); -} |