diff options
author | Zhang Qiang <qiang.z.zhang@intel.com> | 2012-05-29 11:25:24 +0800 |
---|---|---|
committer | Zhang Qiang <qiang.z.zhang@intel.com> | 2012-05-29 11:25:24 +0800 |
commit | e776056ea09ba0b6d9505ced6913c9190a12d632 (patch) | |
tree | 092838f2a86042abc586aa5576e36ae6cb47e256 /db | |
parent | 2e082c838d2ca750f5daac6dcdabecc22dfd4e46 (diff) | |
download | db4-e776056ea09ba0b6d9505ced6913c9190a12d632.tar.gz db4-e776056ea09ba0b6d9505ced6913c9190a12d632.tar.bz2 db4-e776056ea09ba0b6d9505ced6913c9190a12d632.zip |
updated with Tizen:Base source codes
Diffstat (limited to 'db')
-rw-r--r-- | db/Makefile.inc | 5 | ||||
-rw-r--r-- | db/crdel.src | 72 | ||||
-rw-r--r-- | db/crdel_auto.c | 945 | ||||
-rw-r--r-- | db/crdel_autop.c | 227 | ||||
-rw-r--r-- | db/crdel_rec.c | 298 | ||||
-rw-r--r-- | db/db.c | 1544 | ||||
-rw-r--r-- | db/db.src | 328 | ||||
-rw-r--r-- | db/db_am.c | 1015 | ||||
-rw-r--r-- | db/db_auto.c | 3267 | ||||
-rw-r--r-- | db/db_autop.c | 802 | ||||
-rw-r--r-- | db/db_cam.c | 3460 | ||||
-rw-r--r-- | db/db_cds.c | 177 | ||||
-rw-r--r-- | db/db_conv.c | 733 | ||||
-rw-r--r-- | db/db_dispatch.c | 953 | ||||
-rw-r--r-- | db/db_dup.c | 203 | ||||
-rw-r--r-- | db/db_iface.c | 2817 | ||||
-rw-r--r-- | db/db_join.c | 940 | ||||
-rw-r--r-- | db/db_meta.c | 1299 | ||||
-rw-r--r-- | db/db_method.c | 1052 | ||||
-rw-r--r-- | db/db_open.c | 628 | ||||
-rw-r--r-- | db/db_overflow.c | 706 | ||||
-rw-r--r-- | db/db_ovfl_vrfy.c | 409 | ||||
-rw-r--r-- | db/db_pr.c | 1659 | ||||
-rw-r--r-- | db/db_rec.c | 1859 | ||||
-rw-r--r-- | db/db_reclaim.c | 246 | ||||
-rw-r--r-- | db/db_remove.c | 492 | ||||
-rw-r--r-- | db/db_rename.c | 372 | ||||
-rw-r--r-- | db/db_ret.c | 156 | ||||
-rw-r--r-- | db/db_setid.c | 213 | ||||
-rw-r--r-- | db/db_setlsn.c | 137 | ||||
-rw-r--r-- | db/db_sort_multiple.c | 287 | ||||
-rw-r--r-- | db/db_stati.c | 494 | ||||
-rw-r--r-- | db/db_truncate.c | 225 | ||||
-rw-r--r-- | db/db_upg.c | 510 | ||||
-rw-r--r-- | db/db_upg_opd.c | 343 | ||||
-rw-r--r-- | db/db_vrfy.c | 2894 | ||||
-rw-r--r-- | db/db_vrfy_stub.c | 117 | ||||
-rw-r--r-- | db/db_vrfyutil.c | 916 | ||||
-rw-r--r-- | db/partition.c | 2048 | ||||
-rw-r--r-- | db/tags | 205 |
40 files changed, 262 insertions, 34791 deletions
diff --git a/db/Makefile.inc b/db/Makefile.inc new file mode 100644 index 0000000..59478ba --- /dev/null +++ b/db/Makefile.inc @@ -0,0 +1,5 @@ +# @(#)Makefile.inc 8.1 (Berkeley) 6/4/93 + +.PATH: ${.CURDIR}/db/db + +SRCS+= db.c diff --git a/db/crdel.src b/db/crdel.src deleted file mode 100644 index cd0b02f..0000000 --- a/db/crdel.src +++ /dev/null @@ -1,72 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -DBPRIVATE -PREFIX __crdel - -INCLUDE #include "db_int.h" -INCLUDE #include "dbinc/crypto.h" -INCLUDE #include "dbinc/db_page.h" -INCLUDE #include "dbinc/db_dispatch.h" -INCLUDE #include "dbinc/db_am.h" -INCLUDE #include "dbinc/log.h" -INCLUDE #include "dbinc/txn.h" -INCLUDE - -/* - * Metasub: log the creation of a subdatabase meta data page. - * - * fileid: identifies the file being acted upon. - * pgno: page number on which to write this meta-data page - * page: the actual meta-data page - * lsn: lsn of the page. - */ -BEGIN metasub 42 142 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -PGDBT page DBT s -POINTER lsn DB_LSN * lu -END - -/* - * Inmem_create: Log the creation of an in-memory database. - * - * name: Name of the database - * fid: File id of the database - */ -BEGIN inmem_create 44 138 -ARG fileid int32_t ld -DBT name DBT s -DBT fid DBT s -ARG pgsize u_int32_t lu -END - -/* - * Inmem_rename: Log the renaming of an in-memory only database. - * - * oldname: database's starting name - * newname: database's ending name - * fid: fileid - */ -BEGIN inmem_rename 44 139 -DBT oldname DBT s -DBT newname DBT s -DBT fid DBT s -END - -/* - * Inmem_remove: Log the removal of an in-memory only database. 
- * - * name: database's ending name - * fid: fileid - */ -BEGIN inmem_remove 44 140 -DBT name DBT s -DBT fid DBT s -END - diff --git a/db/crdel_auto.c b/db/crdel_auto.c deleted file mode 100644 index 801a0a5..0000000 --- a/db/crdel_auto.c +++ /dev/null @@ -1,945 +0,0 @@ -/* Do not edit: automatically built by gen_rec.awk. */ - -#include "db_config.h" -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/db_dispatch.h" -#include "dbinc/db_am.h" -#include "dbinc/log.h" -#include "dbinc/txn.h" - -/* - * PUBLIC: int __crdel_metasub_read __P((ENV *, DB **, void *, - * PUBLIC: void *, __crdel_metasub_args **)); - */ -int -__crdel_metasub_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __crdel_metasub_args **argpp; -{ - __crdel_metasub_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__crdel_metasub_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->page, 0, sizeof(argp->page)); - LOGCOPY_32(env,&argp->page.size, bp); - bp += sizeof(u_int32_t); - argp->page.data = bp; - bp += argp->page.size; - if (LOG_SWAPPED(env) && dbpp != NULL && *dbpp != NULL) { - int t_ret; - if ((t_ret = __db_pageswap(*dbpp, (PAGE *)argp->page.data, - (size_t)argp->page.size, NULL, 1)) != 0) - return (t_ret); - } - - 
LOGCOPY_TOLSN(env, &argp->lsn, bp); - bp += sizeof(DB_LSN); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __crdel_metasub_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, const DBT *, DB_LSN *)); - */ -int -__crdel_metasub_log(dbp, txnp, ret_lsnp, flags, pgno, page, lsn) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - const DBT *page; - DB_LSN * lsn; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___crdel_metasub; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (page == NULL ? 
0 : page->size) - + sizeof(*lsn); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (page == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &page->size); - bp += sizeof(page->size); - memcpy(bp, page->data, page->size); - if (LOG_SWAPPED(env)) - if ((ret = __db_pageswap(dbp, - (PAGE *)bp, (size_t)page->size, (DBT *)NULL, 0)) != 0) - return (ret); - bp += page->size; - } - - if (lsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, lsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, lsn); - } else - memset(bp, 0, sizeof(*lsn)); - bp += sizeof(*lsn); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC 
- /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__crdel_metasub_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __crdel_inmem_create_read __P((ENV *, void *, - * PUBLIC: __crdel_inmem_create_args **)); - */ -int -__crdel_inmem_create_read(env, recbuf, argpp) - ENV *env; - void *recbuf; - __crdel_inmem_create_args **argpp; -{ - __crdel_inmem_create_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__crdel_inmem_create_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->name, 0, sizeof(argp->name)); - LOGCOPY_32(env,&argp->name.size, bp); - bp += sizeof(u_int32_t); - argp->name.data = bp; - bp += argp->name.size; - - memset(&argp->fid, 0, sizeof(argp->fid)); - LOGCOPY_32(env,&argp->fid.size, bp); - bp += sizeof(u_int32_t); - argp->fid.data = bp; - bp += argp->fid.size; - - LOGCOPY_32(env, &argp->pgsize, bp); - bp += sizeof(argp->pgsize); - - 
*argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __crdel_inmem_create_log __P((ENV *, DB_TXN *, - * PUBLIC: DB_LSN *, u_int32_t, int32_t, const DBT *, const DBT *, - * PUBLIC: u_int32_t)); - */ -int -__crdel_inmem_create_log(env, txnp, ret_lsnp, flags, - fileid, name, fid, pgsize) - ENV *env; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - int32_t fileid; - const DBT *name; - const DBT *fid; - u_int32_t pgsize; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - rlsnp = ret_lsnp; - rectype = DB___crdel_inmem_create; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (name == NULL ? 0 : name->size) - + sizeof(u_int32_t) + (fid == NULL ? 
0 : fid->size) - + sizeof(u_int32_t); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)fileid; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (name == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &name->size); - bp += sizeof(name->size); - memcpy(bp, name->data, name->size); - bp += name->size; - } - - if (fid == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &fid->size); - bp += sizeof(fid->size); - memcpy(bp, fid->data, fid->size); - bp += fid->size; - } - - LOGCOPY_32(env, bp, &pgsize); - bp += sizeof(pgsize); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__crdel_inmem_create_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __crdel_inmem_rename_read __P((ENV *, void *, - * PUBLIC: __crdel_inmem_rename_args **)); - */ -int -__crdel_inmem_rename_read(env, recbuf, argpp) - ENV *env; - void *recbuf; - __crdel_inmem_rename_args **argpp; -{ - __crdel_inmem_rename_args *argp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__crdel_inmem_rename_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - memset(&argp->oldname, 0, sizeof(argp->oldname)); - LOGCOPY_32(env,&argp->oldname.size, bp); - bp += sizeof(u_int32_t); - argp->oldname.data = bp; - bp += argp->oldname.size; - - memset(&argp->newname, 0, sizeof(argp->newname)); - LOGCOPY_32(env,&argp->newname.size, bp); - bp += sizeof(u_int32_t); - argp->newname.data = bp; - bp += argp->newname.size; - - memset(&argp->fid, 0, sizeof(argp->fid)); - LOGCOPY_32(env,&argp->fid.size, bp); - bp += sizeof(u_int32_t); - argp->fid.data = bp; - bp += argp->fid.size; - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __crdel_inmem_rename_log __P((ENV *, DB_TXN *, - * 
PUBLIC: DB_LSN *, u_int32_t, const DBT *, const DBT *, const DBT *)); - */ -int -__crdel_inmem_rename_log(env, txnp, ret_lsnp, flags, - oldname, newname, fid) - ENV *env; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - const DBT *oldname; - const DBT *newname; - const DBT *fid; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - u_int32_t zero, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - rlsnp = ret_lsnp; - rectype = DB___crdel_inmem_rename; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) + (oldname == NULL ? 0 : oldname->size) - + sizeof(u_int32_t) + (newname == NULL ? 0 : newname->size) - + sizeof(u_int32_t) + (fid == NULL ? 
0 : fid->size); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - if (oldname == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &oldname->size); - bp += sizeof(oldname->size); - memcpy(bp, oldname->data, oldname->size); - bp += oldname->size; - } - - if (newname == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &newname->size); - bp += sizeof(newname->size); - memcpy(bp, newname->data, newname->size); - bp += newname->size; - } - - if (fid == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &fid->size); - bp += sizeof(fid->size); - memcpy(bp, fid->data, fid->size); - bp += fid->size; - } - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__crdel_inmem_rename_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __crdel_inmem_remove_read __P((ENV *, void *, - * PUBLIC: __crdel_inmem_remove_args **)); - */ -int -__crdel_inmem_remove_read(env, recbuf, argpp) - ENV *env; - void *recbuf; - __crdel_inmem_remove_args **argpp; -{ - __crdel_inmem_remove_args *argp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__crdel_inmem_remove_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - memset(&argp->name, 0, sizeof(argp->name)); - LOGCOPY_32(env,&argp->name.size, bp); - bp += sizeof(u_int32_t); - argp->name.data = bp; - bp += argp->name.size; - - memset(&argp->fid, 0, sizeof(argp->fid)); - LOGCOPY_32(env,&argp->fid.size, bp); - bp += sizeof(u_int32_t); - argp->fid.data = bp; - bp += argp->fid.size; - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __crdel_inmem_remove_log __P((ENV *, DB_TXN *, - * PUBLIC: DB_LSN *, u_int32_t, const DBT *, const DBT *)); - */ -int -__crdel_inmem_remove_log(env, txnp, ret_lsnp, flags, - name, fid) - ENV *env; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t 
flags; - const DBT *name; - const DBT *fid; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - u_int32_t zero, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - rlsnp = ret_lsnp; - rectype = DB___crdel_inmem_remove; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) + (name == NULL ? 0 : name->size) - + sizeof(u_int32_t) + (fid == NULL ? 
0 : fid->size); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - if (name == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &name->size); - bp += sizeof(name->size); - memcpy(bp, name->data, name->size); - bp += name->size; - } - - if (fid == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &fid->size); - bp += sizeof(fid->size); - memcpy(bp, fid->data, fid->size); - bp += fid->size; - } - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__crdel_inmem_remove_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __crdel_init_recover __P((ENV *, DB_DISTAB *)); - */ -int -__crdel_init_recover(env, dtabp) - ENV *env; - DB_DISTAB *dtabp; -{ - int ret; - - if ((ret = __db_add_recovery_int(env, dtabp, - __crdel_metasub_recover, DB___crdel_metasub)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __crdel_inmem_create_recover, DB___crdel_inmem_create)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __crdel_inmem_rename_recover, DB___crdel_inmem_rename)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __crdel_inmem_remove_recover, DB___crdel_inmem_remove)) != 0) - return (ret); - return (0); -} diff --git a/db/crdel_autop.c b/db/crdel_autop.c deleted file mode 100644 index 6bf4bb6..0000000 --- a/db/crdel_autop.c +++ /dev/null @@ -1,227 +0,0 @@ -/* Do not edit: automatically built by gen_rec.awk. 
*/ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/db_dispatch.h" -#include "dbinc/db_am.h" -#include "dbinc/log.h" -#include "dbinc/txn.h" - -/* - * PUBLIC: int __crdel_metasub_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__crdel_metasub_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __crdel_metasub_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __crdel_metasub_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__crdel_metasub%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tpage: "); - for (i = 0; i < argp->page.size; i++) { - ch = ((u_int8_t *)argp->page.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __crdel_inmem_create_print __P((ENV *, DBT *, - * PUBLIC: DB_LSN *, db_recops, void *)); - */ -int -__crdel_inmem_create_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __crdel_inmem_create_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __crdel_inmem_create_read(env, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__crdel_inmem_create%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tname: "); - for (i = 0; i < argp->name.size; i++) { - ch = ((u_int8_t *)argp->name.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tfid: "); - for (i = 0; i < argp->fid.size; i++) { - ch = ((u_int8_t *)argp->fid.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tpgsize: %lu\n", (u_long)argp->pgsize); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __crdel_inmem_rename_print __P((ENV *, DBT *, - * PUBLIC: DB_LSN *, db_recops, void *)); - */ -int -__crdel_inmem_rename_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __crdel_inmem_rename_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __crdel_inmem_rename_read(env, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__crdel_inmem_rename%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\toldname: "); - for (i = 0; i < argp->oldname.size; i++) { - ch = ((u_int8_t *)argp->oldname.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tnewname: "); - for (i = 0; i < argp->newname.size; i++) { - ch = ((u_int8_t *)argp->newname.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tfid: "); - for (i = 0; i < argp->fid.size; i++) { - ch = ((u_int8_t *)argp->fid.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __crdel_inmem_remove_print __P((ENV *, DBT *, - * PUBLIC: DB_LSN *, db_recops, void *)); - */ -int -__crdel_inmem_remove_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __crdel_inmem_remove_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __crdel_inmem_remove_read(env, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__crdel_inmem_remove%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tname: "); - for (i = 0; i < argp->name.size; i++) { - ch = ((u_int8_t *)argp->name.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tfid: "); - for (i = 0; i < argp->fid.size; i++) { - ch = ((u_int8_t *)argp->fid.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __crdel_init_print __P((ENV *, DB_DISTAB *)); - */ -int -__crdel_init_print(env, dtabp) - ENV *env; - DB_DISTAB *dtabp; -{ - int ret; - - if ((ret = __db_add_recovery_int(env, dtabp, - __crdel_metasub_print, DB___crdel_metasub)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __crdel_inmem_create_print, DB___crdel_inmem_create)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __crdel_inmem_rename_print, DB___crdel_inmem_rename)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __crdel_inmem_remove_print, DB___crdel_inmem_remove)) != 0) - return (ret); - return (0); -} diff --git a/db/crdel_rec.c b/db/crdel_rec.c deleted file mode 100644 index 285b965..0000000 --- a/db/crdel_rec.c +++ /dev/null @@ -1,298 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/fop.h" -#include "dbinc/hash.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/txn.h" - -/* - * __crdel_metasub_recover -- - * Recovery function for metasub. 
- * - * PUBLIC: int __crdel_metasub_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__crdel_metasub_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __crdel_metasub_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_p, ret, t_ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - pagep = NULL; - REC_PRINT(__crdel_metasub_print); - REC_INTRO(__crdel_metasub_read, ip, 0); - - /* - * If we are undoing this operation, but the DB that we got back - * was never really opened, then this open was an in-memory open - * that did not finish. We can let the file creation take care - * of any necessary undo/cleanup. - */ - if (DB_UNDO(op) && !F_ISSET(file_dbp, DB_AM_OPEN_CALLED)) - goto done; - - if ((ret = __memp_fget(mpf, &argp->pgno, - ip, NULL, 0, &pagep)) != 0) { - /* If this is an in-memory file, this might be OK. */ - if (F_ISSET(file_dbp, DB_AM_INMEM) && - (ret = __memp_fget(mpf, &argp->pgno, ip, NULL, - DB_MPOOL_CREATE | DB_MPOOL_DIRTY, &pagep)) == 0) { - LSN_NOT_LOGGED(LSN(pagep)); - } else { - *lsnp = argp->prev_lsn; - ret = 0; - goto out; - } - } - - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); - - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - memcpy(pagep, argp->page.data, argp->page.size); - LSN(pagep) = *lsnp; - - /* - * If this was an in-memory database and we are re-creating - * and this is the meta-data page, then we need to set up a - * bunch of fields in the dbo as well. - */ - if (F_ISSET(file_dbp, DB_AM_INMEM) && - argp->pgno == PGNO_BASE_MD && - (ret = __db_meta_setup(file_dbp->env, file_dbp, - file_dbp->dname, (DBMETA *)pagep, 0, DB_CHK_META)) != 0) - goto out; - } else if (DB_UNDO(op)) { - /* - * We want to undo this page creation. The page creation - * happened in two parts. 
First, we called __db_pg_alloc which - * was logged separately. Then we wrote the meta-data onto - * the page. So long as we restore the LSN, then the recovery - * for __db_pg_alloc will do everything else. - * - * Don't bother checking the lsn on the page. If we are - * rolling back the next thing is that this page will get - * freed. Opening the subdb will have reinitialized the - * page, but not the lsn. - */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - LSN(pagep) = argp->lsn; - } - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL && (t_ret = __memp_fput(mpf, - ip, pagep, file_dbp->priority)) != 0 && - ret == 0) - ret = t_ret; - - REC_CLOSE; -} - -/* - * __crdel_inmem_create_recover -- - * Recovery function for inmem_create. - * - * PUBLIC: int __crdel_inmem_create_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__crdel_inmem_create_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __crdel_inmem_create_args *argp; - DB *dbp; - int do_close, ret, t_ret; - - COMPQUIET(info, NULL); - - dbp = NULL; - do_close = 0; - REC_PRINT(__crdel_inmem_create_print); - REC_NOOP_INTRO(__crdel_inmem_create_read); - - /* First, see if the DB handle already exists. */ - if (argp->fileid == DB_LOGFILEID_INVALID) { - if (DB_REDO(op)) - ret = ENOENT; - else - ret = 0; - } else - ret = __dbreg_id_to_db(env, argp->txnp, &dbp, argp->fileid, 0); - - if (DB_REDO(op)) { - /* - * If the dbreg failed, that means that we're creating a - * tmp file. - */ - if (ret != 0) { - if ((ret = __db_create_internal(&dbp, env, 0)) != 0) - goto out; - - F_SET(dbp, DB_AM_RECOVER | DB_AM_INMEM); - memcpy(dbp->fileid, argp->fid.data, DB_FILE_ID_LEN); - if (((ret = __os_strdup(env, - argp->name.data, &dbp->dname)) != 0)) - goto out; - - /* - * This DBP is never going to be entered into the - * dbentry table, so if we leave it open here, - * then we're going to lose it. 
- */ - do_close = 1; - } - - /* Now, set the fileid. */ - memcpy(dbp->fileid, argp->fid.data, argp->fid.size); - if ((ret = __memp_set_fileid(dbp->mpf, dbp->fileid)) != 0) - goto out; - dbp->preserve_fid = 1; - MAKE_INMEM(dbp); - if ((ret = __env_setup(dbp, - NULL, NULL, argp->name.data, TXN_INVALID, 0)) != 0) - goto out; - ret = __env_mpool(dbp, argp->name.data, 0); - - if (ret == ENOENT) { - dbp->pgsize = argp->pgsize; - if ((ret = __env_mpool(dbp, - argp->name.data, DB_CREATE)) != 0) - goto out; - } else if (ret != 0) - goto out; - } - - if (DB_UNDO(op)) { - if (ret == 0) - ret = __memp_nameop(env, argp->fid.data, NULL, - (const char *)argp->name.data, NULL, 1); - - if (ret == ENOENT || ret == DB_DELETED) - ret = 0; - else - goto out; - } - - *lsnp = argp->prev_lsn; - -out: if (dbp != NULL) { - t_ret = 0; - - if (do_close || ret != 0) - t_ret = __db_close(dbp, NULL, DB_NOSYNC); - if (t_ret != 0 && ret == 0) - ret = t_ret; - } - REC_NOOP_CLOSE; -} - -/* - * __crdel_inmem_rename_recover -- - * Recovery function for inmem_rename. - * - * PUBLIC: int __crdel_inmem_rename_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__crdel_inmem_rename_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __crdel_inmem_rename_args *argp; - u_int8_t *fileid; - int ret; - - COMPQUIET(info, NULL); - - REC_PRINT(__crdel_inmem_rename_print); - REC_NOOP_INTRO(__crdel_inmem_rename_read); - fileid = argp->fid.data; - - /* Void out errors because the files may or may not still exist. 
*/ - if (DB_REDO(op)) - (void)__memp_nameop(env, fileid, - (const char *)argp->newname.data, - (const char *)argp->oldname.data, - (const char *)argp->newname.data, 1); - - if (DB_UNDO(op)) - (void)__memp_nameop(env, fileid, - (const char *)argp->oldname.data, - (const char *)argp->newname.data, - (const char *)argp->oldname.data, 1); - - *lsnp = argp->prev_lsn; - ret = 0; - - REC_NOOP_CLOSE; -} - -/* - * __crdel_inmem_remove_recover -- - * Recovery function for inmem_remove. - * - * PUBLIC: int __crdel_inmem_remove_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__crdel_inmem_remove_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __crdel_inmem_remove_args *argp; - int ret; - - COMPQUIET(info, NULL); - - REC_PRINT(__crdel_inmem_remove_print); - REC_NOOP_INTRO(__crdel_inmem_remove_read); - - /* - * Since removes are delayed; there is no undo for a remove; only redo. - * The remove may fail, which is OK. - */ - if (DB_REDO(op)) { - (void)__memp_nameop(env, - argp->fid.data, NULL, argp->name.data, NULL, 1); - } - - *lsnp = argp->prev_lsn; - ret = 0; - - REC_NOOP_CLOSE; -} @@ -1,14 +1,5 @@ /*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 + * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -19,7 +10,11 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * @@ -34,1506 +29,71 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_swap.h" -#include "dbinc/btree.h" -#include "dbinc/fop.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/partition.h" -#include "dbinc/qam.h" -#include "dbinc/txn.h" - -static int __db_disassociate __P((DB *)); -static int __db_disassociate_foreign __P ((DB *)); - -#ifdef CONFIG_TEST -static int __db_makecopy __P((ENV *, const char *, const char *)); -static int __qam_testdocopy __P((DB *, const char *)); -#endif - -/* - * DB.C -- - * This file contains the utility functions for the DBP layer. - */ - -/* - * __db_master_open -- - * Open up a handle on a master database. - * - * PUBLIC: int __db_master_open __P((DB *, DB_THREAD_INFO *, - * PUBLIC: DB_TXN *, const char *, u_int32_t, int, DB **)); - */ -int -__db_master_open(subdbp, ip, txn, name, flags, mode, dbpp) - DB *subdbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name; - u_int32_t flags; - int mode; - DB **dbpp; -{ - DB *dbp; - int ret; - - *dbpp = NULL; - - /* Open up a handle on the main database. */ - if ((ret = __db_create_internal(&dbp, subdbp->env, 0)) != 0) - return (ret); - - /* - * It's always a btree. - * Run in the transaction we've created. - * Set the pagesize in case we're creating a new database. 
- * Flag that we're creating a database with subdatabases. - */ - dbp->pgsize = subdbp->pgsize; - F_SET(dbp, DB_AM_SUBDB); - F_SET(dbp, F_ISSET(subdbp, - DB_AM_RECOVER | DB_AM_SWAP | - DB_AM_ENCRYPT | DB_AM_CHKSUM | DB_AM_NOT_DURABLE)); - - /* - * If there was a subdb specified, then we only want to apply - * DB_EXCL to the subdb, not the actual file. We only got here - * because there was a subdb specified. - */ - LF_CLR(DB_EXCL); - LF_SET(DB_RDWRMASTER); - if ((ret = __db_open(dbp, ip, - txn, name, NULL, DB_BTREE, flags, mode, PGNO_BASE_MD)) != 0) - goto err; - - /* - * The items in dbp are initialized from the master file's meta page. - * Other items such as checksum and encryption are checked when we - * read the meta-page, so we do not check those here. However, if - * the meta-page caused checksumming to be turned on and it wasn't - * already, set it here. - */ - if (F_ISSET(dbp, DB_AM_CHKSUM)) - F_SET(subdbp, DB_AM_CHKSUM); - - /* - * The user may have specified a page size for an existing file, - * which we want to ignore. - */ - subdbp->pgsize = dbp->pgsize; - *dbpp = dbp; - - if (0) { -err: if (!F_ISSET(dbp, DB_AM_DISCARD)) - (void)__db_close(dbp, txn, 0); - } - - return (ret); -} - -/* - * __db_master_update -- - * Add/Open/Remove a subdatabase from a master database. - * - * PUBLIC: int __db_master_update __P((DB *, DB *, DB_THREAD_INFO *, DB_TXN *, - * PUBLIC: const char *, DBTYPE, mu_action, const char *, u_int32_t)); - */ -int -__db_master_update(mdbp, sdbp, ip, txn, subdb, type, action, newname, flags) - DB *mdbp, *sdbp; - DB_TXN *txn; - DB_THREAD_INFO *ip; - const char *subdb; - DBTYPE type; - mu_action action; - const char *newname; - u_int32_t flags; -{ - DBC *dbc, *ndbc; - DBT key, data, ndata; - ENV *env; - PAGE *p, *r; - db_pgno_t t_pgno; - int modify, ret, t_ret; - - env = mdbp->env; - dbc = ndbc = NULL; - p = NULL; - - /* - * Open up a cursor. If this is CDB and we're creating the database, - * make it an update cursor. 
- * - * Might we modify the master database? If so, we'll need to lock. - */ - modify = (action != MU_OPEN || LF_ISSET(DB_CREATE)) ? 1 : 0; - - if ((ret = __db_cursor(mdbp, ip, txn, &dbc, - (CDB_LOCKING(env) && modify) ? DB_WRITECURSOR : 0)) != 0) - return (ret); - - /* - * Point the cursor at the record. - * - * If we're removing or potentially creating an entry, lock the page - * with DB_RMW. - * - * We do multiple cursor operations with the cursor in some cases and - * subsequently access the data DBT information. Set DB_DBT_MALLOC so - * we don't risk modification of the data between our uses of it. - * - * !!! - * We don't include the name's nul termination in the database. - */ - DB_INIT_DBT(key, subdb, strlen(subdb)); - memset(&data, 0, sizeof(data)); - F_SET(&data, DB_DBT_MALLOC); - - ret = __dbc_get(dbc, &key, &data, - DB_SET | ((STD_LOCKING(dbc) && modify) ? DB_RMW : 0)); - - /* - * What we do next--whether or not we found a record for the - * specified subdatabase--depends on what the specified action is. - * Handle ret appropriately as the first statement of each case. - */ - switch (action) { - case MU_REMOVE: - /* - * We should have found something if we're removing it. Note - * that in the common case where the DB we're asking to remove - * doesn't exist, we won't get this far; __db_subdb_remove - * will already have returned an error from __db_open. - */ - if (ret != 0) - goto err; - - /* - * Delete the subdatabase entry first; if this fails, - * we don't want to touch the actual subdb pages. - */ - if ((ret = __dbc_del(dbc, 0)) != 0) - goto err; - - /* - * We're handling actual data, not on-page meta-data, - * so it hasn't been converted to/from opposite - * endian architectures. Do it explicitly, now. 
- */ - memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t)); - DB_NTOHL_SWAP(env, &sdbp->meta_pgno); - if ((ret = __memp_fget(mdbp->mpf, &sdbp->meta_pgno, - ip, dbc->txn, DB_MPOOL_DIRTY, &p)) != 0) - goto err; - - /* Free the root on the master db if it was created. */ - if (TYPE(p) == P_BTREEMETA && - ((BTMETA *)p)->root != PGNO_INVALID) { - if ((ret = __memp_fget(mdbp->mpf, - &((BTMETA *)p)->root, ip, dbc->txn, - DB_MPOOL_DIRTY, &r)) != 0) - goto err; - - /* Free and put the page. */ - if ((ret = __db_free(dbc, r)) != 0) { - r = NULL; - goto err; - } - } - /* Free and put the page. */ - if ((ret = __db_free(dbc, p)) != 0) { - p = NULL; - goto err; - } - p = NULL; - break; - case MU_RENAME: - /* We should have found something if we're renaming it. */ - if (ret != 0) - goto err; - - /* - * Before we rename, we need to make sure we're not - * overwriting another subdatabase, or else this operation - * won't be undoable. Open a second cursor and check - * for the existence of newname; it shouldn't appear under - * us since we hold the metadata lock. - */ - if ((ret = __db_cursor(mdbp, ip, txn, &ndbc, - CDB_LOCKING(env) ? DB_WRITECURSOR : 0)) != 0) - goto err; - DB_SET_DBT(key, newname, strlen(newname)); - - /* - * We don't actually care what the meta page of the potentially- - * overwritten DB is; we just care about existence. - */ - memset(&ndata, 0, sizeof(ndata)); - F_SET(&ndata, DB_DBT_USERMEM | DB_DBT_PARTIAL); - - if ((ret = __dbc_get(ndbc, &key, &ndata, DB_SET)) == 0) { - /* A subdb called newname exists. Bail. */ - ret = EEXIST; - __db_errx(env, "rename: database %s exists", newname); - goto err; - } else if (ret != DB_NOTFOUND) - goto err; - - /* - * Now do the put first; we don't want to lose our only - * reference to the subdb. Use the second cursor so the - * first one continues to point to the old record. 
- */ - if ((ret = __dbc_put(ndbc, &key, &data, DB_KEYFIRST)) != 0) - goto err; - if ((ret = __dbc_del(dbc, 0)) != 0) { - /* - * If the delete fails, try to delete the record - * we just put, in case we're not txn-protected. - */ - (void)__dbc_del(ndbc, 0); - goto err; - } - - break; - case MU_OPEN: - /* - * Get the subdatabase information. If it already exists, - * copy out the page number and we're done. - */ - switch (ret) { - case 0: - if (LF_ISSET(DB_CREATE) && LF_ISSET(DB_EXCL)) { - ret = EEXIST; - goto err; - } - memcpy(&sdbp->meta_pgno, data.data, sizeof(db_pgno_t)); - DB_NTOHL_SWAP(env, &sdbp->meta_pgno); - goto done; - case DB_NOTFOUND: - if (LF_ISSET(DB_CREATE)) - break; - /* - * No db_err, it is reasonable to remove a - * nonexistent db. - */ - ret = ENOENT; - goto err; - default: - goto err; - } - - /* Create a subdatabase. */ - if ((ret = __db_new(dbc, - type == DB_HASH ? P_HASHMETA : P_BTREEMETA, NULL, &p)) != 0) - goto err; - sdbp->meta_pgno = PGNO(p); - - /* - * XXX - * We're handling actual data, not on-page meta-data, so it - * hasn't been converted to/from opposite endian architectures. - * Do it explicitly, now. - */ - t_pgno = PGNO(p); - DB_HTONL_SWAP(env, &t_pgno); - memset(&ndata, 0, sizeof(ndata)); - ndata.data = &t_pgno; - ndata.size = sizeof(db_pgno_t); - if ((ret = __dbc_put(dbc, &key, &ndata, 0)) != 0) - goto err; - F_SET(sdbp, DB_AM_CREATED); - break; - } - -err: -done: /* - * If we allocated a page: if we're successful, mark the page dirty - * and return it to the cache, otherwise, discard/free it. - */ - if (p != NULL && (t_ret = __memp_fput(mdbp->mpf, - dbc->thread_info, p, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - - /* Discard the cursor(s) and data. 
*/ - if (data.data != NULL) - __os_ufree(env, data.data); - if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - if (ndbc != NULL && (t_ret = __dbc_close(ndbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __env_setup -- - * Set up the underlying environment during a db_open. - * - * PUBLIC: int __env_setup __P((DB *, - * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t, u_int32_t)); */ -int -__env_setup(dbp, txn, fname, dname, id, flags) - DB *dbp; - DB_TXN *txn; - const char *fname, *dname; - u_int32_t id, flags; -{ - DB *ldbp; - DB_ENV *dbenv; - ENV *env; - u_int32_t maxid; - int ret; - - env = dbp->env; - dbenv = env->dbenv; - /* If we don't yet have an environment, it's time to create it. */ - if (!F_ISSET(env, ENV_OPEN_CALLED)) { - /* Make sure we have at least DB_MINCACHE pages in our cache. */ - if (dbenv->mp_gbytes == 0 && - dbenv->mp_bytes < dbp->pgsize * DB_MINPAGECACHE && - (ret = __memp_set_cachesize( - dbenv, 0, dbp->pgsize * DB_MINPAGECACHE, 0)) != 0) - return (ret); +#if defined(LIBC_SCCS) && !defined(lint) +static char sccsid[] = "@(#)db.c 8.4 (Berkeley) 2/21/94"; +#endif /* LIBC_SCCS and not lint */ - if ((ret = __env_open(dbenv, NULL, DB_CREATE | - DB_INIT_MPOOL | DB_PRIVATE | LF_ISSET(DB_THREAD), 0)) != 0) - return (ret); - } +#include <sys/types.h> - /* Join the underlying cache. */ - if ((!F_ISSET(dbp, DB_AM_INMEM) || dname == NULL) && - (ret = __env_mpool(dbp, fname, flags)) != 0) - return (ret); +#include <errno.h> +#include <fcntl.h> +#include <stddef.h> +#include <stdio.h> - /* We may need a per-thread mutex. */ - if (LF_ISSET(DB_THREAD) && (ret = __mutex_alloc( - env, MTX_DB_HANDLE, DB_MUTEX_PROCESS_ONLY, &dbp->mutex)) != 0) - return (ret); - - /* - * Set up a bookkeeping entry for this database in the log region, - * if such a region exists. 
Note that even if we're in recovery - * or a replication client, where we won't log registries, we'll - * still need an FNAME struct, so LOGGING_ON is the correct macro. - */ - if (LOGGING_ON(env) && dbp->log_filename == NULL -#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) && !defined(DIAGNOSTIC) - && (txn != NULL || F_ISSET(dbp, DB_AM_RECOVER)) -#endif -#if !defined(DEBUG_ROP) - && !F_ISSET(dbp, DB_AM_RDONLY) -#endif - ) { - if ((ret = __dbreg_setup(dbp, - F_ISSET(dbp, DB_AM_INMEM) ? dname : fname, - F_ISSET(dbp, DB_AM_INMEM) ? NULL : dname, id)) != 0) - return (ret); - - /* - * If we're actively logging and our caller isn't a - * recovery function that already did so, then assign - * this dbp a log fileid. - */ - if (DBENV_LOGGING(env) && !F_ISSET(dbp, DB_AM_RECOVER) && - (ret = __dbreg_new_id(dbp, txn)) != 0) - return (ret); - } - - /* - * Insert ourselves into the ENV's dblist. We allocate a - * unique ID to each {fileid, meta page number} pair, and to - * each temporary file (since they all have a zero fileid). - * This ID gives us something to use to tell which DB handles - * go with which databases in all the cursor adjustment - * routines, where we don't want to do a lot of ugly and - * expensive memcmps. - */ - MUTEX_LOCK(env, env->mtx_dblist); - maxid = 0; - TAILQ_FOREACH(ldbp, &env->dblist, dblistlinks) { - /* - * There are three cases: on-disk database (first clause), - * named in-memory database (second clause), temporary database - * (never matches; no clause). - */ - if (!F_ISSET(dbp, DB_AM_INMEM)) { - if (memcmp(ldbp->fileid, dbp->fileid, DB_FILE_ID_LEN) - == 0 && ldbp->meta_pgno == dbp->meta_pgno) - break; - } else if (dname != NULL) { - if (F_ISSET(ldbp, DB_AM_INMEM) && - ldbp->dname != NULL && - strcmp(ldbp->dname, dname) == 0) - break; - } - if (ldbp->adj_fileid > maxid) - maxid = ldbp->adj_fileid; - } - - /* - * If ldbp is NULL, we didn't find a match. 
Assign the dbp an - * adj_fileid one higher than the largest we found, and - * insert it at the head of the master dbp list. - * - * If ldbp is not NULL, it is a match for our dbp. Give dbp - * the same ID that ldbp has, and add it after ldbp so they're - * together in the list. - */ - if (ldbp == NULL) { - dbp->adj_fileid = maxid + 1; - TAILQ_INSERT_HEAD(&env->dblist, dbp, dblistlinks); - } else { - dbp->adj_fileid = ldbp->adj_fileid; - TAILQ_INSERT_AFTER(&env->dblist, ldbp, dbp, dblistlinks); - } - MUTEX_UNLOCK(env, env->mtx_dblist); - - return (0); -} +#include <db.h> -/* - * __env_mpool -- - * Set up the underlying environment cache during a db_open. - * - * PUBLIC: int __env_mpool __P((DB *, const char *, u_int32_t)); - */ -int -__env_mpool(dbp, fname, flags) - DB *dbp; +DB * +dbopen(fname, flags, mode, type, openinfo) const char *fname; - u_int32_t flags; -{ - DBT pgcookie; - DB_MPOOLFILE *mpf; - DB_PGINFO pginfo; - ENV *env; - int fidset, ftype, ret; - int32_t lsn_off; - u_int8_t nullfid[DB_FILE_ID_LEN]; - u_int32_t clear_len; - - env = dbp->env; - - /* The LSN is the first entry on a DB page, byte offset 0. */ - lsn_off = F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LSN_OFF_NOTSET : 0; - - /* It's possible that this database is already open. */ - if (F_ISSET(dbp, DB_AM_OPEN_CALLED)) - return (0); - - /* - * If we need to pre- or post-process a file's pages on I/O, set the - * file type. If it's a hash file, always call the pgin and pgout - * routines. This means that hash files can never be mapped into - * process memory. If it's a btree file and requires swapping, we - * need to page the file in and out. This has to be right -- we can't - * mmap files that are being paged in and out. - */ - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - ftype = F_ISSET(dbp, DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM) - ? DB_FTYPE_SET : DB_FTYPE_NOTSET; - clear_len = CRYPTO_ON(env) ? - (dbp->pgsize != 0 ? 
dbp->pgsize : DB_CLEARLEN_NOTSET) : - DB_PAGE_DB_LEN; - break; - case DB_HASH: - ftype = DB_FTYPE_SET; - clear_len = CRYPTO_ON(env) ? - (dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET) : - DB_PAGE_DB_LEN; - break; - case DB_QUEUE: - ftype = F_ISSET(dbp, - DB_AM_SWAP | DB_AM_ENCRYPT | DB_AM_CHKSUM) ? - DB_FTYPE_SET : DB_FTYPE_NOTSET; - - /* - * If we came in here without a pagesize set, then we need - * to mark the in-memory handle as having clear_len not - * set, because we don't really know the clear length or - * the page size yet (since the file doesn't yet exist). - */ - clear_len = dbp->pgsize != 0 ? dbp->pgsize : DB_CLEARLEN_NOTSET; - break; - case DB_UNKNOWN: - /* - * If we're running in the verifier, our database might - * be corrupt and we might not know its type--but we may - * still want to be able to verify and salvage. - * - * If we can't identify the type, it's not going to be safe - * to call __db_pgin--we pretty much have to give up all - * hope of salvaging cross-endianness. Proceed anyway; - * at worst, the database will just appear more corrupt - * than it actually is, but at best, we may be able - * to salvage some data even with no metadata page. - */ - if (F_ISSET(dbp, DB_AM_VERIFYING)) { - ftype = DB_FTYPE_NOTSET; - clear_len = DB_PAGE_DB_LEN; - break; - } - - /* - * This might be an in-memory file and we won't know its - * file type until after we open it and read the meta-data - * page. 
- */ - if (F_ISSET(dbp, DB_AM_INMEM)) { - clear_len = DB_CLEARLEN_NOTSET; - ftype = DB_FTYPE_NOTSET; - lsn_off = DB_LSN_OFF_NOTSET; - break; - } - /* FALLTHROUGH */ - default: - return (__db_unknown_type(env, "DB->open", dbp->type)); - } - - mpf = dbp->mpf; - - memset(nullfid, 0, DB_FILE_ID_LEN); - fidset = memcmp(nullfid, dbp->fileid, DB_FILE_ID_LEN); - if (fidset) - (void)__memp_set_fileid(mpf, dbp->fileid); - - (void)__memp_set_clear_len(mpf, clear_len); - (void)__memp_set_ftype(mpf, ftype); - (void)__memp_set_lsn_offset(mpf, lsn_off); - - pginfo.db_pagesize = dbp->pgsize; - pginfo.flags = - F_ISSET(dbp, (DB_AM_CHKSUM | DB_AM_ENCRYPT | DB_AM_SWAP)); - pginfo.type = dbp->type; - pgcookie.data = &pginfo; - pgcookie.size = sizeof(DB_PGINFO); - (void)__memp_set_pgcookie(mpf, &pgcookie); - -#ifndef DIAG_MVCC - if (F_ISSET(env->dbenv, DB_ENV_MULTIVERSION)) -#endif - if (F_ISSET(dbp, DB_AM_TXN) && - dbp->type != DB_QUEUE && dbp->type != DB_UNKNOWN) - LF_SET(DB_MULTIVERSION); - - if ((ret = __memp_fopen(mpf, NULL, fname, &dbp->dirname, - LF_ISSET(DB_CREATE | DB_DURABLE_UNKNOWN | DB_MULTIVERSION | - DB_NOMMAP | DB_ODDFILESIZE | DB_RDONLY | DB_TRUNCATE) | - (F_ISSET(env->dbenv, DB_ENV_DIRECT_DB) ? DB_DIRECT : 0) | - (F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_TXN_NOT_DURABLE : 0), - 0, dbp->pgsize)) != 0) { - /* - * The open didn't work; we need to reset the mpf, - * retaining the in-memory semantics (if any). - */ - (void)__memp_fclose(dbp->mpf, 0); - (void)__memp_fcreate(env, &dbp->mpf); - if (F_ISSET(dbp, DB_AM_INMEM)) - MAKE_INMEM(dbp); - return (ret); - } - - /* - * Set the open flag. We use it to mean that the dbp has gone - * through mpf setup, including dbreg_register. Also, below, - * the underlying access method open functions may want to do - * things like acquire cursors, so the open flag has to be set - * before calling them. 
- */ - F_SET(dbp, DB_AM_OPEN_CALLED); - if (!fidset && fname != NULL) { - (void)__memp_get_fileid(dbp->mpf, dbp->fileid); - dbp->preserve_fid = 1; - } - - return (0); -} - -/* - * __db_close -- - * DB->close method. - * - * PUBLIC: int __db_close __P((DB *, DB_TXN *, u_int32_t)); - */ -int -__db_close(dbp, txn, flags) - DB *dbp; - DB_TXN *txn; - u_int32_t flags; -{ - ENV *env; - int db_ref, deferred_close, ret, t_ret; - - env = dbp->env; - deferred_close = ret = 0; - - /* - * Validate arguments, but as a DB handle destructor, we can't fail. - * - * Check for consistent transaction usage -- ignore errors. Only - * internal callers specify transactions, so it's a serious problem - * if we get error messages. - */ - if (txn != NULL) - (void)__db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0); - - /* Refresh the structure and close any underlying resources. */ - ret = __db_refresh(dbp, txn, flags, &deferred_close, 0); - - /* - * If we've deferred the close because the logging of the close failed, - * return our failure right away without destroying the handle. - */ - if (deferred_close) - return (ret); - - /* !!! - * This code has an apparent race between the moment we read and - * decrement env->db_ref and the moment we check whether it's 0. - * However, if the environment is DBLOCAL, the user shouldn't have a - * reference to the env handle anyway; the only way we can get - * multiple dbps sharing a local env is if we open them internally - * during something like a subdatabase open. If any such thing is - * going on while the user is closing the original dbp with a local - * env, someone's already badly screwed up, so there's no reason - * to bother engineering around this possibility. - */ - MUTEX_LOCK(env, env->mtx_dblist); - db_ref = --env->db_ref; - MUTEX_UNLOCK(env, env->mtx_dblist); - if (F_ISSET(env, ENV_DBLOCAL) && db_ref == 0 && - (t_ret = __env_close(env->dbenv, 0)) != 0 && ret == 0) - ret = t_ret; - - /* Free the database handle. 
*/ - memset(dbp, CLEAR_BYTE, sizeof(*dbp)); - __os_free(env, dbp); - - return (ret); -} - -/* - * __db_refresh -- - * Refresh the DB structure, releasing any allocated resources. - * This does most of the work of closing files now because refresh - * is what is used during abort processing (since we can't destroy - * the actual handle) and during abort processing, we may have a - * fully opened handle. - * - * PUBLIC: int __db_refresh __P((DB *, DB_TXN *, u_int32_t, int *, int)); - */ -int -__db_refresh(dbp, txn, flags, deferred_closep, reuse) - DB *dbp; - DB_TXN *txn; - u_int32_t flags; - int *deferred_closep, reuse; + int flags, mode; + DBTYPE type; + const void *openinfo; { - DB *sdbp; - DBC *dbc; - DB_FOREIGN_INFO *f_info, *tmp; - DB_LOCKER *locker; - DB_LOCKREQ lreq; - ENV *env; - REGENV *renv; - REGINFO *infop; - u_int32_t save_flags; - int resync, ret, t_ret; - - ret = 0; - env = dbp->env; - infop = env->reginfo; - if (infop != NULL) - renv = infop->primary; - else - renv = NULL; - - /* - * If this dbp is not completely open, avoid trapping by trying to - * sync without an mpool file. - */ - if (dbp->mpf == NULL) - LF_SET(DB_NOSYNC); - - /* If never opened, or not currently open, it's easy. */ - if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) - goto never_opened; - - /* - * If we have any secondary indices, disassociate them from us. - * We don't bother with the mutex here; it only protects some - * of the ops that will make us core-dump mid-close anyway, and - * if you're trying to do something with a secondary *while* you're - * closing the primary, you deserve what you get. The disassociation - * is mostly done just so we can close primaries and secondaries in - * any order--but within one thread of control. 
- */ - LIST_FOREACH(sdbp, &dbp->s_secondaries, s_links) { - LIST_REMOVE(sdbp, s_links); - if ((t_ret = __db_disassociate(sdbp)) != 0 && ret == 0) - ret = t_ret; - } - - /* - * Disassociate ourself from any databases using us as a foreign key - * database by clearing the referring db's pointer. Reclaim memory. - */ - f_info = LIST_FIRST(&dbp->f_primaries); - while (f_info != NULL) { - tmp = LIST_NEXT(f_info, f_links); - LIST_REMOVE(f_info, f_links); - f_info->dbp->s_foreign = NULL; - __os_free(env, f_info); - f_info = tmp; - } - - if (dbp->s_foreign != NULL && - (t_ret = __db_disassociate_foreign(dbp)) != 0 && ret == 0) - ret = t_ret; - - /* - * Sync the underlying access method. Do before closing the cursors - * because DB->sync allocates cursors in order to write Recno backing - * source text files. - * - * Sync is slow on some systems, notably Solaris filesystems where the - * entire buffer cache is searched. If we're in recovery, don't flush - * the file, it's not necessary. - */ - if (!LF_ISSET(DB_NOSYNC) && - !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) && - (t_ret = __db_sync(dbp)) != 0 && ret == 0) - ret = t_ret; - - /* - * Go through the active cursors and call the cursor recycle routine, - * which resolves pending operations and moves the cursors onto the - * free list. Then, walk the free list and call the cursor destroy - * routine. Note that any failure on a close is considered "really - * bad" and we just break out of the loop and force forward. - */ - resync = TAILQ_FIRST(&dbp->active_queue) == NULL ? 0 : 1; - while ((dbc = TAILQ_FIRST(&dbp->active_queue)) != NULL) - if ((t_ret = __dbc_close(dbc)) != 0) { - if (ret == 0) - ret = t_ret; - break; - } - - while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL) - if ((t_ret = __dbc_destroy(dbc)) != 0) { - if (ret == 0) - ret = t_ret; - break; - } - - /* - * Close any outstanding join cursors. Join cursors destroy themselves - * on close and have no separate destroy routine. 
We don't have to set - * the resync flag here, because join cursors aren't write cursors. - */ - while ((dbc = TAILQ_FIRST(&dbp->join_queue)) != NULL) - if ((t_ret = __db_join_close(dbc)) != 0) { - if (ret == 0) - ret = t_ret; - break; - } - - /* - * Sync the memory pool, even though we've already called DB->sync, - * because closing cursors can dirty pages by deleting items they - * referenced. - * - * Sync is slow on some systems, notably Solaris filesystems where the - * entire buffer cache is searched. If we're in recovery, don't flush - * the file, it's not necessary. - */ - if (resync && !LF_ISSET(DB_NOSYNC) && - !F_ISSET(dbp, DB_AM_DISCARD | DB_AM_RECOVER) && - (t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0) - ret = t_ret; - -never_opened: - /* - * At this point, we haven't done anything to render the DB handle - * unusable, at least by a transaction abort. Take the opportunity - * now to log the file close if we have initialized the logging - * information. If this log fails and we're in a transaction, - * we have to bail out of the attempted close; we'll need a dbp in - * order to successfully abort the transaction, and we can't conjure - * a new one up because we haven't gotten out the dbreg_register - * record that represents the close. In this case, we put off - * actually closing the dbp until we've performed the abort. - */ - if (!reuse && LOGGING_ON(dbp->env) && dbp->log_filename != NULL) { - /* - * Discard the log file id, if any. We want to log the close - * if and only if this is not a recovery dbp or a client dbp, - * or a dead dbp handle. 
- */ - DB_ASSERT(env, renv != NULL); - if (F_ISSET(dbp, DB_AM_RECOVER) || IS_REP_CLIENT(env) || - dbp->timestamp != renv->rep_timestamp) { - if ((t_ret = __dbreg_revoke_id(dbp, - 0, DB_LOGFILEID_INVALID)) == 0 && ret == 0) - ret = t_ret; - if ((t_ret = __dbreg_teardown(dbp)) != 0 && ret == 0) - ret = t_ret; - } else { - if ((t_ret = __dbreg_close_id(dbp, - txn, DBREG_CLOSE)) != 0 && txn != NULL) { - /* - * We're in a txn and the attempt to log the - * close failed; let the txn subsystem know - * that we need to destroy this dbp once we're - * done with the abort, then bail from the - * close. - * - * Note that if the attempt to put off the - * close -also- fails--which it won't unless - * we're out of heap memory--we're really - * screwed. Panic. - */ - if ((ret = - __txn_closeevent(env, txn, dbp)) != 0) - return (__env_panic(env, ret)); - if (deferred_closep != NULL) - *deferred_closep = 1; - return (t_ret); - } - /* - * If dbreg_close_id failed and we were not in a - * transaction, then we need to finish this close - * because the caller can't do anything with the - * handle after we return an error. We rely on - * dbreg_close_id to mark the entry in some manner - * so that we do not do a clean shutdown of this - * environment. If shutdown isn't clean, then the - * application *must* run recovery and that will - * generate the RCLOSE record. - */ - } - - } - - /* Close any handle we've been holding since the open. */ - if (dbp->saved_open_fhp != NULL && - (t_ret = __os_closehandle(env, dbp->saved_open_fhp)) != 0 && - ret == 0) - ret = t_ret; - - /* - * Remove this DB handle from the ENV's dblist, if it's been added. - * - * Close our reference to the underlying cache while locked, we don't - * want to race with a thread searching for our underlying cache link - * while opening a DB handle. - * - * The DB handle may not yet have been added to the ENV list, don't - * blindly call the underlying TAILQ_REMOVE macro. 
Explicitly reset - * the field values to NULL so that we can't call TAILQ_REMOVE twice. - */ - MUTEX_LOCK(env, env->mtx_dblist); - if (!reuse && - (dbp->dblistlinks.tqe_next != NULL || - dbp->dblistlinks.tqe_prev != NULL)) { - TAILQ_REMOVE(&env->dblist, dbp, dblistlinks); - dbp->dblistlinks.tqe_next = NULL; - dbp->dblistlinks.tqe_prev = NULL; - } - - /* Close the memory pool file handle. */ - if (dbp->mpf != NULL) { - if ((t_ret = __memp_fclose(dbp->mpf, - F_ISSET(dbp, DB_AM_DISCARD) ? DB_MPOOL_DISCARD : 0)) != 0 && - ret == 0) - ret = t_ret; - dbp->mpf = NULL; - if (reuse && - (t_ret = __memp_fcreate(env, &dbp->mpf)) != 0 && - ret == 0) - ret = t_ret; - } - - MUTEX_UNLOCK(env, env->mtx_dblist); - - /* - * Call the access specific close function. - * - * We do this here rather than in __db_close as we need to do this when - * aborting an open so that file descriptors are closed and abort of - * renames can succeed on platforms that lock open files (such as - * Windows). In particular, we need to ensure that all the extents - * associated with a queue are closed so that queue renames can be - * aborted. - * - * It is also important that we do this before releasing the handle - * lock, because dbremove and dbrename assume that once they have the - * handle lock, it is safe to modify the underlying file(s). - * - * !!! - * Because of where these functions are called in the DB handle close - * process, these routines can't do anything that would dirty pages or - * otherwise affect closing down the database. Specifically, we can't - * abort and recover any of the information they control. - */ -#ifdef HAVE_PARTITION - if (dbp->p_internal != NULL && - (t_ret = __partition_close(dbp, txn, flags)) != 0 && ret == 0) - ret = t_ret; -#endif - if ((t_ret = __bam_db_close(dbp)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __ham_db_close(dbp)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __qam_db_close(dbp, dbp->flags)) != 0 && ret == 0) - ret = t_ret; - - /* - * !!! 
- * At this point, the access-method specific information has been - * freed. From now on, we can use the dbp, but not touch any - * access-method specific data. - */ - - if (!reuse && dbp->locker != NULL) { - /* We may have pending trade operations on this dbp. */ - if (txn == NULL) - txn = dbp->cur_txn; - if (IS_REAL_TXN(txn)) - __txn_remlock(env, - txn, &dbp->handle_lock, dbp->locker); - - /* We may be holding the handle lock; release it. */ - lreq.op = DB_LOCK_PUT_ALL; - lreq.obj = NULL; - if ((t_ret = __lock_vec(env, - dbp->locker, 0, &lreq, 1, NULL)) != 0 && ret == 0) - ret = t_ret; - - if ((t_ret = - __lock_id_free(env, dbp->locker)) != 0 && ret == 0) - ret = t_ret; - dbp->locker = NULL; - LOCK_INIT(dbp->handle_lock); - } - - /* - * If this is a temporary file (un-named in-memory file), then - * discard the locker ID allocated as the fileid. - */ - if (LOCKING_ON(env) && - F_ISSET(dbp, DB_AM_INMEM) && !dbp->preserve_fid && - *(u_int32_t *)dbp->fileid != DB_LOCK_INVALIDID) { - if ((t_ret = __lock_getlocker(env->lk_handle, - *(u_int32_t *)dbp->fileid, 0, &locker)) == 0) - t_ret = __lock_id_free(env, locker); - if (ret == 0) - ret = t_ret; - } - - if (reuse) { - /* - * If we are reusing this dbp, then we're done now. Re-init - * the handle, preserving important flags, and then return. - * This code is borrowed from __db_init, which does more - * than we can do here. - */ - save_flags = F_ISSET(dbp, DB_AM_INMEM | DB_AM_TXN); - - if ((ret = __bam_db_create(dbp)) != 0) - return (ret); - if ((ret = __ham_db_create(dbp)) != 0) - return (ret); - if ((ret = __qam_db_create(dbp)) != 0) - return (ret); - - /* Restore flags */ - dbp->flags = dbp->orig_flags | save_flags; - - if (FLD_ISSET(save_flags, DB_AM_INMEM)) { - /* - * If this is inmem, then it may have a fileid - * even if it was never opened, and we need to - * clear out that fileid. 
- */ - memset(dbp->fileid, 0, sizeof(dbp->fileid)); - MAKE_INMEM(dbp); +#define DB_FLAGS (DB_LOCK | DB_SHMEM | DB_TXN) +#define USE_OPEN_FLAGS \ + (O_CREAT | O_EXCL | O_EXLOCK | O_NONBLOCK | O_RDONLY | \ + O_RDWR | O_SHLOCK | O_TRUNC) + + if ((flags & ~(USE_OPEN_FLAGS | DB_FLAGS)) == 0) + switch (type) { + case DB_BTREE: + return (__bt_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); + case DB_HASH: + return (__hash_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); + case DB_RECNO: + return (__rec_open(fname, flags & USE_OPEN_FLAGS, + mode, openinfo, flags & DB_FLAGS)); } - return (ret); - } - - dbp->type = DB_UNKNOWN; - - /* - * The thread mutex may have been invalidated in __dbreg_close_id if the - * fname refcount did not go to 0. If not, discard the thread mutex. - */ - if ((t_ret = __mutex_free(env, &dbp->mutex)) != 0 && ret == 0) - ret = t_ret; - - /* Discard any memory allocated for the file and database names. */ - if (dbp->fname != NULL) { - __os_free(dbp->env, dbp->fname); - dbp->fname = NULL; - } - if (dbp->dname != NULL) { - __os_free(dbp->env, dbp->dname); - dbp->dname = NULL; - } - - /* Discard any memory used to store returned data. */ - if (dbp->my_rskey.data != NULL) - __os_free(dbp->env, dbp->my_rskey.data); - if (dbp->my_rkey.data != NULL) - __os_free(dbp->env, dbp->my_rkey.data); - if (dbp->my_rdata.data != NULL) - __os_free(dbp->env, dbp->my_rdata.data); - - /* For safety's sake; we may refresh twice. */ - memset(&dbp->my_rskey, 0, sizeof(DBT)); - memset(&dbp->my_rkey, 0, sizeof(DBT)); - memset(&dbp->my_rdata, 0, sizeof(DBT)); - - /* Clear out fields that normally get set during open. 
*/ - memset(dbp->fileid, 0, sizeof(dbp->fileid)); - dbp->adj_fileid = 0; - dbp->meta_pgno = 0; - dbp->cur_locker = NULL; - dbp->cur_txn = NULL; - dbp->associate_locker = NULL; - dbp->cl_id = 0; - dbp->open_flags = 0; - - /* - * If we are being refreshed with a txn specified, then we need - * to make sure that we clear out the lock handle field, because - * releasing all the locks for this transaction will release this - * lock and we don't want close to stumble upon this handle and - * try to close it. - */ - if (txn != NULL) - LOCK_INIT(dbp->handle_lock); - - /* Reset flags to whatever the user configured. */ - dbp->flags = dbp->orig_flags; - - return (ret); + errno = EINVAL; + return (NULL); } -/* - * __db_disassociate -- - * Destroy the association between a given secondary and its primary. - */ static int -__db_disassociate(sdbp) - DB *sdbp; -{ - DBC *dbc; - int ret, t_ret; - - ret = 0; - - sdbp->s_callback = NULL; - sdbp->s_primary = NULL; - sdbp->get = sdbp->stored_get; - sdbp->close = sdbp->stored_close; - - /* - * Complain, but proceed, if we have any active cursors. (We're in - * the middle of a close, so there's really no turning back.) - */ - if (sdbp->s_refcnt != 1 || - TAILQ_FIRST(&sdbp->active_queue) != NULL || - TAILQ_FIRST(&sdbp->join_queue) != NULL) { - __db_errx(sdbp->env, - "Closing a primary DB while a secondary DB has active cursors is unsafe"); - ret = EINVAL; - } - sdbp->s_refcnt = 0; - - while ((dbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL) - if ((t_ret = __dbc_destroy(dbc)) != 0 && ret == 0) - ret = t_ret; - - F_CLR(sdbp, DB_AM_SECONDARY); - return (ret); -} - -/* - * __db_disassociate_foreign -- - * Destroy the association between a given secondary and its foreign. 
- */ -static int -__db_disassociate_foreign(sdbp) - DB *sdbp; -{ - DB *fdbp; - DB_FOREIGN_INFO *f_info, *tmp; - int ret; - - if (sdbp->s_foreign == NULL) - return (0); - if ((ret = __os_malloc(sdbp->env, sizeof(DB_FOREIGN_INFO), &tmp)) != 0) - return (ret); - - fdbp = sdbp->s_foreign; - ret = 0; - f_info = LIST_FIRST(&fdbp->f_primaries); - while (f_info != NULL) { - tmp = LIST_NEXT(f_info, f_links); - if (f_info ->dbp == sdbp) { - LIST_REMOVE(f_info, f_links); - __os_free(sdbp->env, f_info); - } - f_info = tmp; - } - - return (ret); -} - -/* - * __db_log_page - * Log a meta-data or root page during a subdatabase create operation. - * - * PUBLIC: int __db_log_page __P((DB *, DB_TXN *, DB_LSN *, db_pgno_t, PAGE *)); - */ -int -__db_log_page(dbp, txn, lsn, pgno, page) - DB *dbp; - DB_TXN *txn; - DB_LSN *lsn; - db_pgno_t pgno; - PAGE *page; +__dberr() { - DBT page_dbt; - DB_LSN new_lsn; - int ret; - - if (!LOGGING_ON(dbp->env) || txn == NULL) - return (0); - - memset(&page_dbt, 0, sizeof(page_dbt)); - page_dbt.size = dbp->pgsize; - page_dbt.data = page; - - ret = __crdel_metasub_log(dbp, txn, &new_lsn, 0, pgno, &page_dbt, lsn); - - if (ret == 0) - page->lsn = new_lsn; - return (ret); + return (RET_ERROR); } /* - * __db_backup_name - * Create the backup file name for a given file. + * __DBPANIC -- Stop. * - * PUBLIC: int __db_backup_name __P((ENV *, - * PUBLIC: const char *, DB_TXN *, char **)); + * Parameters: + * dbp: pointer to the DB structure. */ -#undef BACKUP_PREFIX -#define BACKUP_PREFIX "__db." - -#undef MAX_INT_TO_HEX -#define MAX_INT_TO_HEX 8 - -int -__db_backup_name(env, name, txn, backup) - ENV *env; - const char *name; - DB_TXN *txn; - char **backup; -{ - u_int32_t id; - size_t len; - int ret; - char *p, *retp; - - *backup = NULL; - - /* - * Part of the name may be a full path, so we need to make sure that - * we allocate enough space for it, even in the case where we don't - * use the entire filename for the backup name. 
- */ - len = strlen(name) + strlen(BACKUP_PREFIX) + 2 * MAX_INT_TO_HEX + 1; - if ((ret = __os_malloc(env, len, &retp)) != 0) - return (ret); - - /* - * Create the name. Backup file names are in one of 2 forms: in a - * transactional env "__db.TXNID.ID", where ID is a random number, - * and in any other env "__db.FILENAME". - * - * In addition, the name passed may contain an env-relative path. - * In that case, put the "__db." in the right place (in the last - * component of the pathname). - * - * There are four cases here: - * 1. simple path w/out transaction - * 2. simple path + transaction - * 3. multi-component path w/out transaction - * 4. multi-component path + transaction - */ - p = __db_rpath(name); - if (IS_REAL_TXN(txn)) { - __os_unique_id(env, &id); - if (p == NULL) /* Case 2. */ - snprintf(retp, len, "%s%x.%x", - BACKUP_PREFIX, txn->txnid, id); - else /* Case 4. */ - snprintf(retp, len, "%.*s%x.%x", - (int)(p - name) + 1, name, txn->txnid, id); - } else { - if (p == NULL) /* Case 1. */ - snprintf(retp, len, "%s%s", BACKUP_PREFIX, name); - else /* Case 3. */ - snprintf(retp, len, "%.*s%s%s", - (int)(p - name) + 1, name, BACKUP_PREFIX, p + 1); - } - - *backup = retp; - return (0); -} - -#ifdef CONFIG_TEST -/* - * __db_testcopy - * Create a copy of all backup files and our "main" DB. 
- * - * PUBLIC: #ifdef CONFIG_TEST - * PUBLIC: int __db_testcopy __P((ENV *, DB *, const char *)); - * PUBLIC: #endif - */ -int -__db_testcopy(env, dbp, name) - ENV *env; - DB *dbp; - const char *name; -{ - DB_MPOOL *dbmp; - DB_MPOOLFILE *mpf; - - DB_ASSERT(env, dbp != NULL || name != NULL); - - if (name == NULL) { - dbmp = env->mp_handle; - mpf = dbp->mpf; - name = R_ADDR(dbmp->reginfo, mpf->mfp->path_off); - } - - if (dbp != NULL && dbp->type == DB_QUEUE) - return (__qam_testdocopy(dbp, name)); - else -#ifdef HAVE_PARTITION - if (dbp != NULL && DB_IS_PARTITIONED(dbp)) - return (__part_testdocopy(dbp, name)); - else -#endif - return (__db_testdocopy(env, name)); -} - -static int -__qam_testdocopy(dbp, name) +void +__dbpanic(dbp) DB *dbp; - const char *name; { - DB_THREAD_INFO *ip; - QUEUE_FILELIST *filelist, *fp; - int ret; - char buf[DB_MAXPATHLEN], *dir; - - filelist = NULL; - if ((ret = __db_testdocopy(dbp->env, name)) != 0) - return (ret); - - /* Call ENV_GET_THREAD_INFO to get a valid DB_THREAD_INFO */ - ENV_GET_THREAD_INFO(dbp->env, ip); - if (dbp->mpf != NULL && - (ret = __qam_gen_filelist(dbp, ip, &filelist)) != 0) - goto done; - - if (filelist == NULL) - return (0); - dir = ((QUEUE *)dbp->q_internal)->dir; - for (fp = filelist; fp->mpf != NULL; fp++) { - snprintf(buf, sizeof(buf), - QUEUE_EXTENT, dir, PATH_SEPARATOR[0], name, fp->id); - if ((ret = __db_testdocopy(dbp->env, buf)) != 0) - return (ret); - } - -done: __os_free(dbp->env, filelist); - return (0); -} - -/* - * __db_testdocopy - * Create a copy of all backup files and our "main" DB. - * PUBLIC: int __db_testdocopy __P((ENV *, const char *)); - */ -int -__db_testdocopy(env, name) - ENV *env; - const char *name; -{ - size_t len; - int dircnt, i, ret; - char *copy, **namesp, *p, *real_name; - - dircnt = 0; - copy = NULL; - namesp = NULL; - - /* Create the real backing file name. */ - if ((ret = __db_appname(env, - DB_APP_DATA, name, NULL, &real_name)) != 0) - return (ret); - - /* - * !!! 
- * There are tests that attempt to copy non-existent files. I'd guess - * it's a testing bug, but I don't have time to figure it out. Block - * the case here. - */ - if (__os_exists(env, real_name, NULL) != 0) { - __os_free(env, real_name); - return (0); - } - - /* - * Copy the file itself. - * - * Allocate space for the file name, including adding an ".afterop" and - * trailing nul byte. - */ - len = strlen(real_name) + sizeof(".afterop"); - if ((ret = __os_malloc(env, len, &copy)) != 0) - goto err; - snprintf(copy, len, "%s.afterop", real_name); - if ((ret = __db_makecopy(env, real_name, copy)) != 0) - goto err; - - /* - * Get the directory path to call __os_dirlist(). - */ - if ((p = __db_rpath(real_name)) != NULL) - *p = '\0'; - if ((ret = __os_dirlist(env, real_name, 0, &namesp, &dircnt)) != 0) - goto err; - - /* - * Walk the directory looking for backup files. Backup file names in - * transactional environments are of the form: - * - * BACKUP_PREFIX.TXNID.ID - */ - for (i = 0; i < dircnt; i++) { - /* Check for a related backup file name. */ - if (strncmp( - namesp[i], BACKUP_PREFIX, sizeof(BACKUP_PREFIX) - 1) != 0) - continue; - p = namesp[i] + sizeof(BACKUP_PREFIX); - p += strspn(p, "0123456789ABCDEFabcdef"); - if (*p != '.') - continue; - ++p; - p += strspn(p, "0123456789ABCDEFabcdef"); - if (*p != '\0') - continue; - - /* - * Copy the backup file. - * - * Allocate space for the file name, including adding a - * ".afterop" and trailing nul byte. 
- */ - if (real_name != NULL) { - __os_free(env, real_name); - real_name = NULL; - } - if ((ret = __db_appname(env, - DB_APP_DATA, namesp[i], NULL, &real_name)) != 0) - goto err; - if (copy != NULL) { - __os_free(env, copy); - copy = NULL; - } - len = strlen(real_name) + sizeof(".afterop"); - if ((ret = __os_malloc(env, len, &copy)) != 0) - goto err; - snprintf(copy, len, "%s.afterop", real_name); - if ((ret = __db_makecopy(env, real_name, copy)) != 0) - goto err; - } - -err: if (namesp != NULL) - __os_dirfree(env, namesp, dircnt); - if (copy != NULL) - __os_free(env, copy); - if (real_name != NULL) - __os_free(env, real_name); - return (ret); -} - -static int -__db_makecopy(env, src, dest) - ENV *env; - const char *src, *dest; -{ - DB_FH *rfhp, *wfhp; - size_t rcnt, wcnt; - int ret; - char *buf; - - rfhp = wfhp = NULL; - - if ((ret = __os_malloc(env, 64 * 1024, &buf)) != 0) - goto err; - - if ((ret = __os_open(env, src, 0, - DB_OSO_RDONLY, DB_MODE_600, &rfhp)) != 0) - goto err; - if ((ret = __os_open(env, dest, 0, - DB_OSO_CREATE | DB_OSO_TRUNC, DB_MODE_600, &wfhp)) != 0) - goto err; - - for (;;) { - if ((ret = - __os_read(env, rfhp, buf, sizeof(buf), &rcnt)) != 0) - goto err; - if (rcnt == 0) - break; - if ((ret = - __os_write(env, wfhp, buf, sizeof(buf), &wcnt)) != 0) - goto err; - } - - if (0) { -err: __db_err(env, ret, "__db_makecopy: %s -> %s", src, dest); - } - - if (buf != NULL) - __os_free(env, buf); - if (rfhp != NULL) - (void)__os_closehandle(env, rfhp); - if (wfhp != NULL) - (void)__os_closehandle(env, wfhp); - return (ret); + /* The only thing that can succeed is a close. 
*/ + dbp->del = (int (*)())__dberr; + dbp->fd = (int (*)())__dberr; + dbp->get = (int (*)())__dberr; + dbp->put = (int (*)())__dberr; + dbp->seq = (int (*)())__dberr; + dbp->sync = (int (*)())__dberr; } -#endif diff --git a/db/db.src b/db/db.src deleted file mode 100644 index 2136b79..0000000 --- a/db/db.src +++ /dev/null @@ -1,328 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -DBPRIVATE -PREFIX __db - -INCLUDE #include "db_int.h" -INCLUDE #include "dbinc/crypto.h" -INCLUDE #include "dbinc/db_page.h" -INCLUDE #include "dbinc/db_dispatch.h" -INCLUDE #include "dbinc/db_am.h" -INCLUDE #include "dbinc/log.h" -INCLUDE #include "dbinc/txn.h" -INCLUDE - -/* - * addrem -- Add or remove an entry from a duplicate page. - * - * opcode: identifies if this is an add or delete. - * fileid: file identifier of the file being modified. - * pgno: duplicate page number. - * indx: location at which to insert or delete. - * nbytes: number of bytes added/removed to/from the page. - * hdr: header for the data item. - * dbt: data that is deleted or is to be added. - * pagelsn: former lsn of the page. - * - * If the hdr was NULL then, the dbt is a regular B_KEYDATA. - * If the dbt was NULL then the hdr is a complete item to be - * pasted on the page. - */ -BEGIN addrem 42 41 -ARG opcode u_int32_t lu -DB fileid int32_t ld -ARG pgno db_pgno_t lu -ARG indx u_int32_t lu -ARG nbytes u_int32_t lu -DBT hdr DBT s -DBT dbt DBT s -POINTER pagelsn DB_LSN * lu -END - -/* - * big -- Handles addition and deletion of big key/data items. - * - * opcode: identifies get/put. - * fileid: file identifier of the file being modified. - * pgno: page onto which data is being added/removed. - * prev_pgno: the page before the one we are logging. - * next_pgno: the page after the one we are logging. - * dbt: data being written onto the page. - * pagelsn: former lsn of the orig_page. 
- * prevlsn: former lsn of the prev_pgno. - * nextlsn: former lsn of the next_pgno. This is not currently used, but - * may be used later if we actually do overwrites of big key/ - * data items in place. - */ -BEGIN big 42 43 -ARG opcode u_int32_t lu -DB fileid int32_t ld -ARG pgno db_pgno_t lu -ARG prev_pgno db_pgno_t lu -ARG next_pgno db_pgno_t lu -DBT dbt DBT s -POINTER pagelsn DB_LSN * lu -POINTER prevlsn DB_LSN * lu -POINTER nextlsn DB_LSN * lu -END - -/* - * ovref -- Handles increment/decrement of overflow page reference count. - * - * fileid: identifies the file being modified. - * pgno: page number whose ref count is being incremented/decremented. - * adjust: the adjustment being made. - * lsn: the page's original lsn. - */ -BEGIN ovref 42 44 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -ARG adjust int32_t ld -POINTER lsn DB_LSN * lu -END - -/* - * relink -- Handles relinking around a page. - * - * opcode: indicates if this is an addpage or delete page - * pgno: the page being changed. - * lsn the page's original lsn. - * prev: the previous page. - * lsn_prev: the previous page's original lsn. - * next: the next page. - * lsn_next: the previous page's original lsn. - */ -BEGIN_COMPAT relink 42 45 -ARG opcode u_int32_t lu -DB fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER lsn DB_LSN * lu -ARG prev db_pgno_t lu -POINTER lsn_prev DB_LSN * lu -ARG next db_pgno_t lu -POINTER lsn_next DB_LSN * lu -END - -/* - * Debug -- log an operation upon entering an access method. - * op: Operation (cursor, c_close, c_get, c_put, c_del, - * get, put, delete). - * fileid: identifies the file being acted upon. - * key: key paramater - * data: data parameter - * flags: flags parameter - */ -BEGIN debug 42 47 -DBT op DBT s -ARG fileid int32_t ld -DBT key DBT s -DBT data DBT s -ARG arg_flags u_int32_t lu -END - -/* - * noop -- do nothing, but get an LSN. 
- */ -BEGIN noop 42 48 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER prevlsn DB_LSN * lu -END - -/* - * pg_alloc: used to record allocating a new page. - * - * meta_lsn: the original lsn of the page reference by meta_pgno. - * meta_pgno the page pointing at the allocated page in the free list. - * If the list is unsorted this is the metadata page. - * page_lsn: the allocated page's original lsn. - * pgno: the page allocated. - * ptype: the type of the page allocated. - * next: the next page on the free list. - * last_pgno: the last page in the file after this op (4.3+). - */ -BEGIN_COMPAT pg_alloc 42 49 -DB fileid int32_t ld -POINTER meta_lsn DB_LSN * lu -ARG meta_pgno db_pgno_t lu -POINTER page_lsn DB_LSN * lu -ARG pgno db_pgno_t lu -ARG ptype u_int32_t lu -ARG next db_pgno_t lu -END - -BEGIN pg_alloc 43 49 -DB fileid int32_t ld -POINTER meta_lsn DB_LSN * lu -ARG meta_pgno db_pgno_t lu -POINTER page_lsn DB_LSN * lu -ARG pgno db_pgno_t lu -ARG ptype u_int32_t lu -ARG next db_pgno_t lu -ARG last_pgno db_pgno_t lu -END - -/* - * pg_free: used to record freeing a page. - * If we are maintaining a sorted free list (during compact) meta_pgno - * will be non-zero and refer to the page that preceeds the one we are freeing - * in the free list. Meta_lsn will then be the lsn of that page. - * - * pgno: the page being freed. - * meta_lsn: the meta-data page's original lsn. - * meta_pgno: the meta-data page number. - * header: the header from the free'd page. - * next: the previous next pointer on the metadata page. - * last_pgno: the last page in the file before this op (4.3+). 
- */ -BEGIN_COMPAT pg_free 42 50 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER meta_lsn DB_LSN * lu -ARG meta_pgno db_pgno_t lu -PGDBT header DBT s -ARG next db_pgno_t lu -END - -BEGIN pg_free 43 50 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER meta_lsn DB_LSN * lu -ARG meta_pgno db_pgno_t lu -PGDBT header DBT s -ARG next db_pgno_t lu -ARG last_pgno db_pgno_t lu -END - -/* - * cksum -- - * This log record is written when we're unable to checksum a page, - * before returning DB_RUNRECOVERY. This log record causes normal - * recovery to itself return DB_RUNRECOVERY, as only catastrophic - * recovery can fix things. - */ -BEGIN cksum 42 51 -END - -/* - * pg_freedata: used to record freeing a page with data on it. - * - * pgno: the page being freed. - * meta_lsn: the meta-data page's original lsn. - * meta_pgno: the meta-data page number. - * header: the header and index entries from the free'd page. - * data: the data from the free'd page. - * next: the previous next pointer on the metadata page. - * last_pgno: the last page in the file before this op (4.3+). - */ -BEGIN_COMPAT pg_freedata 42 52 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER meta_lsn DB_LSN * lu -ARG meta_pgno db_pgno_t lu -PGDBT header DBT s -ARG next db_pgno_t lu -PGDDBT data DBT s -END - -BEGIN pg_freedata 43 52 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -POINTER meta_lsn DB_LSN * lu -ARG meta_pgno db_pgno_t lu -PGDBT header DBT s -ARG next db_pgno_t lu -ARG last_pgno db_pgno_t lu -PGDDBT data DBT s -END - -/* - * pg_prepare: used to record an aborted page in a prepared transaction. - * - * pgno: the page being freed. - */ -X BEGIN pg_prepare 42 53 -X DB fileid int32_t ld -X ARG pgno db_pgno_t lu -X END - -/* - * pg_new: used to record a new page put on the free list. - * - * pgno: the page being freed. - * meta_lsn: the meta-data page's original lsn. - * meta_pgno: the meta-data page number. - * header: the header from the free'd page. 
- * next: the previous next pointer on the metadata page. - */ -X BEGIN pg_new 42 54 -X DB fileid int32_t ld -X ARG pgno db_pgno_t lu -X POINTER meta_lsn DB_LSN * lu -X ARG meta_pgno db_pgno_t lu -X PGDBT header DBT s -X ARG next db_pgno_t lu -X END - -/* - * pg_init: used to reinitialize a page during truncate. - * - * pgno: the page being initialized. - * header: the header from the page. - * data: data that used to be on the page. - */ -BEGIN pg_init 43 60 -DB fileid int32_t ld -ARG pgno db_pgno_t lu -PGDBT header DBT s -PGDDBT data DBT s -END - -/* - * pg_sort: sort the free list - * - * meta: meta page number - * meta_lsn: lsn on meta page. - * last_free: page number of new last free page. - * last_lsn; lsn of last free page. - * last_pgno: current last page number. - * list: list of pages and lsns to sort. - */ -BEGIN_COMPAT pg_sort 44 61 -DB fileid int32_t ld -ARG meta db_pgno_t lu -POINTER meta_lsn DB_LSN * lu -ARG last_free db_pgno_t lu -POINTER last_lsn DB_LSN * lu -ARG last_pgno db_pgno_t lu -DBT list DBT s -END - - -/* - * pg_truc: truncate the free list - * - * meta: meta page number - * meta_lsn: lsn on meta page. - * last_free: page number of new last free page. - * last_lsn; lsn of last free page. - * last_pgno: current last page number. - * list: list of pages and lsns on free list. - */ -BEGIN pg_trunc 49 66 -DB fileid int32_t ld -ARG meta db_pgno_t lu -POINTER meta_lsn DB_LSN * lu -ARG last_free db_pgno_t lu -POINTER last_lsn DB_LSN * lu -ARG next_free db_pgno_t lu -ARG last_pgno db_pgno_t lu -DBT list DBT s -END - diff --git a/db/db_am.c b/db/db_am.c deleted file mode 100644 index c453ea9..0000000 --- a/db/db_am.c +++ /dev/null @@ -1,1015 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1998-2009 Oracle. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/partition.h" -#include "dbinc/qam.h" -#include "dbinc/txn.h" - -static int __db_secondary_get __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); -static int __dbc_set_priority __P((DBC *, DB_CACHE_PRIORITY)); -static int __dbc_get_priority __P((DBC *, DB_CACHE_PRIORITY* )); - -/* - * __db_cursor_int -- - * Internal routine to create a cursor. - * - * PUBLIC: int __db_cursor_int __P((DB *, DB_THREAD_INFO *, - * PUBLIC: DB_TXN *, DBTYPE, db_pgno_t, int, DB_LOCKER *, DBC **)); - */ -int -__db_cursor_int(dbp, ip, txn, dbtype, root, flags, locker, dbcp) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DBTYPE dbtype; - db_pgno_t root; - int flags; - DB_LOCKER *locker; - DBC **dbcp; -{ - DBC *dbc; - DBC_INTERNAL *cp; - ENV *env; - db_threadid_t tid; - int allocated, ret; - pid_t pid; - - env = dbp->env; - allocated = 0; - - /* - * If dbcp is non-NULL it is assumed to point to an area to initialize - * as a cursor. - * - * Take one from the free list if it's available. Take only the - * right type. With off page dups we may have different kinds - * of cursors on the queue for a single database. - */ - MUTEX_LOCK(env, dbp->mutex); - -#ifndef HAVE_NO_DB_REFCOUNT - /* - * If this DBP is being logged then refcount the log filename - * relative to this transaction. We do this here because we have - * the dbp->mutex which protects the refcount. We want to avoid - * calling the function if we are duplicating a cursor. This includes - * the case of creating an off page duplicate cursor. If we know this - * cursor will not be used in an update, we could avoid this, - * but we don't have that information. 
- */ - if (txn != NULL && !LF_ISSET(DBC_OPD | DBC_DUPLICATE) - && !F_ISSET(dbp, DB_AM_RECOVER) && - dbp->log_filename != NULL && !IS_REP_CLIENT(env) && - (ret = __txn_record_fname(env, txn, dbp->log_filename)) != 0) { - MUTEX_UNLOCK(env, dbp->mutex); - return (ret); - } - -#endif - - TAILQ_FOREACH(dbc, &dbp->free_queue, links) - if (dbtype == dbc->dbtype) { - TAILQ_REMOVE(&dbp->free_queue, dbc, links); - F_CLR(dbc, ~DBC_OWN_LID); - break; - } - MUTEX_UNLOCK(env, dbp->mutex); - - if (dbc == NULL) { - if ((ret = __os_calloc(env, 1, sizeof(DBC), &dbc)) != 0) - return (ret); - allocated = 1; - dbc->flags = 0; - - dbc->dbp = dbp; - dbc->dbenv = dbp->dbenv; - dbc->env = dbp->env; - - /* Set up locking information. */ - if (LOCKING_ON(env)) { - /* - * If we are not threaded, we share a locker ID among - * all cursors opened in the environment handle, - * allocating one if this is the first cursor. - * - * This relies on the fact that non-threaded DB handles - * always have non-threaded environment handles, since - * we set DB_THREAD on DB handles created with threaded - * environment handles. - */ - if (!DB_IS_THREADED(dbp)) { - if (env->env_lref == NULL && (ret = - __lock_id(env, NULL, &env->env_lref)) != 0) - goto err; - dbc->lref = env->env_lref; - } else { - if ((ret = - __lock_id(env, NULL, &dbc->lref)) != 0) - goto err; - F_SET(dbc, DBC_OWN_LID); - } - - /* - * In CDB, secondary indices should share a lock file - * ID with the primary; otherwise we're susceptible - * to deadlocks. We also use __db_cursor_int rather - * than __db_cursor to create secondary update cursors - * in c_put and c_del; these won't acquire a new lock. - * - * !!! - * Since this is in the one-time cursor allocation - * code, we need to be sure to destroy, not just - * close, all cursors in the secondary when we - * associate. 
- */ - if (CDB_LOCKING(env) && - F_ISSET(dbp, DB_AM_SECONDARY)) - memcpy(dbc->lock.fileid, - dbp->s_primary->fileid, DB_FILE_ID_LEN); - else - memcpy(dbc->lock.fileid, - dbp->fileid, DB_FILE_ID_LEN); - - if (CDB_LOCKING(env)) { - if (F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) { - /* - * If we are doing a single lock per - * environment, set up the global - * lock object just like we do to - * single thread creates. - */ - DB_ASSERT(env, sizeof(db_pgno_t) == - sizeof(u_int32_t)); - dbc->lock_dbt.size = sizeof(u_int32_t); - dbc->lock_dbt.data = &dbc->lock.pgno; - dbc->lock.pgno = 0; - } else { - dbc->lock_dbt.size = DB_FILE_ID_LEN; - dbc->lock_dbt.data = dbc->lock.fileid; - } - } else { - dbc->lock.type = DB_PAGE_LOCK; - dbc->lock_dbt.size = sizeof(dbc->lock); - dbc->lock_dbt.data = &dbc->lock; - } - } - /* Init the DBC internal structure. */ -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbp)) { - if ((ret = __partc_init(dbc)) != 0) - goto err; - } else -#endif - switch (dbtype) { - case DB_BTREE: - case DB_RECNO: - if ((ret = __bamc_init(dbc, dbtype)) != 0) - goto err; - break; - case DB_HASH: - if ((ret = __hamc_init(dbc)) != 0) - goto err; - break; - case DB_QUEUE: - if ((ret = __qamc_init(dbc)) != 0) - goto err; - break; - case DB_UNKNOWN: - default: - ret = __db_unknown_type(env, "DB->cursor", dbtype); - goto err; - } - - cp = dbc->internal; - } - - /* Refresh the DBC structure. */ - dbc->dbtype = dbtype; - RESET_RET_MEM(dbc); - dbc->set_priority = __dbc_set_priority; - dbc->get_priority = __dbc_get_priority; - dbc->priority = dbp->priority; - - if ((dbc->txn = txn) != NULL) - dbc->locker = txn->locker; - else if (LOCKING_ON(env)) { - /* - * There are certain cases in which we want to create a - * new cursor with a particular locker ID that is known - * to be the same as (and thus not conflict with) an - * open cursor. - * - * The most obvious case is cursor duplication; when we - * call DBC->dup or __dbc_idup, we want to use the original - * cursor's locker ID. 
- * - * Another case is when updating secondary indices. Standard - * CDB locking would mean that we might block ourself: we need - * to open an update cursor in the secondary while an update - * cursor in the primary is open, and when the secondary and - * primary are subdatabases or we're using env-wide locking, - * this is disastrous. - * - * In these cases, our caller will pass a nonzero locker - * ID into this function. Use this locker ID instead of - * the default as the locker ID for our new cursor. - */ - if (locker != NULL) - dbc->locker = locker; - else { - /* - * If we are threaded then we need to set the - * proper thread id into the locker. - */ - if (DB_IS_THREADED(dbp)) { - env->dbenv->thread_id(env->dbenv, &pid, &tid); - __lock_set_thread_id(dbc->lref, pid, tid); - } - dbc->locker = dbc->lref; - } - } - - /* - * These fields change when we are used as a secondary index, so - * if the DB is a secondary, make sure they're set properly just - * in case we opened some cursors before we were associated. - * - * __dbc_get is used by all access methods, so this should be safe. - */ - if (F_ISSET(dbp, DB_AM_SECONDARY)) - dbc->get = dbc->c_get = __dbc_secondary_get_pp; - - if (LF_ISSET(DB_CURSOR_BULK) && dbtype == DB_BTREE) - F_SET(dbc, DBC_BULK); - if (LF_ISSET(DB_CURSOR_TRANSIENT)) - F_SET(dbc, DBC_TRANSIENT); - if (LF_ISSET(DBC_OPD)) - F_SET(dbc, DBC_OPD); - if (F_ISSET(dbp, DB_AM_RECOVER)) - F_SET(dbc, DBC_RECOVER); - if (F_ISSET(dbp, DB_AM_COMPENSATE)) - F_SET(dbc, DBC_DONTLOCK); -#ifdef HAVE_REPLICATION - /* - * If we are replicating from a down rev version then we must - * use old locking protocols. - */ - if (LOGGING_ON(env) && - ((LOG *)env->lg_handle-> - reginfo.primary)->persist.version < DB_LOGVERSION_LATCHING) - F_SET(dbc, DBC_DOWNREV); -#endif - - /* Refresh the DBC internal structure. 
*/ - cp = dbc->internal; - cp->opd = NULL; - cp->pdbc = NULL; - - cp->indx = 0; - cp->page = NULL; - cp->pgno = PGNO_INVALID; - cp->root = root; - cp->stream_start_pgno = cp->stream_curr_pgno = PGNO_INVALID; - cp->stream_off = 0; - - if (DB_IS_PARTITIONED(dbp)) { - DBC_PART_REFRESH(dbc); - } else switch (dbtype) { - case DB_BTREE: - case DB_RECNO: - if ((ret = __bamc_refresh(dbc)) != 0) - goto err; - break; - case DB_HASH: - case DB_QUEUE: - break; - case DB_UNKNOWN: - default: - ret = __db_unknown_type(env, "DB->cursor", dbp->type); - goto err; - } - - /* - * The transaction keeps track of how many cursors were opened within - * it to catch application errors where the cursor isn't closed when - * the transaction is resolved. - */ - if (txn != NULL) - ++txn->cursors; - if (ip != NULL) - dbc->thread_info = ip; - else if (txn != NULL) - dbc->thread_info = txn->thread_info; - else - ENV_GET_THREAD_INFO(env, dbc->thread_info); - - MUTEX_LOCK(env, dbp->mutex); - TAILQ_INSERT_TAIL(&dbp->active_queue, dbc, links); - F_SET(dbc, DBC_ACTIVE); - MUTEX_UNLOCK(env, dbp->mutex); - - *dbcp = dbc; - return (0); - -err: if (allocated) - __os_free(env, dbc); - return (ret); -} - -/* - * __db_put -- - * Store a key/data pair. - * - * PUBLIC: int __db_put __P((DB *, - * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t)); - */ -int -__db_put(dbp, ip, txn, key, data, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DBT *key, *data; - u_int32_t flags; -{ - DBC *dbc; - DBT tdata, tkey; - ENV *env; - void *bulk_kptr, *bulk_ptr; - db_recno_t recno; - u_int32_t cursor_flags; - int ret, t_ret; - - env = dbp->env; - - /* - * See the comment in __db_get() regarding DB_CURSOR_TRANSIENT. 
- * - * Note that the get in the DB_NOOVERWRITE case is safe to do with this - * flag set; if it errors in any way other than DB_NOTFOUND, we're - * going to close the cursor without doing anything else, and if it - * returns DB_NOTFOUND then it's safe to do a c_put(DB_KEYLAST) even if - * an access method moved the cursor, since that's not - * position-dependent. - */ - cursor_flags = DB_WRITELOCK; - if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) - cursor_flags |= DB_CURSOR_BULK; - else - cursor_flags |= DB_CURSOR_TRANSIENT; - if ((ret = __db_cursor(dbp, ip, txn, &dbc, cursor_flags)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, txn, "DB->put", key, data, flags); - - SET_RET_MEM(dbc, dbp); - - if (flags == DB_APPEND && !DB_IS_PRIMARY(dbp)) { - /* - * If there is an append callback, the value stored in - * data->data may be replaced and then freed. To avoid - * passing a freed pointer back to the user, just operate - * on a copy of the data DBT. - */ - tdata = *data; - - /* - * Append isn't a normal put operation; call the appropriate - * access method's append function. - */ - switch (dbp->type) { - case DB_QUEUE: - if ((ret = __qam_append(dbc, key, &tdata)) != 0) - goto err; - break; - case DB_RECNO: - if ((ret = __ram_append(dbc, key, &tdata)) != 0) - goto err; - break; - case DB_BTREE: - case DB_HASH: - case DB_UNKNOWN: - default: - /* The interface should prevent this. */ - DB_ASSERT(env, - dbp->type == DB_QUEUE || dbp->type == DB_RECNO); - - ret = __db_ferr(env, "DB->put", 0); - goto err; - } - - /* - * The append callback, if one exists, may have allocated - * a new tdata.data buffer. If so, free it. - */ - FREE_IF_NEEDED(env, &tdata); - - /* No need for a cursor put; we're done. 
*/ -#ifdef HAVE_COMPRESSION - } else if (DB_IS_COMPRESSED(dbp) && !F_ISSET(dbp, DB_AM_SECONDARY) && - !DB_IS_PRIMARY(dbp) && LIST_FIRST(&dbp->f_primaries) == NULL) { - ret = __dbc_put(dbc, key, data, flags); -#endif - } else if (LF_ISSET(DB_MULTIPLE)) { - ret = 0; - memset(&tkey, 0, sizeof(tkey)); - if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) { - tkey.data = &recno; - tkey.size = sizeof(recno); - } - memset(&tdata, 0, sizeof(tdata)); - DB_MULTIPLE_INIT(bulk_kptr, key); - DB_MULTIPLE_INIT(bulk_ptr, data); - key->doff = 0; - while (ret == 0) { - if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) - DB_MULTIPLE_RECNO_NEXT(bulk_kptr, key, - recno, tdata.data, tdata.size); - else - DB_MULTIPLE_NEXT(bulk_kptr, key, - tkey.data, tkey.size); - DB_MULTIPLE_NEXT(bulk_ptr, data, - tdata.data, tdata.size); - if (bulk_kptr == NULL || bulk_ptr == NULL) - break; - ret = __dbc_put(dbc, &tkey, &tdata, - LF_ISSET(DB_OPFLAGS_MASK)); - if (ret == 0) - ++key->doff; - } - } else if (LF_ISSET(DB_MULTIPLE_KEY)) { - ret = 0; - memset(&tkey, 0, sizeof(tkey)); - if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) { - tkey.data = &recno; - tkey.size = sizeof(recno); - } - memset(&tdata, 0, sizeof(tdata)); - DB_MULTIPLE_INIT(bulk_ptr, key); - while (ret == 0) { - if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) - DB_MULTIPLE_RECNO_NEXT(bulk_ptr, key, recno, - tdata.data, tdata.size); - else - DB_MULTIPLE_KEY_NEXT(bulk_ptr, key, tkey.data, - tkey.size, tdata.data, tdata.size); - if (bulk_ptr == NULL) - break; - ret = __dbc_put(dbc, &tkey, &tdata, - LF_ISSET(DB_OPFLAGS_MASK)); - if (ret == 0) - ++key->doff; - } - } else - ret = __dbc_put(dbc, key, data, flags); - -err: /* Close the cursor. */ - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_del -- - * Delete the items referenced by a key. 
- * - * PUBLIC: int __db_del __P((DB *, - * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, u_int32_t)); - */ -int -__db_del(dbp, ip, txn, key, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - DBC *dbc; - DBT data, tkey; - void *bulk_ptr; - db_recno_t recno; - u_int32_t cursor_flags, f_init, f_next; - int ret, t_ret; - - COMPQUIET(bulk_ptr, NULL); - /* Allocate a cursor. */ - cursor_flags = DB_WRITELOCK; - if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) - cursor_flags |= DB_CURSOR_BULK; - if ((ret = __db_cursor(dbp, ip, txn, &dbc, cursor_flags)) != 0) - goto err; - - DEBUG_LWRITE(dbc, txn, "DB->del", key, NULL, flags); - -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbp) && !F_ISSET(dbp, DB_AM_SECONDARY) && - !DB_IS_PRIMARY(dbp) && LIST_FIRST(&dbp->f_primaries) == NULL) { - F_SET(dbc, DBC_TRANSIENT); - ret = __dbc_bulk_del(dbc, key, flags); - goto err; - } -#endif - - /* - * Walk a cursor through the key/data pairs, deleting as we go. Set - * the DB_DBT_USERMEM flag, as this might be a threaded application - * and the flags checking will catch us. We don't actually want the - * keys or data, set DB_DBT_ISSET. We rely on __dbc_get to clear - * this. - */ - memset(&data, 0, sizeof(data)); - F_SET(&data, DB_DBT_USERMEM); - tkey = *key; - - f_init = LF_ISSET(DB_MULTIPLE_KEY) ? DB_GET_BOTH : DB_SET; - f_next = DB_NEXT_DUP; - - /* - * If locking (and we haven't already acquired CDB locks), set the - * read-modify-write flag. - */ - if (STD_LOCKING(dbc)) { - f_init |= DB_RMW; - f_next |= DB_RMW; - } - - if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { - if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) { - memset(&tkey, 0, sizeof(tkey)); - tkey.data = &recno; - tkey.size = sizeof(recno); - } - DB_MULTIPLE_INIT(bulk_ptr, key); - /* We return the number of keys deleted in doff. 
*/ - key->doff = 0; -bulk_next: if (dbp->type == DB_QUEUE || dbp->type == DB_RECNO) - DB_MULTIPLE_RECNO_NEXT(bulk_ptr, key, - recno, data.data, data.size); - else if (LF_ISSET(DB_MULTIPLE)) - DB_MULTIPLE_NEXT(bulk_ptr, key, tkey.data, tkey.size); - else - DB_MULTIPLE_KEY_NEXT(bulk_ptr, key, - tkey.data, tkey.size, data.data, data.size); - if (bulk_ptr == NULL) - goto err; - } - - /* We're not interested in the data -- do not return it. */ - F_SET(&tkey, DB_DBT_ISSET); - F_SET(&data, DB_DBT_ISSET); - - /* - * Optimize the simple cases. For all AMs if we don't have secondaries - * and are not a secondary and we aren't a foreign database and there - * are no dups then we can avoid a bunch of overhead. For queue we - * don't need to fetch the record since we delete by direct calculation - * from the record number. - * - * Hash permits an optimization in DB->del: since on-page duplicates are - * stored in a single HKEYDATA structure, it's possible to delete an - * entire set of them at once, and as the HKEYDATA has to be rebuilt - * and re-put each time it changes, this is much faster than deleting - * the duplicates one by one. Thus, if not pointing at an off-page - * duplicate set, and we're not using secondary indices (in which case - * we'd have to examine the items one by one anyway), let hash do this - * "quick delete". - * - * !!! - * Note that this is the only application-executed delete call in - * Berkeley DB that does not go through the __dbc_del function. - * If anything other than the delete itself (like a secondary index - * update) has to happen there in a particular situation, the - * conditions here should be modified not to use these optimizations. - * The ordinary AM-independent alternative will work just fine; - * it'll just be slower. 
- */ - if (!F_ISSET(dbp, DB_AM_SECONDARY) && !DB_IS_PRIMARY(dbp) && - LIST_FIRST(&dbp->f_primaries) == NULL) { -#ifdef HAVE_QUEUE - if (dbp->type == DB_QUEUE) { - ret = __qam_delete(dbc, &tkey, flags); - goto next; - } -#endif - - /* Fetch the first record. */ - if ((ret = __dbc_get(dbc, &tkey, &data, f_init)) != 0) - goto err; - -#ifdef HAVE_HASH - /* - * Hash "quick delete" removes all on-page duplicates. We - * can't do that if deleting specific key/data pairs. - */ - if (dbp->type == DB_HASH && !LF_ISSET(DB_MULTIPLE_KEY)) { - DBC *sdbc; - sdbc = dbc; -#ifdef HAVE_PARTITION - if (F_ISSET(dbc, DBC_PARTITIONED)) - sdbc = - ((PART_CURSOR*)dbc->internal)->sub_cursor; -#endif - if (sdbc->internal->opd == NULL) { - ret = __ham_quick_delete(sdbc); - goto next; - } - } -#endif - - if (!F_ISSET(dbp, DB_AM_DUP)) { - ret = dbc->am_del(dbc, 0); - goto next; - } - } else if ((ret = __dbc_get(dbc, &tkey, &data, f_init)) != 0) - goto err; - - /* Walk through the set of key/data pairs, deleting as we go. */ - for (;;) { - if ((ret = __dbc_del(dbc, flags)) != 0) - break; - /* - * With DB_MULTIPLE_KEY, the application has specified the - * exact records they want deleted. We don't need to walk - * through a set of duplicates. - */ - if (LF_ISSET(DB_MULTIPLE_KEY)) - break; - - F_SET(&tkey, DB_DBT_ISSET); - F_SET(&data, DB_DBT_ISSET); - if ((ret = __dbc_get(dbc, &tkey, &data, f_next)) != 0) { - if (ret == DB_NOTFOUND) - ret = 0; - break; - } - } - -next: if (ret == 0 && LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { - ++key->doff; - goto bulk_next; - } -err: /* Discard the cursor. */ - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_sync -- - * Flush the database cache. - * - * PUBLIC: int __db_sync __P((DB *)); - */ -int -__db_sync(dbp) - DB *dbp; -{ - int ret, t_ret; - - ret = 0; - - /* If the database was read-only, we're done. 
*/ - if (F_ISSET(dbp, DB_AM_RDONLY)) - return (0); - - /* If it's a Recno tree, write the backing source text file. */ - if (dbp->type == DB_RECNO) - ret = __ram_writeback(dbp); - - /* If the database was never backed by a database file, we're done. */ - if (F_ISSET(dbp, DB_AM_INMEM)) - return (ret); -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbp)) - ret = __partition_sync(dbp); - else -#endif - if (dbp->type == DB_QUEUE) - ret = __qam_sync(dbp); - else - /* Flush any dirty pages from the cache to the backing file. */ - if ((t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_associate -- - * Associate another database as a secondary index to this one. - * - * PUBLIC: int __db_associate __P((DB *, DB_THREAD_INFO *, DB_TXN *, DB *, - * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); - */ -int -__db_associate(dbp, ip, txn, sdbp, callback, flags) - DB *dbp, *sdbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - int (*callback) __P((DB *, const DBT *, const DBT *, DBT *)); - u_int32_t flags; -{ - DBC *pdbc, *sdbc; - DBT key, data, skey, *tskeyp; - ENV *env; - int build, ret, t_ret; - u_int32_t nskey; - - env = dbp->env; - pdbc = sdbc = NULL; - ret = 0; - - memset(&skey, 0, sizeof(DBT)); - nskey = 0; - tskeyp = NULL; - - /* - * Check to see if the secondary is empty -- and thus if we should - * build it -- before we link it in and risk making it show up in other - * threads. Do this first so that the databases remain unassociated on - * error. - */ - build = 0; - if (LF_ISSET(DB_CREATE)) { - if ((ret = __db_cursor(sdbp, ip, txn, &sdbc, 0)) != 0) - goto err; - - /* - * We don't care about key or data; we're just doing - * an existence check. - */ - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - F_SET(&key, DB_DBT_PARTIAL | DB_DBT_USERMEM); - F_SET(&data, DB_DBT_PARTIAL | DB_DBT_USERMEM); - if ((ret = __dbc_get(sdbc, &key, &data, - (STD_LOCKING(sdbc) ? 
DB_RMW : 0) | - DB_FIRST)) == DB_NOTFOUND) { - build = 1; - ret = 0; - } - - if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) - ret = t_ret; - - /* Reset for later error check. */ - sdbc = NULL; - - if (ret != 0) - goto err; - } - - /* - * Set up the database handle as a secondary. - */ - sdbp->s_callback = callback; - sdbp->s_primary = dbp; - - sdbp->stored_get = sdbp->get; - sdbp->get = __db_secondary_get; - - sdbp->stored_close = sdbp->close; - sdbp->close = __db_secondary_close_pp; - - F_SET(sdbp, DB_AM_SECONDARY); - - if (LF_ISSET(DB_IMMUTABLE_KEY)) - FLD_SET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY); - - /* - * Add the secondary to the list on the primary. Do it here - * so that we see any updates that occur while we're walking - * the primary. - */ - MUTEX_LOCK(env, dbp->mutex); - - /* See __db_s_next for an explanation of secondary refcounting. */ - DB_ASSERT(env, sdbp->s_refcnt == 0); - sdbp->s_refcnt = 1; - LIST_INSERT_HEAD(&dbp->s_secondaries, sdbp, s_links); - MUTEX_UNLOCK(env, dbp->mutex); - - if (build) { - /* - * We loop through the primary, putting each item we - * find into the new secondary. - * - * If we're using CDB, opening these two cursors puts us - * in a bit of a locking tangle: CDB locks are done on the - * primary, so that we stay deadlock-free, but that means - * that updating the secondary while we have a read cursor - * open on the primary will self-block. To get around this, - * we force the primary cursor to use the same locker ID - * as the secondary, so they won't conflict. This should - * be harmless even if we're not using CDB. - */ - if ((ret = __db_cursor(sdbp, ip, txn, &sdbc, - CDB_LOCKING(sdbp->env) ? DB_WRITECURSOR : 0)) != 0) - goto err; - if ((ret = __db_cursor_int(dbp, ip, - txn, dbp->type, PGNO_INVALID, 0, sdbc->locker, &pdbc)) != 0) - goto err; - - /* Lock out other threads, now that we have a locker. 
*/ - dbp->associate_locker = sdbc->locker; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - while ((ret = __dbc_get(pdbc, &key, &data, DB_NEXT)) == 0) { - if ((ret = callback(sdbp, &key, &data, &skey)) != 0) { - if (ret == DB_DONOTINDEX) - continue; - goto err; - } - if (F_ISSET(&skey, DB_DBT_MULTIPLE)) { -#ifdef DIAGNOSTIC - __db_check_skeyset(sdbp, &skey); -#endif - nskey = skey.size; - tskeyp = (DBT *)skey.data; - } else { - nskey = 1; - tskeyp = &skey; - } - SWAP_IF_NEEDED(sdbp, &key); - for (; nskey > 0; nskey--, tskeyp++) { - if ((ret = __dbc_put(sdbc, - tskeyp, &key, DB_UPDATE_SECONDARY)) != 0) - goto err; - FREE_IF_NEEDED(env, tskeyp); - } - SWAP_IF_NEEDED(sdbp, &key); - FREE_IF_NEEDED(env, &skey); - } - if (ret == DB_NOTFOUND) - ret = 0; - } - -err: if (sdbc != NULL && (t_ret = __dbc_close(sdbc)) != 0 && ret == 0) - ret = t_ret; - - if (pdbc != NULL && (t_ret = __dbc_close(pdbc)) != 0 && ret == 0) - ret = t_ret; - - dbp->associate_locker = NULL; - - for (; nskey > 0; nskey--, tskeyp++) - FREE_IF_NEEDED(env, tskeyp); - FREE_IF_NEEDED(env, &skey); - - return (ret); -} - -/* - * __db_secondary_get -- - * This wrapper function for DB->pget() is the DB->get() function - * on a database which has been made into a secondary index. - */ -static int -__db_secondary_get(sdbp, txn, skey, data, flags) - DB *sdbp; - DB_TXN *txn; - DBT *skey, *data; - u_int32_t flags; -{ - DB_ASSERT(sdbp->env, F_ISSET(sdbp, DB_AM_SECONDARY)); - return (__db_pget_pp(sdbp, txn, skey, NULL, data, flags)); -} - -/* - * __db_secondary_close -- - * Wrapper function for DB->close() which we use on secondaries to - * manage refcounting and make sure we don't close them underneath - * a primary that is updating. 
- * - * PUBLIC: int __db_secondary_close __P((DB *, u_int32_t)); - */ -int -__db_secondary_close(sdbp, flags) - DB *sdbp; - u_int32_t flags; -{ - DB *primary; - ENV *env; - int doclose; - - doclose = 0; - primary = sdbp->s_primary; - env = primary->env; - - MUTEX_LOCK(env, primary->mutex); - /* - * Check the refcount--if it was at 1 when we were called, no - * thread is currently updating this secondary through the primary, - * so it's safe to close it for real. - * - * If it's not safe to do the close now, we do nothing; the - * database will actually be closed when the refcount is decremented, - * which can happen in either __db_s_next or __db_s_done. - */ - DB_ASSERT(env, sdbp->s_refcnt != 0); - if (--sdbp->s_refcnt == 0) { - LIST_REMOVE(sdbp, s_links); - /* We don't want to call close while the mutex is held. */ - doclose = 1; - } - MUTEX_UNLOCK(env, primary->mutex); - - /* - * sdbp->close is this function; call the real one explicitly if - * need be. - */ - return (doclose ? __db_close(sdbp, NULL, flags) : 0); -} - -/* - * __db_associate_foreign -- - * Associate this database (fdbp) as a foreign constraint to another - * database (pdbp). That is, dbp's keys appear as foreign key values in - * pdbp. - * - * PUBLIC: int __db_associate_foreign __P((DB *, DB *, - * PUBLIC: int (*)(DB *, const DBT *, DBT *, const DBT *, int *), - * PUBLIC: u_int32_t)); - */ -int -__db_associate_foreign(fdbp, pdbp, callback, flags) - DB *fdbp, *pdbp; - int (*callback)(DB *, const DBT *, DBT *, const DBT *, int *); - u_int32_t flags; -{ - DB_FOREIGN_INFO *f_info; - ENV *env; - int ret; - - env = fdbp->env; - ret = 0; - - if ((ret = __os_malloc(env, sizeof(DB_FOREIGN_INFO), &f_info)) != 0) { - return ret; - } - memset(f_info, 0, sizeof(DB_FOREIGN_INFO)); - - f_info->dbp = pdbp; - f_info->callback = callback; - - /* - * It might be wise to filter this, but for now the flags only - * set the delete action type. 
- */ - FLD_SET(f_info->flags, flags); - - /* - * Add f_info to the foreign database's list of primaries. That is to - * say, fdbp->f_primaries lists all databases for which fdbp is a - * foreign constraint. - */ - MUTEX_LOCK(env, fdbp->mutex); - LIST_INSERT_HEAD(&fdbp->f_primaries, f_info, f_links); - MUTEX_UNLOCK(env, fdbp->mutex); - - /* - * Associate fdbp as pdbp's foreign db, for referential integrity - * checks. We don't allow the foreign db to be changed, because we - * currently have no way of removing pdbp from the old foreign db's list - * of primaries. - */ - if (pdbp->s_foreign != NULL) - return (EINVAL); - pdbp->s_foreign = fdbp; - - return (ret); -} - -static int -__dbc_set_priority(dbc, priority) - DBC *dbc; - DB_CACHE_PRIORITY priority; -{ - dbc->priority = priority; - return (0); -} - -static int -__dbc_get_priority(dbc, priority) - DBC *dbc; - DB_CACHE_PRIORITY *priority; -{ - *priority = dbc->priority; - return (0); -} diff --git a/db/db_auto.c b/db/db_auto.c deleted file mode 100644 index 2ce4199..0000000 --- a/db/db_auto.c +++ /dev/null @@ -1,3267 +0,0 @@ -/* Do not edit: automatically built by gen_rec.awk. 
*/ - -#include "db_config.h" -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/db_dispatch.h" -#include "dbinc/db_am.h" -#include "dbinc/log.h" -#include "dbinc/txn.h" - -/* - * PUBLIC: int __db_addrem_read __P((ENV *, DB **, void *, void *, - * PUBLIC: __db_addrem_args **)); - */ -int -__db_addrem_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_addrem_args **argpp; -{ - __db_addrem_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_addrem_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &argp->opcode, bp); - bp += sizeof(argp->opcode); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &argp->indx, bp); - bp += sizeof(argp->indx); - - LOGCOPY_32(env, &argp->nbytes, bp); - bp += sizeof(argp->nbytes); - - memset(&argp->hdr, 0, sizeof(argp->hdr)); - LOGCOPY_32(env,&argp->hdr.size, bp); - bp += sizeof(u_int32_t); - argp->hdr.data = bp; - bp += argp->hdr.size; - - memset(&argp->dbt, 0, sizeof(argp->dbt)); - LOGCOPY_32(env,&argp->dbt.size, bp); - bp += sizeof(u_int32_t); - argp->dbt.data = bp; - bp += argp->dbt.size; - - LOGCOPY_TOLSN(env, &argp->pagelsn, bp); - bp += sizeof(DB_LSN); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_addrem_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, 
u_int32_t, db_pgno_t, u_int32_t, u_int32_t, - * PUBLIC: const DBT *, const DBT *, DB_LSN *)); - */ -int -__db_addrem_log(dbp, txnp, ret_lsnp, flags, - opcode, pgno, indx, nbytes, hdr, - dbt, pagelsn) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t opcode; - db_pgno_t pgno; - u_int32_t indx; - u_int32_t nbytes; - const DBT *hdr; - const DBT *dbt; - DB_LSN * pagelsn; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_addrem; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (hdr == NULL ? 0 : hdr->size) - + sizeof(u_int32_t) + (dbt == NULL ? 
0 : dbt->size) - + sizeof(*pagelsn); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, bp, &opcode); - bp += sizeof(opcode); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - LOGCOPY_32(env, bp, &indx); - bp += sizeof(indx); - - LOGCOPY_32(env, bp, &nbytes); - bp += sizeof(nbytes); - - if (hdr == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &hdr->size); - bp += sizeof(hdr->size); - memcpy(bp, hdr->data, hdr->size); - bp += hdr->size; - } - - if (dbt == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &dbt->size); - bp += sizeof(dbt->size); - memcpy(bp, dbt->data, dbt->size); - bp += dbt->size; - } - - if (pagelsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, pagelsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, pagelsn); - } else - memset(bp, 0, sizeof(*pagelsn)); - bp += sizeof(*pagelsn); - - DB_ASSERT(env, - (u_int32_t)(bp - 
(u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_addrem_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_big_read __P((ENV *, DB **, void *, void *, - * PUBLIC: __db_big_args **)); - */ -int -__db_big_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_big_args **argpp; -{ - __db_big_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_big_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &argp->opcode, bp); - bp += sizeof(argp->opcode); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = 
__dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &uinttmp, bp); - argp->prev_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &uinttmp, bp); - argp->next_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->dbt, 0, sizeof(argp->dbt)); - LOGCOPY_32(env,&argp->dbt.size, bp); - bp += sizeof(u_int32_t); - argp->dbt.data = bp; - bp += argp->dbt.size; - - LOGCOPY_TOLSN(env, &argp->pagelsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_TOLSN(env, &argp->prevlsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_TOLSN(env, &argp->nextlsn, bp); - bp += sizeof(DB_LSN); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_big_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, u_int32_t, db_pgno_t, db_pgno_t, db_pgno_t, - * PUBLIC: const DBT *, DB_LSN *, DB_LSN *, DB_LSN *)); - */ -int -__db_big_log(dbp, txnp, ret_lsnp, flags, - opcode, pgno, prev_pgno, next_pgno, dbt, - pagelsn, prevlsn, nextlsn) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - u_int32_t opcode; - db_pgno_t pgno; - db_pgno_t prev_pgno; - db_pgno_t next_pgno; - const DBT *dbt; - DB_LSN * pagelsn; - DB_LSN * prevlsn; - DB_LSN * nextlsn; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_big; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign 
begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (dbt == NULL ? 0 : dbt->size) - + sizeof(*pagelsn) - + sizeof(*prevlsn) - + sizeof(*nextlsn); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, bp, &opcode); - bp += sizeof(opcode); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)prev_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)next_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (dbt == NULL) { - zero = 0; - LOGCOPY_32(env, bp, 
&zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &dbt->size); - bp += sizeof(dbt->size); - memcpy(bp, dbt->data, dbt->size); - bp += dbt->size; - } - - if (pagelsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(pagelsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, pagelsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, pagelsn); - } else - memset(bp, 0, sizeof(*pagelsn)); - bp += sizeof(*pagelsn); - - if (prevlsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(prevlsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, prevlsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, prevlsn); - } else - memset(bp, 0, sizeof(*prevlsn)); - bp += sizeof(*prevlsn); - - if (nextlsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(nextlsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, nextlsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, nextlsn); - } else - memset(bp, 0, sizeof(*nextlsn)); - bp += sizeof(*nextlsn); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_big_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_ovref_read __P((ENV *, DB **, void *, void *, - * PUBLIC: __db_ovref_args **)); - */ -int -__db_ovref_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_ovref_args **argpp; -{ - __db_ovref_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_ovref_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &uinttmp, bp); - argp->adjust = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->lsn, bp); - bp += sizeof(DB_LSN); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_ovref_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, 
int32_t, DB_LSN *)); - */ -int -__db_ovref_log(dbp, txnp, ret_lsnp, flags, pgno, adjust, lsn) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - int32_t adjust; - DB_LSN * lsn; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_ovref; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*lsn); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)adjust; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (lsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, lsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, lsn); - } else - memset(bp, 0, sizeof(*lsn)); - bp += sizeof(*lsn); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp 
!= ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_ovref_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_relink_42_read __P((ENV *, DB **, void *, - * PUBLIC: void *, __db_relink_42_args **)); - */ -int -__db_relink_42_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_relink_42_args **argpp; -{ - __db_relink_42_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_relink_42_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &argp->opcode, bp); - bp += sizeof(argp->opcode); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - 
LOGCOPY_TOLSN(env, &argp->lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->prev = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->lsn_prev, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->next = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->lsn_next, bp); - bp += sizeof(DB_LSN); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_debug_read __P((ENV *, void *, __db_debug_args **)); - */ -int -__db_debug_read(env, recbuf, argpp) - ENV *env; - void *recbuf; - __db_debug_args **argpp; -{ - __db_debug_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_debug_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - memset(&argp->op, 0, sizeof(argp->op)); - LOGCOPY_32(env,&argp->op.size, bp); - bp += sizeof(u_int32_t); - argp->op.data = bp; - bp += argp->op.size; - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->key, 0, sizeof(argp->key)); - LOGCOPY_32(env,&argp->key.size, bp); - bp += sizeof(u_int32_t); - argp->key.data = bp; - bp += argp->key.size; - - memset(&argp->data, 0, sizeof(argp->data)); - LOGCOPY_32(env,&argp->data.size, bp); - bp += sizeof(u_int32_t); - argp->data.data = bp; - bp += argp->data.size; - - LOGCOPY_32(env, &argp->arg_flags, bp); - bp += sizeof(argp->arg_flags); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_debug_log __P((ENV *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, const DBT *, int32_t, const DBT *, const DBT *, - * PUBLIC: u_int32_t)); - */ -int -__db_debug_log(env, txnp, ret_lsnp, 
flags, - op, fileid, key, data, arg_flags) - ENV *env; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - const DBT *op; - int32_t fileid; - const DBT *key; - const DBT *data; - u_int32_t arg_flags; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - rlsnp = ret_lsnp; - rectype = DB___db_debug; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) + (op == NULL ? 0 : op->size) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (key == NULL ? 0 : key->size) - + sizeof(u_int32_t) + (data == NULL ? 
0 : data->size) - + sizeof(u_int32_t); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - if (op == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &op->size); - bp += sizeof(op->size); - memcpy(bp, op->data, op->size); - bp += op->size; - } - - uinttmp = (u_int32_t)fileid; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (key == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &key->size); - bp += sizeof(key->size); - memcpy(bp, key->data, key->size); - bp += key->size; - } - - if (data == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &data->size); - bp += sizeof(data->size); - memcpy(bp, data->data, data->size); - bp += data->size; - } - - LOGCOPY_32(env, bp, &arg_flags); - bp += sizeof(arg_flags); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the 
debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_debug_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_noop_read __P((ENV *, DB **, void *, void *, - * PUBLIC: __db_noop_args **)); - */ -int -__db_noop_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_noop_args **argpp; -{ - __db_noop_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_noop_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->prevlsn, bp); - bp += sizeof(DB_LSN); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_noop_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, 
DB_LSN *)); - */ -int -__db_noop_log(dbp, txnp, ret_lsnp, flags, pgno, prevlsn) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - DB_LSN * prevlsn; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_noop; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*prevlsn); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (prevlsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(prevlsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, prevlsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, prevlsn); - } else - memset(bp, 0, sizeof(*prevlsn)); - bp += sizeof(*prevlsn); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* 
- * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_noop_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_pg_alloc_42_read __P((ENV *, DB **, void *, - * PUBLIC: void *, __db_pg_alloc_42_args **)); - */ -int -__db_pg_alloc_42_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_alloc_42_args **argpp; -{ - __db_pg_alloc_42_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_alloc_42_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_TOLSN(env, &argp->meta_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->meta_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->page_lsn, bp); - bp += sizeof(DB_LSN); - - 
LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &argp->ptype, bp); - bp += sizeof(argp->ptype); - - LOGCOPY_32(env, &uinttmp, bp); - argp->next = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_alloc_read __P((ENV *, DB **, void *, void *, - * PUBLIC: __db_pg_alloc_args **)); - */ -int -__db_pg_alloc_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_alloc_args **argpp; -{ - __db_pg_alloc_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_alloc_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_TOLSN(env, &argp->meta_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->meta_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->page_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &argp->ptype, bp); - bp += sizeof(argp->ptype); - - LOGCOPY_32(env, &uinttmp, bp); - argp->next = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &uinttmp, bp); - argp->last_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_alloc_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, 
DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t, u_int32_t, - * PUBLIC: db_pgno_t, db_pgno_t)); - */ -int -__db_pg_alloc_log(dbp, txnp, ret_lsnp, flags, meta_lsn, meta_pgno, page_lsn, pgno, ptype, - next, last_pgno) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - DB_LSN * meta_lsn; - db_pgno_t meta_pgno; - DB_LSN * page_lsn; - db_pgno_t pgno; - u_int32_t ptype; - db_pgno_t next; - db_pgno_t last_pgno; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_pg_alloc; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(*meta_lsn) - + sizeof(u_int32_t) - + sizeof(*page_lsn) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - if (meta_lsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, meta_lsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, meta_lsn); - } else - memset(bp, 0, sizeof(*meta_lsn)); - bp += sizeof(*meta_lsn); - - uinttmp = (u_int32_t)meta_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (page_lsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(page_lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, page_lsn)) != 0) - return 
(ret); - } - LOGCOPY_FROMLSN(env, bp, page_lsn); - } else - memset(bp, 0, sizeof(*page_lsn)); - bp += sizeof(*page_lsn); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - LOGCOPY_32(env, bp, &ptype); - bp += sizeof(ptype); - - uinttmp = (u_int32_t)next; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)last_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_pg_alloc_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_pg_free_42_read __P((ENV *, DB **, void *, - * PUBLIC: void *, __db_pg_free_42_args **)); - */ -int -__db_pg_free_42_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_free_42_args **argpp; -{ - __db_pg_free_42_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_free_42_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = 
recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->meta_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->meta_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->header, 0, sizeof(argp->header)); - LOGCOPY_32(env,&argp->header.size, bp); - bp += sizeof(u_int32_t); - argp->header.data = bp; - bp += argp->header.size; - - LOGCOPY_32(env, &uinttmp, bp); - argp->next = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_free_read __P((ENV *, DB **, void *, void *, - * PUBLIC: __db_pg_free_args **)); - */ -int -__db_pg_free_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_free_args **argpp; -{ - __db_pg_free_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_free_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; 
- ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->meta_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->meta_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->header, 0, sizeof(argp->header)); - LOGCOPY_32(env,&argp->header.size, bp); - bp += sizeof(u_int32_t); - argp->header.data = bp; - bp += argp->header.size; - if (LOG_SWAPPED(env) && dbpp != NULL && *dbpp != NULL) { - int t_ret; - if ((t_ret = __db_pageswap(*dbpp, (PAGE *)argp->header.data, - (size_t)argp->header.size, NULL, 1)) != 0) - return (t_ret); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->next = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &uinttmp, bp); - argp->last_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_free_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, const DBT *, - * PUBLIC: db_pgno_t, db_pgno_t)); - */ -int -__db_pg_free_log(dbp, txnp, ret_lsnp, flags, pgno, meta_lsn, meta_pgno, header, next, - last_pgno) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - DB_LSN * meta_lsn; - db_pgno_t meta_pgno; - const DBT *header; - db_pgno_t next; - db_pgno_t last_pgno; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_pg_free; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if 
(TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*meta_lsn) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (header == NULL ? 0 : header->size) - + sizeof(u_int32_t) - + sizeof(u_int32_t); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (meta_lsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, meta_lsn)) != 0) - return 
(ret); - } - LOGCOPY_FROMLSN(env, bp, meta_lsn); - } else - memset(bp, 0, sizeof(*meta_lsn)); - bp += sizeof(*meta_lsn); - - uinttmp = (u_int32_t)meta_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (header == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &header->size); - bp += sizeof(header->size); - memcpy(bp, header->data, header->size); - if (LOG_SWAPPED(env)) - if ((ret = __db_pageswap(dbp, - (PAGE *)bp, (size_t)header->size, (DBT *)NULL, 0)) != 0) - return (ret); - bp += header->size; - } - - uinttmp = (u_int32_t)next; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)last_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_pg_free_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_cksum_read __P((ENV *, void *, __db_cksum_args **)); - */ -int -__db_cksum_read(env, recbuf, argpp) - ENV *env; - void *recbuf; - __db_cksum_args **argpp; -{ - __db_cksum_args *argp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_cksum_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_cksum_log __P((ENV *, DB_TXN *, DB_LSN *, u_int32_t)); - */ -int -__db_cksum_log(env, txnp, ret_lsnp, flags) - ENV *env; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - u_int32_t rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - rlsnp = ret_lsnp; - rectype = DB___db_cksum; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; 
- } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_cksum_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_pg_freedata_42_read __P((ENV *, DB **, void *, - * PUBLIC: void *, __db_pg_freedata_42_args **)); - */ -int -__db_pg_freedata_42_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_freedata_42_args **argpp; -{ - __db_pg_freedata_42_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_freedata_42_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->meta_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->meta_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->header, 0, sizeof(argp->header)); - 
LOGCOPY_32(env,&argp->header.size, bp); - bp += sizeof(u_int32_t); - argp->header.data = bp; - bp += argp->header.size; - - LOGCOPY_32(env, &uinttmp, bp); - argp->next = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->data, 0, sizeof(argp->data)); - LOGCOPY_32(env,&argp->data.size, bp); - bp += sizeof(u_int32_t); - argp->data.data = bp; - bp += argp->data.size; - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_freedata_read __P((ENV *, DB **, void *, - * PUBLIC: void *, __db_pg_freedata_args **)); - */ -int -__db_pg_freedata_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_freedata_args **argpp; -{ - __db_pg_freedata_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_freedata_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->meta_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->meta_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->header, 0, sizeof(argp->header)); - LOGCOPY_32(env,&argp->header.size, bp); - bp += sizeof(u_int32_t); - argp->header.data = bp; - bp += argp->header.size; - - LOGCOPY_32(env, &uinttmp, bp); - argp->next = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &uinttmp, bp); - argp->last_pgno 
= (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->data, 0, sizeof(argp->data)); - LOGCOPY_32(env,&argp->data.size, bp); - bp += sizeof(u_int32_t); - argp->data.data = bp; - bp += argp->data.size; - if (LOG_SWAPPED(env) && dbpp != NULL && *dbpp != NULL) { - int t_ret; - if ((t_ret = __db_pageswap(*dbpp, - (PAGE *)argp->header.data, (size_t)argp->header.size, - &argp->data, 1)) != 0) - return (t_ret); - } - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_freedata_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, const DBT *, - * PUBLIC: db_pgno_t, db_pgno_t, const DBT *)); - */ -int -__db_pg_freedata_log(dbp, txnp, ret_lsnp, flags, pgno, meta_lsn, meta_pgno, header, next, - last_pgno, data) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - DB_LSN * meta_lsn; - db_pgno_t meta_pgno; - const DBT *header; - db_pgno_t next; - db_pgno_t last_pgno; - const DBT *data; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_pg_freedata; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*meta_lsn) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (header == NULL ? 0 : header->size) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (data == NULL ? 0 : data->size); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (meta_lsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, meta_lsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, meta_lsn); - } else - memset(bp, 0, sizeof(*meta_lsn)); - bp += sizeof(*meta_lsn); - - uinttmp = (u_int32_t)meta_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (header == NULL) { - zero = 0; - 
LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &header->size); - bp += sizeof(header->size); - memcpy(bp, header->data, header->size); - if (LOG_SWAPPED(env)) - if ((ret = __db_pageswap(dbp, - (PAGE *)bp, (size_t)header->size, (DBT *)data, 0)) != 0) - return (ret); - bp += header->size; - } - - uinttmp = (u_int32_t)next; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)last_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (data == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &data->size); - bp += sizeof(data->size); - memcpy(bp, data->data, data->size); - if (LOG_SWAPPED(env) && F_ISSET(data, DB_DBT_APPMALLOC)) - __os_free(env, data->data); - bp += data->size; - } - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_pg_freedata_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_pg_init_read __P((ENV *, DB **, void *, void *, - * PUBLIC: __db_pg_init_args **)); - */ -int -__db_pg_init_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_init_args **argpp; -{ - __db_pg_init_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_init_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->header, 0, sizeof(argp->header)); - LOGCOPY_32(env,&argp->header.size, bp); - bp += sizeof(u_int32_t); - argp->header.data = bp; - bp += argp->header.size; - - memset(&argp->data, 0, sizeof(argp->data)); - LOGCOPY_32(env,&argp->data.size, bp); - bp += 
sizeof(u_int32_t); - argp->data.data = bp; - bp += argp->data.size; - if (LOG_SWAPPED(env) && dbpp != NULL && *dbpp != NULL) { - int t_ret; - if ((t_ret = __db_pageswap(*dbpp, - (PAGE *)argp->header.data, (size_t)argp->header.size, - &argp->data, 1)) != 0) - return (t_ret); - } - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_init_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, const DBT *, const DBT *)); - */ -int -__db_pg_init_log(dbp, txnp, ret_lsnp, flags, pgno, header, data) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t pgno; - const DBT *header; - const DBT *data; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_pg_init; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. - */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (header == NULL ? 0 : header->size) - + sizeof(u_int32_t) + (data == NULL ? 
0 : data->size); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (header == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &header->size); - bp += sizeof(header->size); - memcpy(bp, header->data, header->size); - if (LOG_SWAPPED(env)) - if ((ret = __db_pageswap(dbp, - (PAGE *)bp, (size_t)header->size, (DBT *)data, 0)) != 0) - return (ret); - bp += header->size; - } - - if (data == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &data->size); - bp += sizeof(data->size); - memcpy(bp, data->data, data->size); - if (LOG_SWAPPED(env) && F_ISSET(data, DB_DBT_APPMALLOC)) - __os_free(env, data->data); - bp += data->size; - } - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; 
-#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. - */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_pg_init_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_pg_sort_44_read __P((ENV *, DB **, void *, - * PUBLIC: void *, __db_pg_sort_44_args **)); - */ -int -__db_pg_sort_44_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_sort_44_args **argpp; -{ - __db_pg_sort_44_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_sort_44_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->meta = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->meta_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->last_free = 
(db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->last_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->last_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->list, 0, sizeof(argp->list)); - LOGCOPY_32(env,&argp->list.size, bp); - bp += sizeof(u_int32_t); - argp->list.data = bp; - bp += argp->list.size; - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_trunc_read __P((ENV *, DB **, void *, void *, - * PUBLIC: __db_pg_trunc_args **)); - */ -int -__db_pg_trunc_read(env, dbpp, td, recbuf, argpp) - ENV *env; - DB **dbpp; - void *td; - void *recbuf; - __db_pg_trunc_args **argpp; -{ - __db_pg_trunc_args *argp; - u_int32_t uinttmp; - u_int8_t *bp; - int ret; - - if ((ret = __os_malloc(env, - sizeof(__db_pg_trunc_args) + sizeof(DB_TXN), &argp)) != 0) - return (ret); - bp = recbuf; - argp->txnp = (DB_TXN *)&argp[1]; - memset(argp->txnp, 0, sizeof(DB_TXN)); - - argp->txnp->td = td; - LOGCOPY_32(env, &argp->type, bp); - bp += sizeof(argp->type); - - LOGCOPY_32(env, &argp->txnp->txnid, bp); - bp += sizeof(argp->txnp->txnid); - - LOGCOPY_TOLSN(env, &argp->prev_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->fileid = (int32_t)uinttmp; - bp += sizeof(uinttmp); - if (dbpp != NULL) { - *dbpp = NULL; - ret = __dbreg_id_to_db( - env, argp->txnp, dbpp, argp->fileid, 1); - } - - LOGCOPY_32(env, &uinttmp, bp); - argp->meta = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->meta_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->last_free = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_TOLSN(env, &argp->last_lsn, bp); - bp += sizeof(DB_LSN); - - LOGCOPY_32(env, &uinttmp, bp); - argp->next_free = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - LOGCOPY_32(env, &uinttmp, bp); - argp->last_pgno = (db_pgno_t)uinttmp; - bp += sizeof(uinttmp); - - memset(&argp->list, 0, sizeof(argp->list)); - 
LOGCOPY_32(env,&argp->list.size, bp); - bp += sizeof(u_int32_t); - argp->list.data = bp; - bp += argp->list.size; - - *argpp = argp; - return (ret); -} - -/* - * PUBLIC: int __db_pg_trunc_log __P((DB *, DB_TXN *, DB_LSN *, - * PUBLIC: u_int32_t, db_pgno_t, DB_LSN *, db_pgno_t, DB_LSN *, db_pgno_t, - * PUBLIC: db_pgno_t, const DBT *)); - */ -int -__db_pg_trunc_log(dbp, txnp, ret_lsnp, flags, meta, meta_lsn, last_free, last_lsn, next_free, - last_pgno, list) - DB *dbp; - DB_TXN *txnp; - DB_LSN *ret_lsnp; - u_int32_t flags; - db_pgno_t meta; - DB_LSN * meta_lsn; - db_pgno_t last_free; - DB_LSN * last_lsn; - db_pgno_t next_free; - db_pgno_t last_pgno; - const DBT *list; -{ - DBT logrec; - DB_LSN *lsnp, null_lsn, *rlsnp; - DB_TXNLOGREC *lr; - ENV *env; - u_int32_t zero, uinttmp, rectype, txn_num; - u_int npad; - u_int8_t *bp; - int is_durable, ret; - - COMPQUIET(lr, NULL); - - env = dbp->env; - rlsnp = ret_lsnp; - rectype = DB___db_pg_trunc; - npad = 0; - ret = 0; - - if (LF_ISSET(DB_LOG_NOT_DURABLE) || - F_ISSET(dbp, DB_AM_NOT_DURABLE)) { - if (txnp == NULL) - return (0); - is_durable = 0; - } else - is_durable = 1; - - if (txnp == NULL) { - txn_num = 0; - lsnp = &null_lsn; - null_lsn.file = null_lsn.offset = 0; - } else { - if (TAILQ_FIRST(&txnp->kids) != NULL && - (ret = __txn_activekids(env, rectype, txnp)) != 0) - return (ret); - /* - * We need to assign begin_lsn while holding region mutex. - * That assignment is done inside the DbEnv->log_put call, - * so pass in the appropriate memory location to be filled - * in by the log_put code. 
- */ - DB_SET_TXN_LSNP(txnp, &rlsnp, &lsnp); - txn_num = txnp->txnid; - } - - DB_ASSERT(env, dbp->log_filename != NULL); - if (dbp->log_filename->id == DB_LOGFILEID_INVALID && - (ret = __dbreg_lazy_id(dbp)) != 0) - return (ret); - - logrec.size = sizeof(rectype) + sizeof(txn_num) + sizeof(DB_LSN) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(*meta_lsn) - + sizeof(u_int32_t) - + sizeof(*last_lsn) - + sizeof(u_int32_t) - + sizeof(u_int32_t) - + sizeof(u_int32_t) + (list == NULL ? 0 : list->size); - if (CRYPTO_ON(env)) { - npad = env->crypto_handle->adj_size(logrec.size); - logrec.size += npad; - } - - if (is_durable || txnp == NULL) { - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) - return (ret); - } else { - if ((ret = __os_malloc(env, - logrec.size + sizeof(DB_TXNLOGREC), &lr)) != 0) - return (ret); -#ifdef DIAGNOSTIC - if ((ret = - __os_malloc(env, logrec.size, &logrec.data)) != 0) { - __os_free(env, lr); - return (ret); - } -#else - logrec.data = lr->data; -#endif - } - if (npad > 0) - memset((u_int8_t *)logrec.data + logrec.size - npad, 0, npad); - - bp = logrec.data; - - LOGCOPY_32(env, bp, &rectype); - bp += sizeof(rectype); - - LOGCOPY_32(env, bp, &txn_num); - bp += sizeof(txn_num); - - LOGCOPY_FROMLSN(env, bp, lsnp); - bp += sizeof(DB_LSN); - - uinttmp = (u_int32_t)dbp->log_filename->id; - LOGCOPY_32(env, bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)meta; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (meta_lsn != NULL) { - if (txnp != NULL) { - LOG *lp = env->lg_handle->reginfo.primary; - if (LOG_COMPARE(meta_lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, meta_lsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, meta_lsn); - } else - memset(bp, 0, sizeof(*meta_lsn)); - bp += sizeof(*meta_lsn); - - uinttmp = (u_int32_t)last_free; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (last_lsn != NULL) { - if (txnp != NULL) { - LOG *lp = 
env->lg_handle->reginfo.primary; - if (LOG_COMPARE(last_lsn, &lp->lsn) >= 0 && (ret = - __log_check_page_lsn(env, dbp, last_lsn)) != 0) - return (ret); - } - LOGCOPY_FROMLSN(env, bp, last_lsn); - } else - memset(bp, 0, sizeof(*last_lsn)); - bp += sizeof(*last_lsn); - - uinttmp = (u_int32_t)next_free; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - uinttmp = (u_int32_t)last_pgno; - LOGCOPY_32(env,bp, &uinttmp); - bp += sizeof(uinttmp); - - if (list == NULL) { - zero = 0; - LOGCOPY_32(env, bp, &zero); - bp += sizeof(u_int32_t); - } else { - LOGCOPY_32(env, bp, &list->size); - bp += sizeof(list->size); - memcpy(bp, list->data, list->size); - bp += list->size; - } - - DB_ASSERT(env, - (u_int32_t)(bp - (u_int8_t *)logrec.data) <= logrec.size); - - if (is_durable || txnp == NULL) { - if ((ret = __log_put(env, rlsnp,(DBT *)&logrec, - flags | DB_LOG_NOCOPY)) == 0 && txnp != NULL) { - *lsnp = *rlsnp; - if (rlsnp != ret_lsnp) - *ret_lsnp = *rlsnp; - } - } else { - ret = 0; -#ifdef DIAGNOSTIC - /* - * Set the debug bit if we are going to log non-durable - * transactions so they will be ignored by recovery. 
- */ - memcpy(lr->data, logrec.data, logrec.size); - rectype |= DB_debug_FLAG; - LOGCOPY_32(env, logrec.data, &rectype); - - if (!IS_REP_CLIENT(env)) - ret = __log_put(env, - rlsnp, (DBT *)&logrec, flags | DB_LOG_NOCOPY); -#endif - STAILQ_INSERT_HEAD(&txnp->logs, lr, links); - F_SET((TXN_DETAIL *)txnp->td, TXN_DTL_INMEMORY); - LSN_NOT_LOGGED(*ret_lsnp); - } - -#ifdef LOG_DIAGNOSTIC - if (ret != 0) - (void)__db_pg_trunc_print(env, - (DBT *)&logrec, ret_lsnp, DB_TXN_PRINT, NULL); -#endif - -#ifdef DIAGNOSTIC - __os_free(env, logrec.data); -#else - if (is_durable || txnp == NULL) - __os_free(env, logrec.data); -#endif - return (ret); -} - -/* - * PUBLIC: int __db_init_recover __P((ENV *, DB_DISTAB *)); - */ -int -__db_init_recover(env, dtabp) - ENV *env; - DB_DISTAB *dtabp; -{ - int ret; - - if ((ret = __db_add_recovery_int(env, dtabp, - __db_addrem_recover, DB___db_addrem)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_big_recover, DB___db_big)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_ovref_recover, DB___db_ovref)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_debug_recover, DB___db_debug)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_noop_recover, DB___db_noop)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_alloc_recover, DB___db_pg_alloc)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_free_recover, DB___db_pg_free)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_cksum_recover, DB___db_cksum)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_freedata_recover, DB___db_pg_freedata)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_init_recover, DB___db_pg_init)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_trunc_recover, DB___db_pg_trunc)) != 0) - return 
(ret); - return (0); -} diff --git a/db/db_autop.c b/db/db_autop.c deleted file mode 100644 index f3b0635..0000000 --- a/db/db_autop.c +++ /dev/null @@ -1,802 +0,0 @@ -/* Do not edit: automatically built by gen_rec.awk. */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/db_dispatch.h" -#include "dbinc/db_am.h" -#include "dbinc/log.h" -#include "dbinc/txn.h" - -/* - * PUBLIC: int __db_addrem_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_addrem_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_addrem_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_addrem_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_addrem%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\topcode: %lu\n", (u_long)argp->opcode); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tindx: %lu\n", (u_long)argp->indx); - (void)printf("\tnbytes: %lu\n", (u_long)argp->nbytes); - (void)printf("\thdr: "); - for (i = 0; i < argp->hdr.size; i++) { - ch = ((u_int8_t *)argp->hdr.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tdbt: "); - for (i = 0; i < argp->dbt.size; i++) { - ch = ((u_int8_t *)argp->dbt.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_big_print __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_big_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_big_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_big_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_big%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\topcode: %lu\n", (u_long)argp->opcode); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tprev_pgno: %lu\n", (u_long)argp->prev_pgno); - (void)printf("\tnext_pgno: %lu\n", (u_long)argp->next_pgno); - (void)printf("\tdbt: "); - for (i = 0; i < argp->dbt.size; i++) { - ch = ((u_int8_t *)argp->dbt.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tpagelsn: [%lu][%lu]\n", - (u_long)argp->pagelsn.file, (u_long)argp->pagelsn.offset); - (void)printf("\tprevlsn: [%lu][%lu]\n", - (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset); - (void)printf("\tnextlsn: [%lu][%lu]\n", - (u_long)argp->nextlsn.file, (u_long)argp->nextlsn.offset); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_ovref_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_ovref_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_ovref_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_ovref_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_ovref%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tadjust: %ld\n", (long)argp->adjust); - (void)printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_relink_42_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_relink_42_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_relink_42_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_relink_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_relink_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\topcode: %lu\n", (u_long)argp->opcode); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tlsn: [%lu][%lu]\n", - (u_long)argp->lsn.file, (u_long)argp->lsn.offset); - (void)printf("\tprev: %lu\n", (u_long)argp->prev); - (void)printf("\tlsn_prev: [%lu][%lu]\n", - (u_long)argp->lsn_prev.file, (u_long)argp->lsn_prev.offset); - (void)printf("\tnext: %lu\n", (u_long)argp->next); - (void)printf("\tlsn_next: [%lu][%lu]\n", - (u_long)argp->lsn_next.file, (u_long)argp->lsn_next.offset); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_debug_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_debug_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_debug_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __db_debug_read(env, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_debug%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\top: "); - for (i = 0; i < argp->op.size; i++) { - ch = ((u_int8_t *)argp->op.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tkey: "); - for (i = 0; i < argp->key.size; i++) { - ch = ((u_int8_t *)argp->key.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tdata: "); - for (i = 0; i < argp->data.size; i++) { - ch = ((u_int8_t *)argp->data.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\targ_flags: %lu\n", (u_long)argp->arg_flags); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_noop_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_noop_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_noop_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_noop_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_noop%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tprevlsn: [%lu][%lu]\n", - (u_long)argp->prevlsn.file, (u_long)argp->prevlsn.offset); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_alloc_42_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_alloc_42_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_alloc_42_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_alloc_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_alloc_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); - (void)printf("\tpage_lsn: [%lu][%lu]\n", - (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tptype: %lu\n", (u_long)argp->ptype); - (void)printf("\tnext: %lu\n", (u_long)argp->next); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_alloc_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_alloc_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_alloc_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_alloc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_alloc%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); - (void)printf("\tpage_lsn: [%lu][%lu]\n", - (u_long)argp->page_lsn.file, (u_long)argp->page_lsn.offset); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tptype: %lu\n", (u_long)argp->ptype); - (void)printf("\tnext: %lu\n", (u_long)argp->next); - (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_free_42_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_free_42_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_free_42_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_free_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_free_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); - (void)printf("\theader: "); - for (i = 0; i < argp->header.size; i++) { - ch = ((u_int8_t *)argp->header.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tnext: %lu\n", (u_long)argp->next); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_free_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_free_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_free_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_free_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_free%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); - (void)printf("\theader: "); - for (i = 0; i < argp->header.size; i++) { - ch = ((u_int8_t *)argp->header.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tnext: %lu\n", (u_long)argp->next); - (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_cksum_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_cksum_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_cksum_args *argp; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_cksum%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_freedata_42_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_freedata_42_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_freedata_42_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_freedata_42_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_freedata_42%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); - (void)printf("\theader: "); - for (i = 0; i < argp->header.size; i++) { - ch = ((u_int8_t *)argp->header.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tnext: %lu\n", (u_long)argp->next); - (void)printf("\tdata: "); - for (i = 0; i < argp->data.size; i++) { - ch = ((u_int8_t *)argp->data.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_freedata_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_freedata_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_freedata_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_freedata_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_freedata%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? 
"_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tmeta_pgno: %lu\n", (u_long)argp->meta_pgno); - (void)printf("\theader: "); - for (i = 0; i < argp->header.size; i++) { - ch = ((u_int8_t *)argp->header.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tnext: %lu\n", (u_long)argp->next); - (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno); - (void)printf("\tdata: "); - for (i = 0; i < argp->data.size; i++) { - ch = ((u_int8_t *)argp->data.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_init_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_init_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_init_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_init_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_init%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tpgno: %lu\n", (u_long)argp->pgno); - (void)printf("\theader: "); - for (i = 0; i < argp->header.size; i++) { - ch = ((u_int8_t *)argp->header.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\tdata: "); - for (i = 0; i < argp->data.size; i++) { - ch = ((u_int8_t *)argp->data.data)[i]; - printf(isprint(ch) || ch == 0x0a ? "%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_sort_44_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_sort_44_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_sort_44_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_sort_44_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_sort_44%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tmeta: %lu\n", (u_long)argp->meta); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tlast_free: %lu\n", (u_long)argp->last_free); - (void)printf("\tlast_lsn: [%lu][%lu]\n", - (u_long)argp->last_lsn.file, (u_long)argp->last_lsn.offset); - (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno); - (void)printf("\tlist: "); - for (i = 0; i < argp->list.size; i++) { - ch = ((u_int8_t *)argp->list.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_pg_trunc_print __P((ENV *, DBT *, DB_LSN *, - * PUBLIC: db_recops, void *)); - */ -int -__db_pg_trunc_print(env, dbtp, lsnp, notused2, notused3) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops notused2; - void *notused3; -{ - __db_pg_trunc_args *argp; - u_int32_t i; - int ch; - int ret; - - notused2 = DB_TXN_PRINT; - notused3 = NULL; - - if ((ret = - __db_pg_trunc_read(env, NULL, NULL, dbtp->data, &argp)) != 0) - return (ret); - (void)printf( - "[%lu][%lu]__db_pg_trunc%s: rec: %lu txnp %lx prevlsn [%lu][%lu]\n", - (u_long)lsnp->file, (u_long)lsnp->offset, - (argp->type & DB_debug_FLAG) ? "_debug" : "", - (u_long)argp->type, - (u_long)argp->txnp->txnid, - (u_long)argp->prev_lsn.file, (u_long)argp->prev_lsn.offset); - (void)printf("\tfileid: %ld\n", (long)argp->fileid); - (void)printf("\tmeta: %lu\n", (u_long)argp->meta); - (void)printf("\tmeta_lsn: [%lu][%lu]\n", - (u_long)argp->meta_lsn.file, (u_long)argp->meta_lsn.offset); - (void)printf("\tlast_free: %lu\n", (u_long)argp->last_free); - (void)printf("\tlast_lsn: [%lu][%lu]\n", - (u_long)argp->last_lsn.file, (u_long)argp->last_lsn.offset); - (void)printf("\tnext_free: %lu\n", (u_long)argp->next_free); - (void)printf("\tlast_pgno: %lu\n", (u_long)argp->last_pgno); - (void)printf("\tlist: "); - for (i = 0; i < argp->list.size; i++) { - ch = ((u_int8_t *)argp->list.data)[i]; - printf(isprint(ch) || ch == 0x0a ? 
"%c" : "%#x ", ch); - } - (void)printf("\n"); - (void)printf("\n"); - __os_free(env, argp); - return (0); -} - -/* - * PUBLIC: int __db_init_print __P((ENV *, DB_DISTAB *)); - */ -int -__db_init_print(env, dtabp) - ENV *env; - DB_DISTAB *dtabp; -{ - int ret; - - if ((ret = __db_add_recovery_int(env, dtabp, - __db_addrem_print, DB___db_addrem)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_big_print, DB___db_big)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_ovref_print, DB___db_ovref)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_debug_print, DB___db_debug)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_noop_print, DB___db_noop)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_alloc_print, DB___db_pg_alloc)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_free_print, DB___db_pg_free)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_cksum_print, DB___db_cksum)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_freedata_print, DB___db_pg_freedata)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_init_print, DB___db_pg_init)) != 0) - return (ret); - if ((ret = __db_add_recovery_int(env, dtabp, - __db_pg_trunc_print, DB___db_pg_trunc)) != 0) - return (ret); - return (0); -} diff --git a/db/db_cam.c b/db/db_cam.c deleted file mode 100644 index 4c1322d..0000000 --- a/db/db_cam.c +++ /dev/null @@ -1,3460 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/mp.h" -#include "dbinc/partition.h" -#include "dbinc/qam.h" -#include "dbinc/txn.h" - -static int __db_s_count __P((DB *)); -static int __db_wrlock_err __P((ENV *)); -static int __dbc_del_foreign __P((DBC *)); -static int __dbc_del_oldskey __P((DB *, DBC *, DBT *, DBT *, DBT *)); -static int __dbc_del_secondary __P((DBC *)); -static int __dbc_pget_recno __P((DBC *, DBT *, DBT *, u_int32_t)); -static inline int __dbc_put_append __P((DBC *, - DBT *, DBT *, u_int32_t *, u_int32_t)); -static inline int __dbc_put_fixed_len __P((DBC *, DBT *, DBT *)); -static inline int __dbc_put_partial __P((DBC *, - DBT *, DBT *, DBT *, DBT *, u_int32_t *, u_int32_t)); -static int __dbc_put_primary __P((DBC *, DBT *, DBT *, u_int32_t)); -static inline int __dbc_put_resolve_key __P((DBC *, - DBT *, DBT *, u_int32_t *, u_int32_t)); -static inline int __dbc_put_secondaries __P((DBC *, - DBT *, DBT *, DBT *, int, DBT *, u_int32_t *)); - -#define CDB_LOCKING_INIT(env, dbc) \ - /* \ - * If we are running CDB, this had better be either a write \ - * cursor or an immediate writer. If it's a regular writer, \ - * that means we have an IWRITE lock and we need to upgrade \ - * it to a write lock. \ - */ \ - if (CDB_LOCKING(env)) { \ - if (!F_ISSET(dbc, DBC_WRITECURSOR | DBC_WRITER)) \ - return (__db_wrlock_err(env)); \ - \ - if (F_ISSET(dbc, DBC_WRITECURSOR) && \ - (ret = __lock_get(env, \ - (dbc)->locker, DB_LOCK_UPGRADE, &(dbc)->lock_dbt, \ - DB_LOCK_WRITE, &(dbc)->mylock)) != 0) \ - return (ret); \ - } -#define CDB_LOCKING_DONE(env, dbc) \ - /* Release the upgraded lock. 
*/ \ - if (F_ISSET(dbc, DBC_WRITECURSOR)) \ - (void)__lock_downgrade( \ - env, &(dbc)->mylock, DB_LOCK_IWRITE, 0); - -#define SET_READ_LOCKING_FLAGS(dbc, var) do { \ - var = 0; \ - if (!F_ISSET(dbc, DBC_READ_COMMITTED | DBC_READ_UNCOMMITTED)) { \ - if (LF_ISSET(DB_READ_COMMITTED)) \ - var = DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED; \ - if (LF_ISSET(DB_READ_UNCOMMITTED)) \ - var = DBC_READ_UNCOMMITTED; \ - } \ - LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED); \ -} while (0) - -/* - * __dbc_close -- - * DBC->close. - * - * PUBLIC: int __dbc_close __P((DBC *)); - */ -int -__dbc_close(dbc) - DBC *dbc; -{ - DB *dbp; - DBC *opd; - DBC_INTERNAL *cp; - DB_TXN *txn; - ENV *env; - int ret, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - cp = dbc->internal; - opd = cp->opd; - ret = 0; - - /* - * Remove the cursor(s) from the active queue. We may be closing two - * cursors at once here, a top-level one and a lower-level, off-page - * duplicate one. The access-method specific cursor close routine must - * close both of them in a single call. - * - * !!! - * Cursors must be removed from the active queue before calling the - * access specific cursor close routine, btree depends on having that - * order of operations. - */ - MUTEX_LOCK(env, dbp->mutex); - - if (opd != NULL) { - DB_ASSERT(env, F_ISSET(opd, DBC_ACTIVE)); - F_CLR(opd, DBC_ACTIVE); - TAILQ_REMOVE(&dbp->active_queue, opd, links); - } - DB_ASSERT(env, F_ISSET(dbc, DBC_ACTIVE)); - F_CLR(dbc, DBC_ACTIVE); - TAILQ_REMOVE(&dbp->active_queue, dbc, links); - - MUTEX_UNLOCK(env, dbp->mutex); - - /* Call the access specific cursor close routine. */ - if ((t_ret = - dbc->am_close(dbc, PGNO_INVALID, NULL)) != 0 && ret == 0) - ret = t_ret; - - /* - * Release the lock after calling the access method specific close - * routine, a Btree cursor may have had pending deletes. 
- */ - if (CDB_LOCKING(env)) { - /* - * Also, be sure not to free anything if mylock.off is - * INVALID; in some cases, such as idup'ed read cursors - * and secondary update cursors, a cursor in a CDB - * environment may not have a lock at all. - */ - if ((t_ret = __LPUT(dbc, dbc->mylock)) != 0 && ret == 0) - ret = t_ret; - - /* For safety's sake, since this is going on the free queue. */ - memset(&dbc->mylock, 0, sizeof(dbc->mylock)); - if (opd != NULL) - memset(&opd->mylock, 0, sizeof(opd->mylock)); - } - - if ((txn = dbc->txn) != NULL) - txn->cursors--; - - /* Move the cursor(s) to the free queue. */ - MUTEX_LOCK(env, dbp->mutex); - if (opd != NULL) { - if (txn != NULL) - txn->cursors--; - TAILQ_INSERT_TAIL(&dbp->free_queue, opd, links); - opd = NULL; - } - TAILQ_INSERT_TAIL(&dbp->free_queue, dbc, links); - MUTEX_UNLOCK(env, dbp->mutex); - - if (txn != NULL && F_ISSET(txn, TXN_PRIVATE) && txn->cursors == 0 && - (t_ret = __txn_commit(txn, 0)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __dbc_destroy -- - * Destroy the cursor, called after DBC->close. - * - * PUBLIC: int __dbc_destroy __P((DBC *)); - */ -int -__dbc_destroy(dbc) - DBC *dbc; -{ - DB *dbp; - ENV *env; - int ret, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - - /* Remove the cursor from the free queue. */ - MUTEX_LOCK(env, dbp->mutex); - TAILQ_REMOVE(&dbp->free_queue, dbc, links); - MUTEX_UNLOCK(env, dbp->mutex); - - /* Free up allocated memory. */ - if (dbc->my_rskey.data != NULL) - __os_free(env, dbc->my_rskey.data); - if (dbc->my_rkey.data != NULL) - __os_free(env, dbc->my_rkey.data); - if (dbc->my_rdata.data != NULL) - __os_free(env, dbc->my_rdata.data); - - /* Call the access specific cursor destroy routine. */ - ret = dbc->am_destroy == NULL ? 0 : dbc->am_destroy(dbc); - - /* - * Release the lock id for this cursor. 
- */ - if (LOCKING_ON(env) && - F_ISSET(dbc, DBC_OWN_LID) && - (t_ret = __lock_id_free(env, dbc->lref)) != 0 && ret == 0) - ret = t_ret; - - __os_free(env, dbc); - - return (ret); -} - -/* - * __dbc_cmp -- - * Compare the position of two cursors. Return whether two cursors are - * pointing to the same key/data pair. - * - * result == 0 if both cursors refer to the same item. - * result == 1 otherwise - * - * PUBLIC: int __dbc_cmp __P((DBC *, DBC *, int *)); - */ -int -__dbc_cmp(dbc, other_dbc, result) - DBC *dbc, *other_dbc; - int *result; -{ - DBC *curr_dbc, *curr_odbc; - DBC_INTERNAL *dbc_int, *odbc_int; - ENV *env; - int ret; - - env = dbc->env; - ret = 0; - -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbc->dbp)) { - dbc = ((PART_CURSOR *)dbc->internal)->sub_cursor; - other_dbc = ((PART_CURSOR *)other_dbc->internal)->sub_cursor; - } - /* Both cursors must still be valid. */ - if (dbc == NULL || other_dbc == NULL) { - __db_errx(env, -"Both cursors must be initialized before calling DBC->cmp."); - return (EINVAL); - } - - if (dbc->dbp != other_dbc->dbp) { - *result = 1; - return (0); - } -#endif - -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbc->dbp)) - return (__bamc_compress_cmp(dbc, other_dbc, result)); -#endif - - curr_dbc = dbc; - curr_odbc = other_dbc; - dbc_int = dbc->internal; - odbc_int = other_dbc->internal; - - /* Both cursors must be on valid positions. */ - if (dbc_int->pgno == PGNO_INVALID || odbc_int->pgno == PGNO_INVALID) { - __db_errx(env, -"Both cursors must be initialized before calling DBC->cmp."); - return (EINVAL); - } - - /* - * Use a loop since cursors can be nested. Off page duplicate - * sets can only be nested one level deep, so it is safe to use a - * while (true) loop. - */ - while (1) { - if (dbc_int->pgno == odbc_int->pgno && - dbc_int->indx == odbc_int->indx) { - /* - * If one cursor is sitting on an off page duplicate - * set, the other will be pointing to the same set. Be - * careful, and check anyway. 
- */ - if (dbc_int->opd != NULL && odbc_int->opd != NULL) { - curr_dbc = dbc_int->opd; - curr_odbc = odbc_int->opd; - dbc_int = dbc_int->opd->internal; - odbc_int= odbc_int->opd->internal; - continue; - } else if (dbc_int->opd == NULL && - odbc_int->opd == NULL) - *result = 0; - else { - __db_errx(env, - "DBCursor->cmp mismatched off page duplicate cursor pointers."); - return (EINVAL); - } - - switch (curr_dbc->dbtype) { - case DB_HASH: - /* - * Make sure that on-page duplicate data - * indexes match, and that the deleted - * flags are consistent. - */ - ret = __hamc_cmp(curr_dbc, curr_odbc, result); - break; - case DB_BTREE: - case DB_RECNO: - /* - * Check for consistent deleted flags on btree - * specific cursors. - */ - ret = __bamc_cmp(curr_dbc, curr_odbc, result); - break; - default: - /* NO-OP break out. */ - break; - } - } else - *result = 1; - return (ret); - } - /* NOTREACHED. */ - return (ret); -} - -/* - * __dbc_count -- - * Return a count of duplicate data items. - * - * PUBLIC: int __dbc_count __P((DBC *, db_recno_t *)); - */ -int -__dbc_count(dbc, recnop) - DBC *dbc; - db_recno_t *recnop; -{ - ENV *env; - int ret; - - env = dbc->env; - -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbc->dbp)) - dbc = ((PART_CURSOR *)dbc->internal)->sub_cursor; -#endif - /* - * Cursor Cleanup Note: - * All of the cursors passed to the underlying access methods by this - * routine are not duplicated and will not be cleaned up on return. - * So, pages/locks that the cursor references must be resolved by the - * underlying functions. 
- */ - switch (dbc->dbtype) { - case DB_QUEUE: - case DB_RECNO: - *recnop = 1; - break; - case DB_HASH: - if (dbc->internal->opd == NULL) { - if ((ret = __hamc_count(dbc, recnop)) != 0) - return (ret); - break; - } - /* FALLTHROUGH */ - case DB_BTREE: -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbc->dbp)) - return (__bamc_compress_count(dbc, recnop)); -#endif - if ((ret = __bamc_count(dbc, recnop)) != 0) - return (ret); - break; - case DB_UNKNOWN: - default: - return (__db_unknown_type(env, "__dbc_count", dbc->dbtype)); - } - return (0); -} - -/* - * __dbc_del -- - * DBC->del. - * - * PUBLIC: int __dbc_del __P((DBC *, u_int32_t)); - */ -int -__dbc_del(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - DB *dbp; - ENV *env; - int ret; - - dbp = dbc->dbp; - env = dbp->env; - - CDB_LOCKING_INIT(env, dbc); - - /* - * If we're a secondary index, and DB_UPDATE_SECONDARY isn't set - * (which it only is if we're being called from a primary update), - * then we need to call through to the primary and delete the item. - * - * Note that this will delete the current item; we don't need to - * delete it ourselves as well, so we can just goto done. - */ - if (flags != DB_UPDATE_SECONDARY && F_ISSET(dbp, DB_AM_SECONDARY)) { - ret = __dbc_del_secondary(dbc); - goto done; - } - - /* - * If we are a foreign db, go through and check any foreign key - * constraints first, which will make rolling back changes on an abort - * simpler. - */ - if (LIST_FIRST(&dbp->f_primaries) != NULL && - (ret = __dbc_del_foreign(dbc)) != 0) - goto done; - - /* - * If we are a primary and have secondary indices, go through - * and delete any secondary keys that point at the current record. 
- */ - if (DB_IS_PRIMARY(dbp) && - (ret = __dbc_del_primary(dbc)) != 0) - goto done; - -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbp)) - ret = __bamc_compress_del(dbc, flags); - else -#endif - ret = __dbc_idel(dbc, flags); - -done: CDB_LOCKING_DONE(env, dbc); - - return (ret); -} - -/* - * __dbc_idel -- - * Implementation of DBC->del. - * - * PUBLIC: int __dbc_idel __P((DBC *, u_int32_t)); - */ -int -__dbc_idel(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - DB *dbp; - DBC *opd; - int ret, t_ret; - - COMPQUIET(flags, 0); - - dbp = dbc->dbp; - - /* - * Cursor Cleanup Note: - * All of the cursors passed to the underlying access methods by this - * routine are not duplicated and will not be cleaned up on return. - * So, pages/locks that the cursor references must be resolved by the - * underlying functions. - */ - - /* - * Off-page duplicate trees are locked in the primary tree, that is, - * we acquire a write lock in the primary tree and no locks in the - * off-page dup tree. If the del operation is done in an off-page - * duplicate tree, call the primary cursor's upgrade routine first. - */ - opd = dbc->internal->opd; - if (opd == NULL) - ret = dbc->am_del(dbc, flags); - else if ((ret = dbc->am_writelock(dbc)) == 0) - ret = opd->am_del(opd, flags); - - /* - * If this was an update that is supporting dirty reads - * then we may have just swapped our read for a write lock - * which is held by the surviving cursor. We need - * to explicitly downgrade this lock. The closed cursor - * may only have had a read lock. 
- */ - if (F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) && - dbc->internal->lock_mode == DB_LOCK_WRITE) { - if ((t_ret = - __TLPUT(dbc, dbc->internal->lock)) != 0 && ret == 0) - ret = t_ret; - if (t_ret == 0) - dbc->internal->lock_mode = DB_LOCK_WWRITE; - if (dbc->internal->page != NULL && (t_ret = - __memp_shared(dbp->mpf, dbc->internal->page)) != 0 && - ret == 0) - ret = t_ret; - } - - return (ret); -} - -#ifdef HAVE_COMPRESSION -/* - * __dbc_bulk_del -- - * Bulk del for a cursor. - * - * Only implemented for compressed BTrees. In this file in order to - * use the CDB_LOCKING_* macros. - * - * PUBLIC: #ifdef HAVE_COMPRESSION - * PUBLIC: int __dbc_bulk_del __P((DBC *, DBT *, u_int32_t)); - * PUBLIC: #endif - */ -int -__dbc_bulk_del(dbc, key, flags) - DBC *dbc; - DBT *key; - u_int32_t flags; -{ - ENV *env; - int ret; - - env = dbc->env; - - DB_ASSERT(env, DB_IS_COMPRESSED(dbc->dbp)); - - CDB_LOCKING_INIT(env, dbc); - - ret = __bamc_compress_bulk_del(dbc, key, flags); - - CDB_LOCKING_DONE(env, dbc); - - return (ret); -} -#endif - -/* - * __dbc_dup -- - * Duplicate a cursor - * - * PUBLIC: int __dbc_dup __P((DBC *, DBC **, u_int32_t)); - */ -int -__dbc_dup(dbc_orig, dbcp, flags) - DBC *dbc_orig; - DBC **dbcp; - u_int32_t flags; -{ - DBC *dbc_n, *dbc_nopd; - int ret; - - dbc_n = dbc_nopd = NULL; - - /* Allocate a new cursor and initialize it. */ - if ((ret = __dbc_idup(dbc_orig, &dbc_n, flags)) != 0) - goto err; - *dbcp = dbc_n; - - /* - * If the cursor references an off-page duplicate tree, allocate a - * new cursor for that tree and initialize it. - */ - if (dbc_orig->internal->opd != NULL) { - if ((ret = - __dbc_idup(dbc_orig->internal->opd, &dbc_nopd, flags)) != 0) - goto err; - dbc_n->internal->opd = dbc_nopd; - dbc_nopd->internal->pdbc = dbc_n; - } - return (0); - -err: if (dbc_n != NULL) - (void)__dbc_close(dbc_n); - if (dbc_nopd != NULL) - (void)__dbc_close(dbc_nopd); - - return (ret); -} - -/* - * __dbc_idup -- - * Internal version of __dbc_dup. 
- * - * PUBLIC: int __dbc_idup __P((DBC *, DBC **, u_int32_t)); - */ -int -__dbc_idup(dbc_orig, dbcp, flags) - DBC *dbc_orig, **dbcp; - u_int32_t flags; -{ - DB *dbp; - DBC *dbc_n; - DBC_INTERNAL *int_n, *int_orig; - ENV *env; - int ret; - - dbp = dbc_orig->dbp; - dbc_n = *dbcp; - env = dbp->env; - - if ((ret = __db_cursor_int(dbp, dbc_orig->thread_info, - dbc_orig->txn, dbc_orig->dbtype, dbc_orig->internal->root, - F_ISSET(dbc_orig, DBC_OPD) | DBC_DUPLICATE, - dbc_orig->locker, &dbc_n)) != 0) - return (ret); - - /* Position the cursor if requested, acquiring the necessary locks. */ - if (LF_ISSET(DB_POSITION)) { - int_n = dbc_n->internal; - int_orig = dbc_orig->internal; - - dbc_n->flags |= dbc_orig->flags & ~DBC_OWN_LID; - - int_n->indx = int_orig->indx; - int_n->pgno = int_orig->pgno; - int_n->root = int_orig->root; - int_n->lock_mode = int_orig->lock_mode; - - int_n->stream_start_pgno = int_orig->stream_start_pgno; - int_n->stream_off = int_orig->stream_off; - int_n->stream_curr_pgno = int_orig->stream_curr_pgno; - - switch (dbc_orig->dbtype) { - case DB_QUEUE: - if ((ret = __qamc_dup(dbc_orig, dbc_n)) != 0) - goto err; - break; - case DB_BTREE: - case DB_RECNO: - if ((ret = __bamc_dup(dbc_orig, dbc_n, flags)) != 0) - goto err; - break; - case DB_HASH: - if ((ret = __hamc_dup(dbc_orig, dbc_n)) != 0) - goto err; - break; - case DB_UNKNOWN: - default: - ret = __db_unknown_type(env, - "__dbc_idup", dbc_orig->dbtype); - goto err; - } - } else if (F_ISSET(dbc_orig, DBC_BULK)) { - /* - * For bulk cursors, remember what page we're on, even if we - * don't know that the next operation will be nearby. - */ - dbc_n->internal->pgno = dbc_orig->internal->pgno; - } - - /* Copy the locking flags to the new cursor. */ - F_SET(dbc_n, F_ISSET(dbc_orig, DBC_BULK | - DBC_READ_COMMITTED | DBC_READ_UNCOMMITTED | DBC_WRITECURSOR)); - - /* - * If we're in CDB and this isn't an offpage dup cursor, then - * we need to get a lock for the duplicated cursor. 
- */ - if (CDB_LOCKING(env) && !F_ISSET(dbc_n, DBC_OPD) && - (ret = __lock_get(env, dbc_n->locker, 0, - &dbc_n->lock_dbt, F_ISSET(dbc_orig, DBC_WRITECURSOR) ? - DB_LOCK_IWRITE : DB_LOCK_READ, &dbc_n->mylock)) != 0) - goto err; - - dbc_n->priority = dbc_orig->priority; - dbc_n->internal->pdbc = dbc_orig->internal->pdbc; - *dbcp = dbc_n; - return (0); - -err: (void)__dbc_close(dbc_n); - return (ret); -} - -/* - * __dbc_newopd -- - * Create a new off-page duplicate cursor. - * - * PUBLIC: int __dbc_newopd __P((DBC *, db_pgno_t, DBC *, DBC **)); - */ -int -__dbc_newopd(dbc_parent, root, oldopd, dbcp) - DBC *dbc_parent; - db_pgno_t root; - DBC *oldopd; - DBC **dbcp; -{ - DB *dbp; - DBC *opd; - DBTYPE dbtype; - int ret; - - dbp = dbc_parent->dbp; - dbtype = (dbp->dup_compare == NULL) ? DB_RECNO : DB_BTREE; - - /* - * On failure, we want to default to returning the old off-page dup - * cursor, if any; our caller can't be left with a dangling pointer - * to a freed cursor. On error the only allowable behavior is to - * close the cursor (and the old OPD cursor it in turn points to), so - * this should be safe. - */ - *dbcp = oldopd; - - if ((ret = __db_cursor_int(dbp, dbc_parent->thread_info, - dbc_parent->txn, - dbtype, root, DBC_OPD, dbc_parent->locker, &opd)) != 0) - return (ret); - - opd->priority = dbc_parent->priority; - opd->internal->pdbc = dbc_parent; - *dbcp = opd; - - /* - * Check to see if we already have an off-page dup cursor that we've - * passed in. If we do, close it. It'd be nice to use it again - * if it's a cursor belonging to the right tree, but if we're doing - * a cursor-relative operation this might not be safe, so for now - * we'll take the easy way out and always close and reopen. - * - * Note that under no circumstances do we want to close the old - * cursor without returning a valid new one; we don't want to - * leave the main cursor in our caller with a non-NULL pointer - * to a freed off-page dup cursor. 
- */ - if (oldopd != NULL && (ret = __dbc_close(oldopd)) != 0) - return (ret); - - return (0); -} - -/* - * __dbc_get -- - * Get using a cursor. - * - * PUBLIC: int __dbc_get __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_get(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ -#ifdef HAVE_PARTITION - if (F_ISSET(dbc, DBC_PARTITIONED)) - return (__partc_get(dbc, key, data, flags)); -#endif - -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbc->dbp)) - return (__bamc_compress_get(dbc, key, data, flags)); -#endif - - return (__dbc_iget(dbc, key, data, flags)); -} - -/* - * __dbc_iget -- - * Implementation of get using a cursor. - * - * PUBLIC: int __dbc_iget __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_iget(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - DBC *ddbc, *dbc_n, *opd; - DBC_INTERNAL *cp, *cp_n; - DB_MPOOLFILE *mpf; - ENV *env; - db_pgno_t pgno; - db_indx_t indx_off; - u_int32_t multi, orig_ulen, tmp_flags, tmp_read_locking, tmp_rmw; - u_int8_t type; - int key_small, ret, t_ret; - - COMPQUIET(orig_ulen, 0); - - key_small = 0; - - /* - * Cursor Cleanup Note: - * All of the cursors passed to the underlying access methods by this - * routine are duplicated cursors. On return, any referenced pages - * will be discarded, and, if the cursor is not intended to be used - * again, the close function will be called. So, pages/locks that - * the cursor references do not need to be resolved by the underlying - * functions. - */ - dbp = dbc->dbp; - env = dbp->env; - mpf = dbp->mpf; - dbc_n = NULL; - opd = NULL; - - /* Clear OR'd in additional bits so we can check for flag equality. */ - tmp_rmw = LF_ISSET(DB_RMW); - LF_CLR(DB_RMW); - - SET_READ_LOCKING_FLAGS(dbc, tmp_read_locking); - - multi = LF_ISSET(DB_MULTIPLE|DB_MULTIPLE_KEY); - LF_CLR(DB_MULTIPLE|DB_MULTIPLE_KEY); - - /* - * Return a cursor's record number. 
It has nothing to do with the - * cursor get code except that it was put into the interface. - */ - if (flags == DB_GET_RECNO) { - if (tmp_rmw) - F_SET(dbc, DBC_RMW); - F_SET(dbc, tmp_read_locking); - ret = __bamc_rget(dbc, data); - if (tmp_rmw) - F_CLR(dbc, DBC_RMW); - /* Clear the temp flags, but leave WAS_READ_COMMITTED. */ - F_CLR(dbc, tmp_read_locking & ~DBC_WAS_READ_COMMITTED); - return (ret); - } - - if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT) - CDB_LOCKING_INIT(env, dbc); - - /* Don't return the key or data if it was passed to us. */ - if (!DB_RETURNS_A_KEY(dbp, flags)) - F_SET(key, DB_DBT_ISSET); - if (flags == DB_GET_BOTH && - (dbp->dup_compare == NULL || dbp->dup_compare == __bam_defcmp)) - F_SET(data, DB_DBT_ISSET); - - /* - * If we have an off-page duplicates cursor, and the operation applies - * to it, perform the operation. Duplicate the cursor and call the - * underlying function. - * - * Off-page duplicate trees are locked in the primary tree, that is, - * we acquire a write lock in the primary tree and no locks in the - * off-page dup tree. If the DB_RMW flag was specified and the get - * operation is done in an off-page duplicate tree, call the primary - * cursor's upgrade routine first. - */ - cp = dbc->internal; - if (cp->opd != NULL && - (flags == DB_CURRENT || flags == DB_GET_BOTHC || - flags == DB_NEXT || flags == DB_NEXT_DUP || - flags == DB_PREV || flags == DB_PREV_DUP)) { - if (tmp_rmw && (ret = dbc->am_writelock(dbc)) != 0) - goto err; - if (F_ISSET(dbc, DBC_TRANSIENT)) - opd = cp->opd; - else if ((ret = __dbc_idup(cp->opd, &opd, DB_POSITION)) != 0) - goto err; - - if ((ret = opd->am_get(opd, key, data, flags, NULL)) == 0) - goto done; - /* - * Another cursor may have deleted all of the off-page - * duplicates, so for operations that are moving a cursor, we - * need to skip the empty tree and retry on the parent cursor. 
- */ - if (ret == DB_NOTFOUND && - (flags == DB_PREV || flags == DB_NEXT)) { - ret = __dbc_close(opd); - opd = NULL; - if (F_ISSET(dbc, DBC_TRANSIENT)) - cp->opd = NULL; - } - if (ret != 0) - goto err; - } else if (cp->opd != NULL && F_ISSET(dbc, DBC_TRANSIENT)) { - if ((ret = __dbc_close(cp->opd)) != 0) - goto err; - cp->opd = NULL; - } - - /* - * Perform an operation on the main cursor. Duplicate the cursor, - * upgrade the lock as required, and call the underlying function. - */ - switch (flags) { - case DB_CURRENT: - case DB_GET_BOTHC: - case DB_NEXT: - case DB_NEXT_DUP: - case DB_NEXT_NODUP: - case DB_PREV: - case DB_PREV_DUP: - case DB_PREV_NODUP: - tmp_flags = DB_POSITION; - break; - default: - tmp_flags = 0; - break; - } - - /* - * If this cursor is going to be closed immediately, we don't - * need to take precautions to clean it up on error. - */ - if (F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED)) - dbc_n = dbc; - else { - ret = __dbc_idup(dbc, &dbc_n, tmp_flags); - - if (ret != 0) - goto err; - COPY_RET_MEM(dbc, dbc_n); - } - - if (tmp_rmw) - F_SET(dbc_n, DBC_RMW); - F_SET(dbc_n, tmp_read_locking); - - switch (multi) { - case DB_MULTIPLE: - F_SET(dbc_n, DBC_MULTIPLE); - break; - case DB_MULTIPLE_KEY: - F_SET(dbc_n, DBC_MULTIPLE_KEY); - break; - case DB_MULTIPLE | DB_MULTIPLE_KEY: - F_SET(dbc_n, DBC_MULTIPLE|DBC_MULTIPLE_KEY); - break; - case 0: - default: - break; - } - -retry: pgno = PGNO_INVALID; - ret = dbc_n->am_get(dbc_n, key, data, flags, &pgno); - if (tmp_rmw) - F_CLR(dbc_n, DBC_RMW); - /* - * Clear the temporary locking flags in the new cursor. The user's - * (old) cursor needs to have the WAS_READ_COMMITTED flag because this - * is used on the next call on that cursor. - */ - F_CLR(dbc_n, tmp_read_locking); - F_SET(dbc, tmp_read_locking & DBC_WAS_READ_COMMITTED); - F_CLR(dbc_n, DBC_MULTIPLE|DBC_MULTIPLE_KEY); - if (ret != 0) - goto err; - - cp_n = dbc_n->internal; - - /* - * We may be referencing a new off-page duplicates tree. 
Acquire - * a new cursor and call the underlying function. - */ - if (pgno != PGNO_INVALID) { - if ((ret = __dbc_newopd(dbc, - pgno, cp_n->opd, &cp_n->opd)) != 0) - goto err; - - switch (flags) { - case DB_FIRST: - case DB_NEXT: - case DB_NEXT_NODUP: - case DB_SET: - case DB_SET_RECNO: - case DB_SET_RANGE: - tmp_flags = DB_FIRST; - break; - case DB_LAST: - case DB_PREV: - case DB_PREV_NODUP: - tmp_flags = DB_LAST; - break; - case DB_GET_BOTH: - case DB_GET_BOTHC: - case DB_GET_BOTH_RANGE: - tmp_flags = flags; - break; - default: - ret = __db_unknown_flag(env, "__dbc_get", flags); - goto err; - } - ret = cp_n->opd->am_get(cp_n->opd, key, data, tmp_flags, NULL); - /* - * Another cursor may have deleted all of the off-page - * duplicates, so for operations that are moving a cursor, we - * need to skip the empty tree and retry on the parent cursor. - */ - if (ret == DB_NOTFOUND) { - switch (flags) { - case DB_FIRST: - case DB_NEXT: - case DB_NEXT_NODUP: - flags = DB_NEXT; - break; - case DB_LAST: - case DB_PREV: - case DB_PREV_NODUP: - flags = DB_PREV; - break; - default: - goto err; - } - - ret = __dbc_close(cp_n->opd); - cp_n->opd = NULL; - if (ret == 0) - goto retry; - } - if (ret != 0) - goto err; - } - -done: /* - * Return a key/data item. The only exception is that we don't return - * a key if the user already gave us one, that is, if the DB_SET flag - * was set. The DB_SET flag is necessary. In a Btree, the user's key - * doesn't have to be the same as the key stored the tree, depending on - * the magic performed by the comparison function. As we may not have - * done any key-oriented operation here, the page reference may not be - * valid. Fill it in as necessary. We don't have to worry about any - * locks, the cursor must already be holding appropriate locks. - * - * XXX - * If not a Btree and DB_SET_RANGE is set, we shouldn't return a key - * either, should we? - */ - cp_n = dbc_n == NULL ? 
dbc->internal : dbc_n->internal; - if (!F_ISSET(key, DB_DBT_ISSET)) { - if (cp_n->page == NULL && (ret = __memp_fget(mpf, &cp_n->pgno, - dbc->thread_info, dbc->txn, 0, &cp_n->page)) != 0) - goto err; - - if ((ret = __db_ret(dbc, cp_n->page, cp_n->indx, key, - &dbc->rkey->data, &dbc->rkey->ulen)) != 0) { - /* - * If the key DBT is too small, we still want to return - * the size of the data. Otherwise applications are - * forced to check each one with a separate call. We - * don't want to copy the data, so we set the ulen to - * zero before calling __db_ret. - */ - if (ret == DB_BUFFER_SMALL && - F_ISSET(data, DB_DBT_USERMEM)) { - key_small = 1; - orig_ulen = data->ulen; - data->ulen = 0; - } else - goto err; - } - } - if (multi != 0 && dbc->am_bulk != NULL) { - /* - * Even if fetching from the OPD cursor we need a duplicate - * primary cursor if we are going after multiple keys. - */ - if (dbc_n == NULL) { - /* - * Non-"_KEY" DB_MULTIPLE doesn't move the main cursor, - * so it's safe to just use dbc, unless the cursor - * has an open off-page duplicate cursor whose state - * might need to be preserved. - */ - if ((!(multi & DB_MULTIPLE_KEY) && - dbc->internal->opd == NULL) || - F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED)) - dbc_n = dbc; - else { - if ((ret = __dbc_idup(dbc, - &dbc_n, DB_POSITION)) != 0) - goto err; - if ((ret = dbc_n->am_get(dbc_n, - key, data, DB_CURRENT, &pgno)) != 0) - goto err; - } - cp_n = dbc_n->internal; - } - - /* - * If opd is set then we dupped the opd that we came in with. - * When we return we may have a new opd if we went to another - * key. - */ - if (opd != NULL) { - DB_ASSERT(env, cp_n->opd == NULL); - cp_n->opd = opd; - opd = NULL; - } - - /* - * Bulk get doesn't use __db_retcopy, so data.size won't - * get set up unless there is an error. Assume success - * here. This is the only call to am_bulk, and it avoids - * setting it exactly the same everywhere. 
If we have an - * DB_BUFFER_SMALL error, it'll get overwritten with the - * needed value. - */ - data->size = data->ulen; - ret = dbc_n->am_bulk(dbc_n, data, flags | multi); - } else if (!F_ISSET(data, DB_DBT_ISSET)) { - ddbc = opd != NULL ? opd : - cp_n->opd != NULL ? cp_n->opd : dbc_n; - cp = ddbc->internal; - if (cp->page == NULL && - (ret = __memp_fget(mpf, &cp->pgno, - dbc->thread_info, ddbc->txn, 0, &cp->page)) != 0) - goto err; - - type = TYPE(cp->page); - indx_off = ((type == P_LBTREE || - type == P_HASH || type == P_HASH_UNSORTED) ? O_INDX : 0); - ret = __db_ret(ddbc, cp->page, cp->indx + indx_off, - data, &dbc->rdata->data, &dbc->rdata->ulen); - } - -err: /* Don't pass DB_DBT_ISSET back to application level, error or no. */ - F_CLR(key, DB_DBT_ISSET); - F_CLR(data, DB_DBT_ISSET); - - /* Cleanup and cursor resolution. */ - if (opd != NULL) { - /* - * To support dirty reads we must reget the write lock - * if we have just stepped off a deleted record. - * Since the OPD cursor does not know anything - * about the referencing page or cursor we need - * to peek at the OPD cursor and get the lock here. - */ - if (F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) && - F_ISSET((BTREE_CURSOR *) - dbc->internal->opd->internal, C_DELETED)) - if ((t_ret = - dbc->am_writelock(dbc)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __dbc_cleanup( - dbc->internal->opd, opd, ret)) != 0 && ret == 0) - ret = t_ret; - } - - if (key_small) { - data->ulen = orig_ulen; - if (ret == 0) - ret = DB_BUFFER_SMALL; - } - - if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && - (ret == 0 || ret == DB_BUFFER_SMALL)) - ret = t_ret; - - if (flags == DB_CONSUME || flags == DB_CONSUME_WAIT) - CDB_LOCKING_DONE(env, dbc); - return (ret); -} - -/* Internal flags shared by the dbc_put functions. 
*/ -#define DBC_PUT_RMW 0x001 -#define DBC_PUT_NODEL 0x002 -#define DBC_PUT_HAVEREC 0x004 - -/* - * __dbc_put_resolve_key -- - * Get the current key and data so that we can correctly update the - * secondary and foreign databases. - */ -static inline int -__dbc_put_resolve_key(dbc, oldkey, olddata, put_statep, flags) - DBC *dbc; - DBT *oldkey, *olddata; - u_int32_t flags, *put_statep; -{ - DB *dbp; - ENV *env; - int ret, rmw; - - dbp = dbc->dbp; - env = dbp->env; - rmw = FLD_ISSET(*put_statep, DBC_PUT_RMW) ? DB_RMW : 0; - - DB_ASSERT(env, flags == DB_CURRENT); - COMPQUIET(flags, 0); - - /* - * This is safe to do on the cursor we already have; - * error or no, it won't move. - * - * We use DB_RMW for all of these gets because we'll be - * writing soon enough in the "normal" put code. In - * transactional databases we'll hold those write locks - * even if we close the cursor we're reading with. - * - * The DB_KEYEMPTY return needs special handling -- if the - * cursor is on a deleted key, we return DB_NOTFOUND. - */ - memset(oldkey, 0, sizeof(DBT)); - if ((ret = __dbc_get(dbc, oldkey, olddata, rmw | DB_CURRENT)) != 0) - return (ret == DB_KEYEMPTY ? DB_NOTFOUND : ret); - - /* Record that we've looked for the old record. */ - FLD_SET(*put_statep, DBC_PUT_HAVEREC); - return (0); -} - -/* - * __dbc_put_append -- - * Handle an append to a primary. - */ -static inline int -__dbc_put_append(dbc, key, data, put_statep, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags, *put_statep; -{ - DB *dbp; - ENV *env; - DBC *dbc_n; - DBT tdata; - int ret, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - ret = 0; - dbc_n = NULL; - - DB_ASSERT(env, flags == DB_APPEND); - COMPQUIET(flags, 0); - - /* - * With DB_APPEND, we need to do the insert to populate the key value. - * So we swap the 'normal' order of updating secondary / verifying - * foreign databases and inserting. - * - * If there is an append callback, the value stored in data->data may - * be replaced and then freed. 
To avoid passing a freed pointer back - * to the user, just operate on a copy of the data DBT. - */ - tdata = *data; - - /* - * If this cursor is going to be closed immediately, we don't - * need to take precautions to clean it up on error. - */ - if (F_ISSET(dbc, DBC_TRANSIENT)) - dbc_n = dbc; - else if ((ret = __dbc_idup(dbc, &dbc_n, 0)) != 0) - goto err; - - /* - * Append isn't a normal put operation; call the appropriate access - * method's append function. - */ - switch (dbp->type) { - case DB_QUEUE: - if ((ret = __qam_append(dbc_n, key, &tdata)) != 0) - goto err; - break; - case DB_RECNO: - if ((ret = __ram_append(dbc_n, key, &tdata)) != 0) - goto err; - break; - default: - /* The interface should prevent this. */ - DB_ASSERT(env, - dbp->type == DB_QUEUE || dbp->type == DB_RECNO); - - ret = __db_ferr(env, "DBC->put", 0); - goto err; - } - - /* - * The append callback, if one exists, may have allocated a new - * tdata.data buffer. If so, free it. - */ - FREE_IF_NEEDED(env, &tdata); - - /* - * The key value may have been generated by the above operation, but - * not set in the data buffer. Make sure it is there so that secondary - * updates can complete. - */ - if ((ret = __dbt_usercopy(env, key)) != 0) - goto err; - - /* An append cannot be replacing an existing item. */ - FLD_SET(*put_statep, DBC_PUT_NODEL); - -err: if (dbc_n != NULL && - (t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __dbc_put_partial -- - * Ensure that the data item we are using is complete and correct. - * Otherwise we could break the secondary constraints. - */ -static inline int -__dbc_put_partial(dbc, pkey, data, orig_data, out_data, put_statep, flags) - DBC *dbc; - DBT *pkey, *data, *orig_data, *out_data; - u_int32_t *put_statep, flags; -{ - DB *dbp; - DBC *pdbc; - ENV *env; - int ret, rmw, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - ret = t_ret = 0; - rmw = FLD_ISSET(*put_statep, DBC_PUT_RMW) ? 
DB_RMW : 0; - - if (!FLD_ISSET(*put_statep, DBC_PUT_HAVEREC) && - !FLD_ISSET(*put_statep, DBC_PUT_NODEL)) { - /* - * We're going to have to search the tree for the - * specified key. Dup a cursor (so we have the same - * locking info) and do a c_get. - */ - if ((ret = __dbc_idup(dbc, &pdbc, 0)) != 0) - return (ret); - - /* - * When doing a put with DB_CURRENT, partial data items have - * already been resolved. - */ - DB_ASSERT(env, flags != DB_CURRENT); - - F_SET(pkey, DB_DBT_ISSET); - ret = __dbc_get(pdbc, pkey, orig_data, rmw | DB_SET); - if (ret == DB_KEYEMPTY || ret == DB_NOTFOUND) { - FLD_SET(*put_statep, DBC_PUT_NODEL); - ret = 0; - } - if ((t_ret = __dbc_close(pdbc)) != 0) - ret = t_ret; - if (ret != 0) - return (ret); - - FLD_SET(*put_statep, DBC_PUT_HAVEREC); - } - - COMPQUIET(flags, 0); - - /* - * Now build the new datum from orig_data and the partial data - * we were given. It's okay to do this if no record was - * returned above: a partial put on an empty record is allowed, - * if a little strange. The data is zero-padded. - */ - return (__db_buildpartial(dbp, orig_data, data, out_data)); -} - -/* - * __dbc_put_fixed_len -- - * Handle padding for fixed-length records. - */ -static inline int -__dbc_put_fixed_len(dbc, data, out_data) - DBC *dbc; - DBT *data, *out_data; -{ - DB *dbp; - ENV *env; - int re_pad, ret; - u_int32_t re_len, size; - - dbp = dbc->dbp; - env = dbp->env; - ret = 0; - - /* - * Handle fixed-length records. If the primary database has - * fixed-length records, we need to pad out the datum before - * we pass it into the callback function; we always index the - * "real" record. 
- */ - if (dbp->type == DB_QUEUE) { - re_len = ((QUEUE *)dbp->q_internal)->re_len; - re_pad = ((QUEUE *)dbp->q_internal)->re_pad; - } else { - re_len = ((BTREE *)dbp->bt_internal)->re_len; - re_pad = ((BTREE *)dbp->bt_internal)->re_pad; - } - - size = data->size; - if (size > re_len) { - ret = __db_rec_toobig(env, size, re_len); - return (ret); - } else if (size < re_len) { - /* - * If we're not doing a partial put, copy data->data into - * out_data->data, then pad out out_data->data. This overrides - * the assignment made above, which is used in the more common - * case when padding is not needed. - * - * If we're doing a partial put, the data we want are already - * in out_data.data; we just need to pad. - */ - if (F_ISSET(data, DB_DBT_PARTIAL)) { - if ((ret = __os_realloc( - env, re_len, &out_data->data)) != 0) - return (ret); - /* - * In the partial case, we have built the item into - * out_data already using __db_buildpartial. Just need - * to pad from the end of out_data, not from data->size. - */ - size = out_data->size; - } else { - if ((ret = __os_malloc( - env, re_len, &out_data->data)) != 0) - return (ret); - memcpy(out_data->data, data->data, size); - } - memset((u_int8_t *)out_data->data + size, re_pad, - re_len - size); - out_data->size = re_len; - } - - return (ret); -} - -/* - * __dbc_put_secondaries -- - * Insert the secondary keys, and validate the foreign key constraints. - */ -static inline int -__dbc_put_secondaries(dbc, - pkey, data, orig_data, s_count, s_keys_buf, put_statep) - DBC *dbc; - DBT *pkey, *data, *orig_data, *s_keys_buf; - int s_count; - u_int32_t *put_statep; -{ - DB *dbp, *sdbp; - DBC *fdbc, *sdbc; - DBT fdata, oldpkey, *skeyp, temppkey, tempskey, *tskeyp; - ENV *env; - int cmp, ret, rmw, t_ret; - u_int32_t nskey; - - dbp = dbc->dbp; - env = dbp->env; - fdbc = sdbc = NULL; - sdbp = NULL; - ret = t_ret = 0; - rmw = FLD_ISSET(*put_statep, DBC_PUT_RMW) ? DB_RMW : 0; - - /* - * Loop through the secondaries. (Step 3.) 
- * - * Note that __db_s_first and __db_s_next will take care of - * thread-locking and refcounting issues. - */ - for (ret = __db_s_first(dbp, &sdbp), skeyp = s_keys_buf; - sdbp != NULL && ret == 0; - ret = __db_s_next(&sdbp, dbc->txn), ++skeyp) { - DB_ASSERT(env, skeyp - s_keys_buf < s_count); - /* - * Don't process this secondary if the key is immutable and we - * know that the old record exists. This optimization can't be - * used if we have not checked for the old record yet. - */ - if (FLD_ISSET(*put_statep, DBC_PUT_HAVEREC) && - !FLD_ISSET(*put_statep, DBC_PUT_NODEL) && - FLD_ISSET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY)) - continue; - - /* - * Call the callback for this secondary, to get the - * appropriate secondary key. - */ - if ((ret = sdbp->s_callback(sdbp, - pkey, data, skeyp)) != 0) { - /* Not indexing is equivalent to an empty key set. */ - if (ret == DB_DONOTINDEX) { - F_SET(skeyp, DB_DBT_MULTIPLE); - skeyp->size = 0; - ret = 0; - } else - goto err; - } - - if (sdbp->s_foreign != NULL && - (ret = __db_cursor_int(sdbp->s_foreign, - dbc->thread_info, dbc->txn, sdbp->s_foreign->type, - PGNO_INVALID, 0, dbc->locker, &fdbc)) != 0) - goto err; - - /* - * Mark the secondary key DBT(s) as set -- that is, the - * callback returned at least one secondary key. - * - * Also, if this secondary index is associated with a foreign - * database, check that the foreign db contains the key(s) to - * maintain referential integrity. Set flags in fdata to avoid - * mem copying, we just need to know existence. We need to do - * this check before setting DB_DBT_ISSET, otherwise __dbc_get - * will overwrite the flag values. 
- */ - if (F_ISSET(skeyp, DB_DBT_MULTIPLE)) { -#ifdef DIAGNOSTIC - __db_check_skeyset(sdbp, skeyp); -#endif - for (tskeyp = (DBT *)skeyp->data, nskey = skeyp->size; - nskey > 0; nskey--, tskeyp++) { - if (fdbc != NULL) { - memset(&fdata, 0, sizeof(DBT)); - F_SET(&fdata, - DB_DBT_PARTIAL | DB_DBT_USERMEM); - if ((ret = __dbc_get( - fdbc, tskeyp, &fdata, - DB_SET | rmw)) == DB_NOTFOUND || - ret == DB_KEYEMPTY) { - ret = DB_FOREIGN_CONFLICT; - break; - } - } - F_SET(tskeyp, DB_DBT_ISSET); - } - tskeyp = (DBT *)skeyp->data; - nskey = skeyp->size; - } else { - if (fdbc != NULL) { - memset(&fdata, 0, sizeof(DBT)); - F_SET(&fdata, DB_DBT_PARTIAL | DB_DBT_USERMEM); - if ((ret = __dbc_get(fdbc, skeyp, &fdata, - DB_SET | rmw)) == DB_NOTFOUND || - ret == DB_KEYEMPTY) - ret = DB_FOREIGN_CONFLICT; - } - F_SET(skeyp, DB_DBT_ISSET); - tskeyp = skeyp; - nskey = 1; - } - if (fdbc != NULL && (t_ret = __dbc_close(fdbc)) != 0 && - ret == 0) - ret = t_ret; - fdbc = NULL; - if (ret != 0) - goto err; - - /* - * If we have the old record, we can generate and remove any - * old secondary key(s) now. We can also skip the secondary - * put if there is no change. - */ - if (FLD_ISSET(*put_statep, DBC_PUT_HAVEREC)) { - if ((ret = __dbc_del_oldskey(sdbp, dbc, - skeyp, pkey, orig_data)) == DB_KEYEXIST) - continue; - else if (ret != 0) - goto err; - } - if (nskey == 0) - continue; - - /* - * Open a cursor in this secondary. - * - * Use the same locker ID as our primary cursor, so that - * we're guaranteed that the locks don't conflict (e.g. in CDB - * or if we're subdatabases that share and want to lock a - * metadata page). - */ - if ((ret = __db_cursor_int(sdbp, dbc->thread_info, dbc->txn, - sdbp->type, PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0) - goto err; - - /* - * If we're in CDB, updates will fail since the new cursor - * isn't a writer. 
However, we hold the WRITE lock in the - * primary and will for as long as our new cursor lasts, - * and the primary and secondary share a lock file ID, - * so it's safe to consider this a WRITER. The close - * routine won't try to put anything because we don't - * really have a lock. - */ - if (CDB_LOCKING(env)) { - DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID); - F_SET(sdbc, DBC_WRITER); - } - - /* - * Swap the primary key to the byte order of this secondary, if - * necessary. By doing this now, we can compare directly - * against the data already in the secondary without having to - * swap it after reading. - */ - SWAP_IF_NEEDED(sdbp, pkey); - - for (; nskey > 0 && ret == 0; nskey--, tskeyp++) { - /* Skip this key if it is already in the database. */ - if (!F_ISSET(tskeyp, DB_DBT_ISSET)) - continue; - - /* - * There are three cases here-- - * 1) The secondary supports sorted duplicates. - * If we attempt to put a secondary/primary pair - * that already exists, that's a duplicate - * duplicate, and c_put will return DB_KEYEXIST - * (see __db_duperr). This will leave us with - * exactly one copy of the secondary/primary pair, - * and this is just right--we'll avoid deleting it - * later, as the old and new secondaries will - * match (since the old secondary is the dup dup - * that's already there). - * 2) The secondary supports duplicates, but they're not - * sorted. We need to avoid putting a duplicate - * duplicate, because the matching old and new - * secondaries will prevent us from deleting - * anything and we'll wind up with two secondary - * records that point to the same primary key. Do - * a c_get(DB_GET_BOTH); only do the put if the - * secondary doesn't exist. - * 3) The secondary doesn't support duplicates at all. - * In this case, secondary keys must be unique; - * if another primary key already exists for this - * secondary key, we have to either overwrite it - * or not put this one, and in either case we've - * corrupted the secondary index. 
Do a - * c_get(DB_SET). If the secondary/primary pair - * already exists, do nothing; if the secondary - * exists with a different primary, return an - * error; and if the secondary does not exist, - * put it. - */ - if (!F_ISSET(sdbp, DB_AM_DUP)) { - /* Case 3. */ - memset(&oldpkey, 0, sizeof(DBT)); - F_SET(&oldpkey, DB_DBT_MALLOC); - ret = __dbc_get(sdbc, - tskeyp, &oldpkey, rmw | DB_SET); - if (ret == 0) { - cmp = __bam_defcmp(sdbp, - &oldpkey, pkey); - __os_ufree(env, oldpkey.data); - /* - * If the secondary key is unchanged, - * skip the put and go on to the next - * one. - */ - if (cmp == 0) - continue; - - __db_errx(env, "%s%s", - "Put results in a non-unique secondary key in an ", - "index not configured to support duplicates"); - ret = EINVAL; - } - if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) - break; - } else if (!F_ISSET(sdbp, DB_AM_DUPSORT)) { - /* Case 2. */ - DB_INIT_DBT(tempskey, - tskeyp->data, tskeyp->size); - DB_INIT_DBT(temppkey, - pkey->data, pkey->size); - ret = __dbc_get(sdbc, &tempskey, &temppkey, - rmw | DB_GET_BOTH); - if (ret != DB_NOTFOUND && ret != DB_KEYEMPTY) - break; - } - - ret = __dbc_put(sdbc, tskeyp, pkey, - DB_UPDATE_SECONDARY); - - /* - * We don't know yet whether this was a put-overwrite - * that in fact changed nothing. If it was, we may get - * DB_KEYEXIST. This is not an error. - */ - if (ret == DB_KEYEXIST) - ret = 0; - } - - /* Make sure the primary key is back in native byte-order. */ - SWAP_IF_NEEDED(sdbp, pkey); - - if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) - ret = t_ret; - - if (ret != 0) - goto err; - - /* - * Mark that we have a key for this secondary so we can check - * it later before deleting the old one. We can't set it - * earlier or it would be cleared in the calls above. 
- */ - F_SET(skeyp, DB_DBT_ISSET); - } -err: if (sdbp != NULL && - (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0) - ret = t_ret; - COMPQUIET(s_count, 0); - return (ret); -} - -static int -__dbc_put_primary(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp, *sdbp; - DBC *dbc_n, *pdbc; - DBT oldkey, olddata, newdata; - DBT *all_skeys, *skeyp, *tskeyp; - ENV *env; - int ret, t_ret, s_count; - u_int32_t nskey, put_state, rmw; - - dbp = dbc->dbp; - env = dbp->env; - ret = t_ret = s_count = 0; - put_state = 0; - sdbp = NULL; - pdbc = dbc_n = NULL; - all_skeys = NULL; - memset(&newdata, 0, sizeof(DBT)); - memset(&olddata, 0, sizeof(DBT)); - - /* - * We do multiple cursor operations in some cases and subsequently - * access the data DBT information. Set DB_DBT_MALLOC so we don't risk - * modification of the data between our uses of it. - */ - F_SET(&olddata, DB_DBT_MALLOC); - - /* - * We have at least one secondary which we may need to update. - * - * There is a rather vile locking issue here. Secondary gets - * will always involve acquiring a read lock in the secondary, - * then acquiring a read lock in the primary. Ideally, we - * would likewise perform puts by updating all the secondaries - * first, then doing the actual put in the primary, to avoid - * deadlock (since having multiple threads doing secondary - * gets and puts simultaneously is probably a common case). - * - * However, if this put is a put-overwrite--and we have no way to - * tell in advance whether it will be--we may need to delete - * an outdated secondary key. In order to find that old - * secondary key, we need to get the record we're overwriting, - * before we overwrite it. - * - * (XXX: It would be nice to avoid this extra get, and have the - * underlying put routines somehow pass us the old record - * since they need to traverse the tree anyway. 
I'm saving - * this optimization for later, as it's a lot of work, and it - * would be hard to fit into this locking paradigm anyway.) - * - * The simple thing to do would be to go get the old record before - * we do anything else. Unfortunately, though, doing so would - * violate our "secondary, then primary" lock acquisition - * ordering--even in the common case where no old primary record - * exists, we'll still acquire and keep a lock on the page where - * we're about to do the primary insert. - * - * To get around this, we do the following gyrations, which - * hopefully solve this problem in the common case: - * - * 1) If this is a c_put(DB_CURRENT), go ahead and get the - * old record. We already hold the lock on this page in - * the primary, so no harm done, and we'll need the primary - * key (which we weren't passed in this case) to do any - * secondary puts anyway. - * If this is a put(DB_APPEND), then we need to insert the item, - * so that we can know the key value. So go ahead and insert. In - * the case of a put(DB_APPEND) without secondaries it is - * implemented in the __db_put method as an optimization. - * - * 2) If we're doing a partial put, we need to perform the - * get on the primary key right away, since we don't have - * the whole datum that the secondary key is based on. - * We may also need to pad out the record if the primary - * has a fixed record length. - * - * 3) Loop through the secondary indices, putting into each a - * new secondary key that corresponds to the new record. - * - * 4) If we haven't done so in (1) or (2), get the old primary - * key/data pair. If one does not exist--the common case--we're - * done with secondary indices, and can go straight on to the - * primary put. - * - * 5) If we do have an old primary key/data pair, however, we need - * to loop through all the secondaries a second time and delete - * the old secondary in each. 
- */ - s_count = __db_s_count(dbp); - if ((ret = __os_calloc(env, - (u_int)s_count, sizeof(DBT), &all_skeys)) != 0) - goto err; - - /* - * Primary indices can't have duplicates, so only DB_APPEND, - * DB_CURRENT, DB_KEYFIRST, and DB_KEYLAST make any sense. Other flags - * should have been caught by the checking routine, but - * add a sprinkling of paranoia. - */ - DB_ASSERT(env, flags == DB_APPEND || flags == DB_CURRENT || - flags == DB_KEYFIRST || flags == DB_KEYLAST || - flags == DB_NOOVERWRITE || flags == DB_OVERWRITE_DUP); - - /* - * We'll want to use DB_RMW in a few places, but it's only legal - * when locking is on. - */ - rmw = STD_LOCKING(dbc) ? DB_RMW : 0; - if (rmw) - FLD_SET(put_state, DBC_PUT_RMW); - - /* Resolve the primary key if required (Step 1). */ - if (flags == DB_CURRENT) { - if ((ret = __dbc_put_resolve_key(dbc, - &oldkey, &olddata, &put_state, flags)) != 0) - goto err; - key = &oldkey; - } else if (flags == DB_APPEND) { - if ((ret = __dbc_put_append(dbc, - key, data, &put_state, flags)) != 0) - goto err; - } - - /* - * PUT_NOOVERWRITE with secondaries is a troublesome case. We need - * to check that the insert will work prior to making any changes - * to secondaries. Try to work within the locking constraints outlined - * above. - * - * This is DB->put (DB_NOOVERWRITE). DBC->put(DB_NODUPDATA) is not - * relevant since it is only valid on DBs that support duplicates, - * which primaries with secondaries can't have. - */ - if (flags == DB_NOOVERWRITE) { - /* Don't bother retrieving the data. */ - F_SET(key, DB_DBT_ISSET); - olddata.dlen = 0; - olddata.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; - if (__dbc_get(dbc, key, &olddata, DB_SET) != DB_NOTFOUND) { - ret = DB_KEYEXIST; - goto done; - } - } - - /* - * Check for partial puts using DB_DBT_PARTIAL (Step 2). 
- */ - if (F_ISSET(data, DB_DBT_PARTIAL)) { - if ((ret = __dbc_put_partial(dbc, - key, data, &olddata, &newdata, &put_state, flags)) != 0) - goto err; - } else { - newdata = *data; - } - - /* - * Check for partial puts, with fixed length record databases (Step 2). - */ - if ((dbp->type == DB_RECNO && F_ISSET(dbp, DB_AM_FIXEDLEN)) || - (dbp->type == DB_QUEUE)) { - if ((ret = __dbc_put_fixed_len(dbc, data, &newdata)) != 0) - goto err; - } - - /* Validate any foreign databases, and update secondaries. (Step 3). */ - if ((ret = __dbc_put_secondaries(dbc, key, &newdata, - &olddata, s_count, all_skeys, &put_state)) - != 0) - goto err; - /* - * If we've already got the old primary key/data pair, the secondary - * updates are already done. - */ - if (FLD_ISSET(put_state, DBC_PUT_HAVEREC)) - goto done; - - /* - * If still necessary, go get the old primary key/data. (Step 4.) - * - * See the comments in step 2. This is real familiar. - */ - if ((ret = __dbc_idup(dbc, &pdbc, 0)) != 0) - goto err; - DB_ASSERT(env, flags != DB_CURRENT); - F_SET(key, DB_DBT_ISSET); - ret = __dbc_get(pdbc, key, &olddata, rmw | DB_SET); - if (ret == DB_KEYEMPTY || ret == DB_NOTFOUND) { - FLD_SET(put_state, DBC_PUT_NODEL); - ret = 0; - } - if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0) - ret = t_ret; - if (ret != 0) - goto err; - - /* - * Check whether we do in fact have an old record we may need to - * delete. (Step 5). - */ - if (FLD_ISSET(put_state, DBC_PUT_NODEL)) - goto done; - - for (ret = __db_s_first(dbp, &sdbp), skeyp = all_skeys; - sdbp != NULL && ret == 0; - ret = __db_s_next(&sdbp, dbc->txn), skeyp++) { - DB_ASSERT(env, skeyp - all_skeys < s_count); - /* - * Don't process this secondary if the key is immutable. We - * know that the old record exists, so this optimization can - * always be used. 
- */ - if (FLD_ISSET(sdbp->s_assoc_flags, DB_ASSOC_IMMUTABLE_KEY)) - continue; - - if ((ret = __dbc_del_oldskey(sdbp, dbc, - skeyp, key, &olddata)) != 0 && ret != DB_KEYEXIST) - goto err; - } - if (ret != 0) - goto err; - -done: -err: - if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0) - ret = t_ret; - - /* If newdata or olddata were used, free their buffers. */ - if (newdata.data != NULL && newdata.data != data->data) - __os_free(env, newdata.data); - if (olddata.data != NULL) - __os_ufree(env, olddata.data); - - CDB_LOCKING_DONE(env, dbc); - - if (sdbp != NULL && - (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0) - ret = t_ret; - - for (skeyp = all_skeys; skeyp - all_skeys < s_count; skeyp++) { - if (F_ISSET(skeyp, DB_DBT_MULTIPLE)) { - for (nskey = skeyp->size, tskeyp = (DBT *)skeyp->data; - nskey > 0; - nskey--, tskeyp++) - FREE_IF_NEEDED(env, tskeyp); - } - FREE_IF_NEEDED(env, skeyp); - } - if (all_skeys != NULL) - __os_free(env, all_skeys); - return (ret); -} - -/* - * __dbc_put -- - * Put using a cursor. - * - * PUBLIC: int __dbc_put __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_put(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - int ret; - - dbp = dbc->dbp; - ret = 0; - - /* - * Putting to secondary indices is forbidden; when we need to - * internally update one, we're called with a private flag, - * DB_UPDATE_SECONDARY, which does the right thing but won't return an - * error during flag checking. - * - * As a convenience, many places that want the default DB_KEYLAST - * behavior call DBC->put with flags == 0. Protect lower-level code - * here by translating that. - * - * Lastly, the DB_OVERWRITE_DUP flag is equivalent to DB_KEYLAST unless - * there are sorted duplicates. Limit the number of places that need - * to test for it explicitly. 
- */ - if (flags == DB_UPDATE_SECONDARY || flags == 0 || - (flags == DB_OVERWRITE_DUP && !F_ISSET(dbp, DB_AM_DUPSORT))) - flags = DB_KEYLAST; - - CDB_LOCKING_INIT(dbc->env, dbc); - - /* - * Check to see if we are a primary and have secondary indices. - * If we are not, we save ourselves a good bit of trouble and - * just skip to the "normal" put. - */ - if (DB_IS_PRIMARY(dbp) && - ((ret = __dbc_put_primary(dbc, key, data, flags)) != 0)) - return (ret); - - /* - * If this is an append operation, the insert was done prior to the - * secondary updates, so we are finished. - */ - if (flags == DB_APPEND) - return (ret); - -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbp)) - return (__bamc_compress_put(dbc, key, data, flags)); -#endif - - return (__dbc_iput(dbc, key, data, flags)); -} - -/* - * __dbc_iput -- - * Implementation of put using a cursor. - * - * PUBLIC: int __dbc_iput __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_iput(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DBC *dbc_n, *oldopd, *opd; - db_pgno_t pgno; - int ret, t_ret; - u_int32_t tmp_flags; - - /* - * Cursor Cleanup Note: - * All of the cursors passed to the underlying access methods by this - * routine are duplicated cursors. On return, any referenced pages - * will be discarded, and, if the cursor is not intended to be used - * again, the close function will be called. So, pages/locks that - * the cursor references do not need to be resolved by the underlying - * functions. - */ - dbc_n = NULL; - ret = t_ret = 0; - - /* - * If we have an off-page duplicates cursor, and the operation applies - * to it, perform the operation. Duplicate the cursor and call the - * underlying function. - * - * Off-page duplicate trees are locked in the primary tree, that is, - * we acquire a write lock in the primary tree and no locks in the - * off-page dup tree. If the put operation is done in an off-page - * duplicate tree, call the primary cursor's upgrade routine first. 
- */ - if (dbc->internal->opd != NULL && - (flags == DB_AFTER || flags == DB_BEFORE || flags == DB_CURRENT)) { - /* - * A special case for hash off-page duplicates. Hash doesn't - * support (and is documented not to support) put operations - * relative to a cursor which references an already deleted - * item. For consistency, apply the same criteria to off-page - * duplicates as well. - */ - if (dbc->dbtype == DB_HASH && F_ISSET( - ((BTREE_CURSOR *)(dbc->internal->opd->internal)), - C_DELETED)) { - ret = DB_NOTFOUND; - goto err; - } - - if ((ret = dbc->am_writelock(dbc)) != 0 || - (ret = __dbc_dup(dbc, &dbc_n, DB_POSITION)) != 0) - goto err; - opd = dbc_n->internal->opd; - if ((ret = opd->am_put( - opd, key, data, flags, NULL)) != 0) - goto err; - goto done; - } - - /* - * Perform an operation on the main cursor. Duplicate the cursor, - * and call the underlying function. - */ - if (flags == DB_AFTER || flags == DB_BEFORE || flags == DB_CURRENT) - tmp_flags = DB_POSITION; - else - tmp_flags = 0; - - /* - * If this cursor is going to be closed immediately, we don't - * need to take precautions to clean it up on error. - */ - if (F_ISSET(dbc, DBC_TRANSIENT | DBC_PARTITIONED)) - dbc_n = dbc; - else if ((ret = __dbc_idup(dbc, &dbc_n, tmp_flags)) != 0) - goto err; - - pgno = PGNO_INVALID; - if ((ret = dbc_n->am_put(dbc_n, key, data, flags, &pgno)) != 0) - goto err; - - /* - * We may be referencing a new off-page duplicates tree. Acquire - * a new cursor and call the underlying function. - */ - if (pgno != PGNO_INVALID) { - oldopd = dbc_n->internal->opd; - if ((ret = __dbc_newopd(dbc, pgno, oldopd, &opd)) != 0) { - dbc_n->internal->opd = opd; - goto err; - } - - dbc_n->internal->opd = opd; - opd->internal->pdbc = dbc_n; - - if (flags == DB_NOOVERWRITE) - flags = DB_KEYLAST; - if ((ret = opd->am_put( - opd, key, data, flags, NULL)) != 0) - goto err; - } - -done: -err: /* Cleanup and cursor resolution. 
*/ - if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __dbc_del_oldskey -- - * Delete an old secondary key, if necessary. - * Returns DB_KEYEXIST if the new and old keys match.. - */ -static int -__dbc_del_oldskey(sdbp, dbc, skey, pkey, olddata) - DB *sdbp; - DBC *dbc; - DBT *skey, *pkey, *olddata; -{ - DB *dbp; - DBC *sdbc; - DBT *toldskeyp, *tskeyp; - DBT oldskey, temppkey, tempskey; - ENV *env; - int ret, t_ret; - u_int32_t i, noldskey, nsame, nskey, rmw; - - sdbc = NULL; - dbp = sdbp->s_primary; - env = dbp->env; - nsame = 0; - rmw = STD_LOCKING(dbc) ? DB_RMW : 0; - - /* - * Get the old secondary key. - */ - memset(&oldskey, 0, sizeof(DBT)); - if ((ret = sdbp->s_callback(sdbp, pkey, olddata, &oldskey)) != 0) { - if (ret == DB_DONOTINDEX || - (F_ISSET(&oldskey, DB_DBT_MULTIPLE) && oldskey.size == 0)) - /* There's no old key to delete. */ - ret = 0; - return (ret); - } - - if (F_ISSET(&oldskey, DB_DBT_MULTIPLE)) { -#ifdef DIAGNOSTIC - __db_check_skeyset(sdbp, &oldskey); -#endif - toldskeyp = (DBT *)oldskey.data; - noldskey = oldskey.size; - } else { - toldskeyp = &oldskey; - noldskey = 1; - } - - if (F_ISSET(skey, DB_DBT_MULTIPLE)) { - nskey = skey->size; - skey = (DBT *)skey->data; - } else - nskey = F_ISSET(skey, DB_DBT_ISSET) ? 1 : 0; - - for (; noldskey > 0 && ret == 0; noldskey--, toldskeyp++) { - /* - * Check whether this old secondary key is also a new key - * before we delete it. Note that bt_compare is (and must be) - * set no matter what access method we're in. 
- */ - for (i = 0, tskeyp = skey; i < nskey; i++, tskeyp++) - if (((BTREE *)sdbp->bt_internal)->bt_compare(sdbp, - toldskeyp, tskeyp) == 0) { - nsame++; - F_CLR(tskeyp, DB_DBT_ISSET); - break; - } - - if (i < nskey) { - FREE_IF_NEEDED(env, toldskeyp); - continue; - } - - if (sdbc == NULL) { - if ((ret = __db_cursor_int(sdbp, - dbc->thread_info, dbc->txn, sdbp->type, - PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0) - goto err; - if (CDB_LOCKING(env)) { - DB_ASSERT(env, - sdbc->mylock.off == LOCK_INVALID); - F_SET(sdbc, DBC_WRITER); - } - } - - /* - * Don't let c_get(DB_GET_BOTH) stomp on our data. Use - * temporary DBTs instead. - */ - SWAP_IF_NEEDED(sdbp, pkey); - DB_INIT_DBT(temppkey, pkey->data, pkey->size); - DB_INIT_DBT(tempskey, toldskeyp->data, toldskeyp->size); - if ((ret = __dbc_get(sdbc, - &tempskey, &temppkey, rmw | DB_GET_BOTH)) == 0) - ret = __dbc_del(sdbc, DB_UPDATE_SECONDARY); - else if (ret == DB_NOTFOUND) - ret = __db_secondary_corrupt(dbp); - SWAP_IF_NEEDED(sdbp, pkey); - FREE_IF_NEEDED(env, toldskeyp); - } - -err: for (; noldskey > 0; noldskey--, toldskeyp++) - FREE_IF_NEEDED(env, toldskeyp); - FREE_IF_NEEDED(env, &oldskey); - if (sdbc != NULL && (t_ret = __dbc_close(sdbc)) != 0 && ret == 0) - ret = t_ret; - if (ret == 0 && nsame == nskey) - return (DB_KEYEXIST); - return (ret); -} - -/* - * __db_duperr() - * Error message: we don't currently support sorted duplicate duplicates. - * PUBLIC: int __db_duperr __P((DB *, u_int32_t)); - */ -int -__db_duperr(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - /* - * If we run into this error while updating a secondary index, - * don't yell--there's no clean way to pass DB_NODUPDATA in along - * with DB_UPDATE_SECONDARY, but we may run into this problem - * in a normal, non-error course of events. - * - * !!! 
- * If and when we ever permit duplicate duplicates in sorted-dup - * databases, we need to either change the secondary index code - * to check for dup dups, or we need to maintain the implicit - * "DB_NODUPDATA" behavior for databases with DB_AM_SECONDARY set. - */ - if (flags != DB_NODUPDATA && !F_ISSET(dbp, DB_AM_SECONDARY)) - __db_errx(dbp->env, - "Duplicate data items are not supported with sorted data"); - return (DB_KEYEXIST); -} - -/* - * __dbc_cleanup -- - * Clean up duplicate cursors. - * - * PUBLIC: int __dbc_cleanup __P((DBC *, DBC *, int)); - */ -int -__dbc_cleanup(dbc, dbc_n, failed) - DBC *dbc, *dbc_n; - int failed; -{ - DB *dbp; - DBC *opd; - DBC_INTERNAL *internal; - DB_MPOOLFILE *mpf; - int ret, t_ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - internal = dbc->internal; - ret = 0; - - /* Discard any pages we're holding. */ - if (internal->page != NULL) { - if ((t_ret = __memp_fput(mpf, dbc->thread_info, - internal->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - internal->page = NULL; - } - opd = internal->opd; - if (opd != NULL && opd->internal->page != NULL) { - if ((t_ret = __memp_fput(mpf, dbc->thread_info, - opd->internal->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - opd->internal->page = NULL; - } - - /* - * If dbc_n is NULL, there's no internal cursor swapping to be done - * and no dbc_n to close--we probably did the entire operation on an - * offpage duplicate cursor. Just return. - * - * If dbc and dbc_n are the same, we're either inside a DB->{put/get} - * operation, and as an optimization we performed the operation on - * the main cursor rather than on a duplicated one, or we're in a - * bulk get that can't have moved the cursor (DB_MULTIPLE with the - * initial c_get operation on an off-page dup cursor). Just - * return--either we know we didn't move the cursor, or we're going - * to close it before we return to application code, so we're sure - * not to visibly violate the "cursor stays put on error" rule. 
- */ - if (dbc_n == NULL || dbc == dbc_n) - return (ret); - - if (dbc_n->internal->page != NULL) { - if ((t_ret = __memp_fput(mpf, dbc->thread_info, - dbc_n->internal->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - dbc_n->internal->page = NULL; - } - opd = dbc_n->internal->opd; - if (opd != NULL && opd->internal->page != NULL) { - if ((t_ret = __memp_fput(mpf, dbc->thread_info, - opd->internal->page, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - opd->internal->page = NULL; - } - - /* - * If we didn't fail before entering this routine or just now when - * freeing pages, swap the interesting contents of the old and new - * cursors. - */ - if (!failed && ret == 0) { - if (opd != NULL) - opd->internal->pdbc = dbc; - if (internal->opd != NULL) - internal->opd->internal->pdbc = dbc_n; - dbc->internal = dbc_n->internal; - dbc_n->internal = internal; - } - - /* - * Close the cursor we don't care about anymore. The close can fail, - * but we only expect DB_LOCK_DEADLOCK failures. This violates our - * "the cursor is unchanged on error" semantics, but since all you can - * do with a DB_LOCK_DEADLOCK failure is close the cursor, I believe - * that's OK. - * - * XXX - * There's no way to recover from failure to close the old cursor. - * All we can do is move to the new position and return an error. - * - * XXX - * We might want to consider adding a flag to the cursor, so that any - * subsequent operations other than close just return an error? - */ - if ((t_ret = __dbc_close(dbc_n)) != 0 && ret == 0) - ret = t_ret; - - /* - * If this was an update that is supporting dirty reads - * then we may have just swapped our read for a write lock - * which is held by the surviving cursor. We need - * to explicitly downgrade this lock. The closed cursor - * may only have had a read lock. 
- */ - if (F_ISSET(dbp, DB_AM_READ_UNCOMMITTED) && - dbc->internal->lock_mode == DB_LOCK_WRITE) { - if ((t_ret = - __TLPUT(dbc, dbc->internal->lock)) != 0 && ret == 0) - ret = t_ret; - if (t_ret == 0) - dbc->internal->lock_mode = DB_LOCK_WWRITE; - if (dbc->internal->page != NULL && (t_ret = - __memp_shared(dbp->mpf, dbc->internal->page)) != 0 && - ret == 0) - ret = t_ret; - } - - return (ret); -} - -/* - * __dbc_secondary_get_pp -- - * This wrapper function for DBC->pget() is the DBC->get() function - * for a secondary index cursor. - * - * PUBLIC: int __dbc_secondary_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_secondary_get_pp(dbc, skey, data, flags) - DBC *dbc; - DBT *skey, *data; - u_int32_t flags; -{ - DB_ASSERT(dbc->env, F_ISSET(dbc->dbp, DB_AM_SECONDARY)); - return (__dbc_pget_pp(dbc, skey, NULL, data, flags)); -} - -/* - * __dbc_pget -- - * Get a primary key/data pair through a secondary index. - * - * PUBLIC: int __dbc_pget __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_pget(dbc, skey, pkey, data, flags) - DBC *dbc; - DBT *skey, *pkey, *data; - u_int32_t flags; -{ - DB *pdbp, *sdbp; - DBC *dbc_n, *pdbc; - DBT nullpkey; - u_int32_t save_pkey_flags, tmp_flags, tmp_read_locking, tmp_rmw; - int pkeymalloc, ret, t_ret; - - sdbp = dbc->dbp; - pdbp = sdbp->s_primary; - dbc_n = NULL; - pkeymalloc = t_ret = 0; - - /* - * The challenging part of this function is getting the behavior - * right for all the various permutations of DBT flags. The - * next several blocks handle the various cases we need to - * deal with specially. - */ - - /* - * We may be called with a NULL pkey argument, if we've been - * wrapped by a 2-DBT get call. If so, we need to use our - * own DBT. - */ - if (pkey == NULL) { - memset(&nullpkey, 0, sizeof(DBT)); - pkey = &nullpkey; - } - - /* Clear OR'd in additional bits so we can check for flag equality. 
*/ - tmp_rmw = LF_ISSET(DB_RMW); - LF_CLR(DB_RMW); - - SET_READ_LOCKING_FLAGS(dbc, tmp_read_locking); - /* - * DB_GET_RECNO is a special case, because we're interested not in - * the primary key/data pair, but rather in the primary's record - * number. - */ - if (flags == DB_GET_RECNO) { - if (tmp_rmw) - F_SET(dbc, DBC_RMW); - F_SET(dbc, tmp_read_locking); - ret = __dbc_pget_recno(dbc, pkey, data, flags); - if (tmp_rmw) - F_CLR(dbc, DBC_RMW); - /* Clear the temp flags, but leave WAS_READ_COMMITTED. */ - F_CLR(dbc, tmp_read_locking & ~DBC_WAS_READ_COMMITTED); - return (ret); - } - - /* - * If the DBTs we've been passed don't have any of the - * user-specified memory management flags set, we want to make sure - * we return values using the DBTs dbc->rskey, dbc->rkey, and - * dbc->rdata, respectively. - * - * There are two tricky aspects to this: first, we need to pass - * skey and pkey *in* to the initial c_get on the secondary key, - * since either or both may be looked at by it (depending on the - * get flag). Second, we must not use a normal DB->get call - * on the secondary, even though that's what we want to accomplish, - * because the DB handle may be free-threaded. Instead, - * we open a cursor, then take steps to ensure that we actually use - * the rkey/rdata from the *secondary* cursor. - * - * We accomplish all this by passing in the DBTs we started out - * with to the c_get, but swapping the contents of rskey and rkey, - * respectively, into rkey and rdata; __db_ret will treat them like - * the normal key/data pair in a c_get call, and will realloc them as - * need be (this is "step 1"). Then, for "step 2", we swap back - * rskey/rkey/rdata to normal, and do a get on the primary with the - * secondary dbc appointed as the owner of the returned-data memory. - * - * Note that in step 2, we copy the flags field in case we need to - * pass down a DB_DBT_PARTIAL or other flag that is compatible with - * letting DB do the memory management. 
- */ - - /* - * It is correct, though slightly sick, to attempt a partial get of a - * primary key. However, if we do so here, we'll never find the - * primary record; clear the DB_DBT_PARTIAL field of pkey just for the - * duration of the next call. - */ - save_pkey_flags = pkey->flags; - F_CLR(pkey, DB_DBT_PARTIAL); - - /* - * Now we can go ahead with the meat of this call. First, get the - * primary key from the secondary index. (What exactly we get depends - * on the flags, but the underlying cursor get will take care of the - * dirty work.) Duplicate the cursor, in case the later get on the - * primary fails. - */ - switch (flags) { - case DB_CURRENT: - case DB_GET_BOTHC: - case DB_NEXT: - case DB_NEXT_DUP: - case DB_NEXT_NODUP: - case DB_PREV: - case DB_PREV_DUP: - case DB_PREV_NODUP: - tmp_flags = DB_POSITION; - break; - default: - tmp_flags = 0; - break; - } - - if (F_ISSET(dbc, DBC_PARTITIONED | DBC_TRANSIENT)) - dbc_n = dbc; - else if ((ret = __dbc_dup(dbc, &dbc_n, tmp_flags)) != 0) - return (ret); - - F_SET(dbc_n, DBC_TRANSIENT); - - if (tmp_rmw) - F_SET(dbc_n, DBC_RMW); - F_SET(dbc_n, tmp_read_locking); - - /* - * If we've been handed a primary key, it will be in native byte order, - * so we need to swap it before reading from the secondary. - */ - if (flags == DB_GET_BOTH || flags == DB_GET_BOTHC || - flags == DB_GET_BOTH_RANGE) - SWAP_IF_NEEDED(sdbp, pkey); - -retry: /* Step 1. */ - dbc_n->rdata = dbc->rkey; - dbc_n->rkey = dbc->rskey; - ret = __dbc_get(dbc_n, skey, pkey, flags); - /* Restore pkey's flags in case we stomped the PARTIAL flag. */ - pkey->flags = save_pkey_flags; - - /* - * We need to swap the primary key to native byte order if we read it - * successfully, or if we swapped it on entry above. We can't return - * with the application's data modified. 
- */ - if (ret == 0 || flags == DB_GET_BOTH || flags == DB_GET_BOTHC || - flags == DB_GET_BOTH_RANGE) - SWAP_IF_NEEDED(sdbp, pkey); - - if (ret != 0) - goto err; - - /* - * Now we're ready for "step 2". If either or both of pkey and data do - * not have memory management flags set--that is, if DB is managing - * their memory--we need to swap around the rkey/rdata structures so - * that we don't wind up trying to use memory managed by the primary - * database cursor, which we'll close before we return. - * - * !!! - * If you're carefully following the bouncing ball, you'll note that in - * the DB-managed case, the buffer hanging off of pkey is the same as - * dbc->rkey->data. This is just fine; we may well realloc and stomp - * on it when we return, if we're doing a DB_GET_BOTH and need to - * return a different partial or key (depending on the comparison - * function), but this is safe. - * - * !!! - * We need to use __db_cursor_int here rather than simply calling - * pdbp->cursor, because otherwise, if we're in CDB, we'll allocate a - * new locker ID and leave ourselves open to deadlocks. (Even though - * we're only acquiring read locks, we'll still block if there are any - * waiters.) - */ - if ((ret = __db_cursor_int(pdbp, dbc->thread_info, - dbc->txn, pdbp->type, PGNO_INVALID, 0, dbc->locker, &pdbc)) != 0) - goto err; - - F_SET(pdbc, tmp_read_locking | - F_ISSET(dbc, DBC_READ_UNCOMMITTED | DBC_READ_COMMITTED | DBC_RMW)); - - /* - * We're about to use pkey a second time. If DB_DBT_MALLOC is set on - * it, we'll leak the memory we allocated the first time. Thus, set - * DB_DBT_REALLOC instead so that we reuse that memory instead of - * leaking it. - * - * Alternatively, if the application is handling copying for pkey, we - * need to take a copy now. The copy will be freed on exit from - * __dbc_pget_pp (and we must be coming through there if DB_DBT_USERCOPY - * is set). 
In the case of DB_GET_BOTH_RANGE, the pkey supplied by - * the application has already been copied in but the value may have - * changed in the search. In that case, free the original copy and get - * a new one. - * - * !!! - * This assumes that the user must always specify a compatible realloc - * function if a malloc function is specified. I think this is a - * reasonable requirement. - */ - if (F_ISSET(pkey, DB_DBT_MALLOC)) { - F_CLR(pkey, DB_DBT_MALLOC); - F_SET(pkey, DB_DBT_REALLOC); - pkeymalloc = 1; - } else if (F_ISSET(pkey, DB_DBT_USERCOPY)) { - if (flags == DB_GET_BOTH_RANGE) - __dbt_userfree(sdbp->env, NULL, pkey, NULL); - if ((ret = __dbt_usercopy(sdbp->env, pkey)) != 0) - goto err; - } - - /* - * Do the actual get. Set DBC_TRANSIENT since we don't care about - * preserving the position on error, and it's faster. SET_RET_MEM so - * that the secondary DBC owns any returned-data memory. - */ - F_SET(pdbc, DBC_TRANSIENT); - SET_RET_MEM(pdbc, dbc); - ret = __dbc_get(pdbc, pkey, data, DB_SET); - - /* - * If the item wasn't found in the primary, this is a bug; our - * secondary has somehow gotten corrupted, and contains elements that - * don't correspond to anything in the primary. Complain. - */ - - /* Now close the primary cursor. */ - if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0) - ret = t_ret; - - else if (ret == DB_NOTFOUND) { - if (!F_ISSET(pdbc, DBC_READ_UNCOMMITTED)) - ret = __db_secondary_corrupt(pdbp); - else switch (flags) { - case DB_GET_BOTHC: - case DB_NEXT: - case DB_NEXT_DUP: - case DB_NEXT_NODUP: - case DB_PREV: - case DB_PREV_DUP: - case DB_PREV_NODUP: - goto retry; - default: - break; - } - } - -err: /* Cleanup and cursor resolution. */ - if ((t_ret = __dbc_cleanup(dbc, dbc_n, ret)) != 0 && ret == 0) - ret = t_ret; - if (pkeymalloc) { - /* - * If pkey had a MALLOC flag, we need to restore it; otherwise, - * if the user frees the buffer but reuses the DBT without - * NULL'ing its data field or changing the flags, we may drop - * core. 
- */ - F_CLR(pkey, DB_DBT_REALLOC); - F_SET(pkey, DB_DBT_MALLOC); - } - - return (ret); -} - -/* - * __dbc_pget_recno -- - * Perform a DB_GET_RECNO c_pget on a secondary index. Returns - * the secondary's record number in the pkey field and the primary's - * in the data field. - */ -static int -__dbc_pget_recno(sdbc, pkey, data, flags) - DBC *sdbc; - DBT *pkey, *data; - u_int32_t flags; -{ - DB *pdbp, *sdbp; - DBC *pdbc; - DBT discardme, primary_key; - ENV *env; - db_recno_t oob; - u_int32_t rmw; - int ret, t_ret; - - sdbp = sdbc->dbp; - pdbp = sdbp->s_primary; - env = sdbp->env; - pdbc = NULL; - ret = t_ret = 0; - - rmw = LF_ISSET(DB_RMW); - - memset(&discardme, 0, sizeof(DBT)); - F_SET(&discardme, DB_DBT_USERMEM | DB_DBT_PARTIAL); - - oob = RECNO_OOB; - - /* - * If the primary is an rbtree, we want its record number, whether - * or not the secondary is one too. Fetch the recno into "data". - * - * If it's not an rbtree, return RECNO_OOB in "data". - */ - if (F_ISSET(pdbp, DB_AM_RECNUM)) { - /* - * Get the primary key, so we can find the record number - * in the primary. (We're uninterested in the secondary key.) - */ - memset(&primary_key, 0, sizeof(DBT)); - F_SET(&primary_key, DB_DBT_MALLOC); - if ((ret = __dbc_get(sdbc, - &discardme, &primary_key, rmw | DB_CURRENT)) != 0) - return (ret); - - /* - * Open a cursor on the primary, set it to the right record, - * and fetch its recno into "data". - * - * (See __dbc_pget for comments on the use of __db_cursor_int.) - * - * SET_RET_MEM so that the secondary DBC owns any returned-data - * memory. 
- */ - if ((ret = __db_cursor_int(pdbp, sdbc->thread_info, sdbc->txn, - pdbp->type, PGNO_INVALID, 0, sdbc->locker, &pdbc)) != 0) - goto perr; - SET_RET_MEM(pdbc, sdbc); - if ((ret = __dbc_get(pdbc, - &primary_key, &discardme, rmw | DB_SET)) != 0) - goto perr; - - ret = __dbc_get(pdbc, &discardme, data, rmw | DB_GET_RECNO); - -perr: __os_ufree(env, primary_key.data); - if (pdbc != NULL && - (t_ret = __dbc_close(pdbc)) != 0 && ret == 0) - ret = t_ret; - if (ret != 0) - return (ret); - } else if ((ret = __db_retcopy(env, data, &oob, - sizeof(oob), &sdbc->rkey->data, &sdbc->rkey->ulen)) != 0) - return (ret); - - /* - * If the secondary is an rbtree, we want its record number, whether - * or not the primary is one too. Fetch the recno into "pkey". - * - * If it's not an rbtree, return RECNO_OOB in "pkey". - */ - if (F_ISSET(sdbp, DB_AM_RECNUM)) - return (__dbc_get(sdbc, &discardme, pkey, flags)); - else - return (__db_retcopy(env, pkey, &oob, - sizeof(oob), &sdbc->rdata->data, &sdbc->rdata->ulen)); -} - -/* - * __db_wrlock_err -- do not have a write lock. - */ -static int -__db_wrlock_err(env) - ENV *env; -{ - __db_errx(env, "Write attempted on read-only cursor"); - return (EPERM); -} - -/* - * __dbc_del_secondary -- - * Perform a delete operation on a secondary index: call through - * to the primary and delete the primary record that this record - * points to. - * - * Note that deleting the primary record will call c_del on all - * the secondaries, including this one; thus, it is not necessary - * to execute both this function and an actual delete. - */ -static int -__dbc_del_secondary(dbc) - DBC *dbc; -{ - DB *pdbp; - DBC *pdbc; - DBT skey, pkey; - ENV *env; - int ret, t_ret; - u_int32_t rmw; - - pdbp = dbc->dbp->s_primary; - env = pdbp->env; - rmw = STD_LOCKING(dbc) ? DB_RMW : 0; - - /* - * Get the current item that we're pointing at. - * We don't actually care about the secondary key, just - * the primary. 
- */ - memset(&skey, 0, sizeof(DBT)); - memset(&pkey, 0, sizeof(DBT)); - F_SET(&skey, DB_DBT_PARTIAL | DB_DBT_USERMEM); - if ((ret = __dbc_get(dbc, &skey, &pkey, DB_CURRENT)) != 0) - return (ret); - - SWAP_IF_NEEDED(dbc->dbp, &pkey); - - /* - * Create a cursor on the primary with our locker ID, - * so that when it calls back, we don't conflict. - * - * We create a cursor explicitly because there's no - * way to specify the same locker ID if we're using - * locking but not transactions if we use the DB->del - * interface. This shouldn't be any less efficient - * anyway. - */ - if ((ret = __db_cursor_int(pdbp, dbc->thread_info, dbc->txn, - pdbp->type, PGNO_INVALID, 0, dbc->locker, &pdbc)) != 0) - return (ret); - - /* - * See comment in __dbc_put--if we're in CDB, - * we already hold the locks we need, and we need to flag - * the cursor as a WRITER so we don't run into errors - * when we try to delete. - */ - if (CDB_LOCKING(env)) { - DB_ASSERT(env, pdbc->mylock.off == LOCK_INVALID); - F_SET(pdbc, DBC_WRITER); - } - - /* - * Set the new cursor to the correct primary key. Then - * delete it. We don't really care about the datum; - * just reuse our skey DBT. - * - * If the primary get returns DB_NOTFOUND, something is amiss-- - * every record in the secondary should correspond to some record - * in the primary. - */ - if ((ret = __dbc_get(pdbc, &pkey, &skey, DB_SET | rmw)) == 0) - ret = __dbc_del(pdbc, 0); - else if (ret == DB_NOTFOUND) - ret = __db_secondary_corrupt(pdbp); - - if ((t_ret = __dbc_close(pdbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __dbc_del_primary -- - * Perform a delete operation on a primary index. Loop through - * all the secondary indices which correspond to this primary - * database, and delete any secondary keys that point at the current - * record. 
- * - * PUBLIC: int __dbc_del_primary __P((DBC *)); - */ -int -__dbc_del_primary(dbc) - DBC *dbc; -{ - DB *dbp, *sdbp; - DBC *sdbc; - DBT *tskeyp; - DBT data, pkey, skey, temppkey, tempskey; - ENV *env; - u_int32_t nskey, rmw; - int ret, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - sdbp = NULL; - rmw = STD_LOCKING(dbc) ? DB_RMW : 0; - - /* - * If we're called at all, we have at least one secondary. - * (Unfortunately, we can't assert this without grabbing the mutex.) - * Get the current record so that we can construct appropriate - * secondary keys as needed. - */ - memset(&pkey, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - if ((ret = __dbc_get(dbc, &pkey, &data, DB_CURRENT)) != 0) - return (ret); - - memset(&skey, 0, sizeof(DBT)); - for (ret = __db_s_first(dbp, &sdbp); - sdbp != NULL && ret == 0; - ret = __db_s_next(&sdbp, dbc->txn)) { - /* - * Get the secondary key for this secondary and the current - * item. - */ - if ((ret = sdbp->s_callback(sdbp, &pkey, &data, &skey)) != 0) { - /* Not indexing is equivalent to an empty key set. */ - if (ret == DB_DONOTINDEX) { - F_SET(&skey, DB_DBT_MULTIPLE); - skey.size = 0; - } else /* We had a substantive error. Bail. */ - goto err; - } - -#ifdef DIAGNOSTIC - if (F_ISSET(&skey, DB_DBT_MULTIPLE)) - __db_check_skeyset(sdbp, &skey); -#endif - - if (F_ISSET(&skey, DB_DBT_MULTIPLE)) { - tskeyp = (DBT *)skey.data; - nskey = skey.size; - if (nskey == 0) - continue; - } else { - tskeyp = &skey; - nskey = 1; - } - - /* Open a secondary cursor. */ - if ((ret = __db_cursor_int(sdbp, - dbc->thread_info, dbc->txn, sdbp->type, - PGNO_INVALID, 0, dbc->locker, &sdbc)) != 0) - goto err; - /* See comment above and in __dbc_put. */ - if (CDB_LOCKING(env)) { - DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID); - F_SET(sdbc, DBC_WRITER); - } - - for (; nskey > 0; nskey--, tskeyp++) { - /* - * Set the secondary cursor to the appropriate item. - * Delete it. - * - * We want to use DB_RMW if locking is on; it's only - * legal then, though. 
- * - * !!! - * Don't stomp on any callback-allocated buffer in skey - * when we do a c_get(DB_GET_BOTH); use a temp DBT - * instead. Similarly, don't allow pkey to be - * invalidated when the cursor is closed. - */ - DB_INIT_DBT(tempskey, tskeyp->data, tskeyp->size); - SWAP_IF_NEEDED(sdbp, &pkey); - DB_INIT_DBT(temppkey, pkey.data, pkey.size); - if ((ret = __dbc_get(sdbc, &tempskey, &temppkey, - DB_GET_BOTH | rmw)) == 0) - ret = __dbc_del(sdbc, DB_UPDATE_SECONDARY); - else if (ret == DB_NOTFOUND) - ret = __db_secondary_corrupt(dbp); - SWAP_IF_NEEDED(sdbp, &pkey); - FREE_IF_NEEDED(env, tskeyp); - } - - if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) - ret = t_ret; - if (ret != 0) - goto err; - - /* - * In the common case where there is a single secondary key, we - * will have freed any application-allocated data in skey - * already. In the multiple key case, we need to free it here. - * It is safe to do this twice as the macro resets the data - * field. - */ - FREE_IF_NEEDED(env, &skey); - } - -err: if (sdbp != NULL && - (t_ret = __db_s_done(sdbp, dbc->txn)) != 0 && ret == 0) - ret = t_ret; - FREE_IF_NEEDED(env, &skey); - return (ret); -} - -/* - * __dbc_del_foreign -- - * Apply the foreign database constraints for a particular foreign - * database when an item is being deleted (dbc points at item being deleted - * in the foreign database.) - * - * Delete happens in dbp, check for occurrences of key in pdpb. - * Terminology: - * Foreign db = Where delete occurs (dbp). - * Secondary db = Where references to dbp occur (sdbp, a secondary) - * Primary db = sdbp's primary database, references to dbp are secondary - * keys here - * Foreign Key = Key being deleted in dbp (fkey) - * Primary Key = Key of the corresponding entry in sdbp's primary (pkey). 
- */ -static int -__dbc_del_foreign(dbc) - DBC *dbc; -{ - DB_FOREIGN_INFO *f_info; - DB *dbp, *pdbp, *sdbp; - DBC *pdbc, *sdbc; - DBT data, fkey, pkey; - ENV *env; - u_int32_t flags, rmw; - int changed, ret, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - - memset(&fkey, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - if ((ret = __dbc_get(dbc, &fkey, &data, DB_CURRENT)) != 0) - return (ret); - - LIST_FOREACH(f_info, &(dbp->f_primaries), f_links) { - sdbp = f_info->dbp; - pdbp = sdbp->s_primary; - flags = f_info->flags; - - rmw = (STD_LOCKING(dbc) && - !LF_ISSET(DB_FOREIGN_ABORT)) ? DB_RMW : 0; - - /* - * Handle CDB locking. Some of this is copied from - * __dbc_del_primary, but a bit more acrobatics are required. - * If we're not going to abort, then we need to get a write - * cursor. If CDB_ALLDB is set, then only one write cursor is - * allowed and we hold it, so we fudge things and promote the - * cursor on the other DBs manually, it won't cause a problem. - * If CDB_ALLDB is not set, then we go through the usual route - * to make sure we block as necessary. If there are any open - * read cursors on sdbp, the delete or put call later will - * block. - * - * If NULLIFY is set, we'll need a cursor on the primary to - * update it with the nullified data. Because primary and - * secondary dbs share a lock file ID in CDB, we open a cursor - * on the secondary and then get another writeable cursor on the - * primary via __db_cursor_int to avoid deadlocking. 
- */ - sdbc = pdbc = NULL; - if (!LF_ISSET(DB_FOREIGN_ABORT) && CDB_LOCKING(env) && - !F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) { - ret = __db_cursor(sdbp, - dbc->thread_info, dbc->txn, &sdbc, DB_WRITECURSOR); - if (LF_ISSET(DB_FOREIGN_NULLIFY) && ret == 0) { - ret = __db_cursor_int(pdbp, - dbc->thread_info, dbc->txn, pdbp->type, - PGNO_INVALID, 0, dbc->locker, &pdbc); - F_SET(pdbc, DBC_WRITER); - } - } else { - ret = __db_cursor_int(sdbp, dbc->thread_info, dbc->txn, - sdbp->type, PGNO_INVALID, 0, dbc->locker, &sdbc); - if (LF_ISSET(DB_FOREIGN_NULLIFY) && ret == 0) - ret = __db_cursor_int(pdbp, dbc->thread_info, - dbc->txn, pdbp->type, PGNO_INVALID, 0, - dbc->locker, &pdbc); - } - if (ret != 0) { - if (sdbc != NULL) - (void)__dbc_close(sdbc); - return (ret); - } - if (CDB_LOCKING(env) && F_ISSET(env->dbenv, DB_ENV_CDB_ALLDB)) { - DB_ASSERT(env, sdbc->mylock.off == LOCK_INVALID); - F_SET(sdbc, DBC_WRITER); - if (LF_ISSET(DB_FOREIGN_NULLIFY) && pdbc != NULL) { - DB_ASSERT(env, - pdbc->mylock.off == LOCK_INVALID); - F_SET(pdbc, DBC_WRITER); - } - } - - /* - * There are three actions possible when a foreign database has - * items corresponding to a deleted item: - * DB_FOREIGN_ABORT - The delete operation should be aborted. - * DB_FOREIGN_CASCADE - All corresponding foreign items should - * be deleted. - * DB_FOREIGN_NULLIFY - A callback needs to be made, allowing - * the application to modify the data DBT from the - * associated database. 
If the callback makes a - * modification, the updated item needs to replace the - * original item in the foreign db - */ - memset(&pkey, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - ret = __dbc_pget(sdbc, &fkey, &pkey, &data, DB_SET|rmw); - - if (ret == DB_NOTFOUND) { - /* No entry means no constraint */ - ret = __dbc_close(sdbc); - if (LF_ISSET(DB_FOREIGN_NULLIFY) && - (t_ret = __dbc_close(pdbc)) != 0) - ret = t_ret; - if (ret != 0) - return (ret); - continue; - } else if (ret != 0) { - /* Just return the error code from the pget */ - (void)__dbc_close(sdbc); - if (LF_ISSET(DB_FOREIGN_NULLIFY)) - (void)__dbc_close(pdbc); - return (ret); - } else if (LF_ISSET(DB_FOREIGN_ABORT)) { - /* If the record exists and ABORT is set, we're done */ - if ((ret = __dbc_close(sdbc)) != 0) - return (ret); - return (DB_FOREIGN_CONFLICT); - } - - /* - * There were matching items in the primary DB, and the action - * is either DB_FOREIGN_CASCADE or DB_FOREIGN_NULLIFY. - */ - while (ret == 0) { - if (LF_ISSET(DB_FOREIGN_CASCADE)) { - /* - * Don't use the DB_UPDATE_SECONDARY flag, - * since we want the delete to cascade into the - * secondary's primary. - */ - if ((ret = __dbc_del(sdbc, 0)) != 0) { - __db_err(env, ret, - "Attempt to execute cascading delete in a foreign index failed"); - break; - } - } else if (LF_ISSET(DB_FOREIGN_NULLIFY)) { - changed = 0; - if ((ret = f_info->callback(sdbp, - &pkey, &data, &fkey, &changed)) != 0) { - __db_err(env, ret, - "Foreign database application callback"); - break; - } - - /* - * If the user callback modified the DBT and - * a put on the primary failed. - */ - if (changed && (ret = __dbc_put(pdbc, - &pkey, &data, DB_KEYFIRST)) != 0) { - __db_err(env, ret, - "Attempt to overwrite item in foreign database with nullified value failed"); - break; - } - } - /* retrieve the next matching item from the prim. 
db */ - memset(&pkey, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - ret = __dbc_pget(sdbc, - &fkey, &pkey, &data, DB_NEXT_DUP|rmw); - } - - if (ret == DB_NOTFOUND) - ret = 0; - if ((t_ret = __dbc_close(sdbc)) != 0 && ret == 0) - ret = t_ret; - if (LF_ISSET(DB_FOREIGN_NULLIFY) && - (t_ret = __dbc_close(pdbc)) != 0 && ret == 0) - ret = t_ret; - if (ret != 0) - return (ret); - } - - return (ret); -} - -/* - * __db_s_first -- - * Get the first secondary, if any are present, from the primary. - * - * PUBLIC: int __db_s_first __P((DB *, DB **)); - */ -int -__db_s_first(pdbp, sdbpp) - DB *pdbp, **sdbpp; -{ - DB *sdbp; - - MUTEX_LOCK(pdbp->env, pdbp->mutex); - sdbp = LIST_FIRST(&pdbp->s_secondaries); - - /* See __db_s_next. */ - if (sdbp != NULL) - sdbp->s_refcnt++; - MUTEX_UNLOCK(pdbp->env, pdbp->mutex); - - *sdbpp = sdbp; - - return (0); -} - -/* - * __db_s_next -- - * Get the next secondary in the list. - * - * PUBLIC: int __db_s_next __P((DB **, DB_TXN *)); - */ -int -__db_s_next(sdbpp, txn) - DB **sdbpp; - DB_TXN *txn; -{ - DB *sdbp, *pdbp, *closeme; - ENV *env; - int ret; - - /* - * Secondary indices are kept in a linked list, s_secondaries, - * off each primary DB handle. If a primary is free-threaded, - * this list may only be traversed or modified while the primary's - * thread mutex is held. - * - * The tricky part is that we don't want to hold the thread mutex - * across the full set of secondary puts necessary for each primary - * put, or we'll wind up essentially single-threading all the puts - * to the handle; the secondary puts will each take about as - * long as the primary does, and may require I/O. So we instead - * hold the thread mutex only long enough to follow one link to the - * next secondary, and then we release it before performing the - * actual secondary put. 
- * - * The only danger here is that we might legitimately close a - * secondary index in one thread while another thread is performing - * a put and trying to update that same secondary index. To - * prevent this from happening, we refcount the secondary handles. - * If close is called on a secondary index handle while we're putting - * to it, it won't really be closed--the refcount will simply drop, - * and we'll be responsible for closing it here. - */ - sdbp = *sdbpp; - pdbp = sdbp->s_primary; - env = pdbp->env; - closeme = NULL; - - MUTEX_LOCK(env, pdbp->mutex); - DB_ASSERT(env, sdbp->s_refcnt != 0); - if (--sdbp->s_refcnt == 0) { - LIST_REMOVE(sdbp, s_links); - closeme = sdbp; - } - sdbp = LIST_NEXT(sdbp, s_links); - if (sdbp != NULL) - sdbp->s_refcnt++; - MUTEX_UNLOCK(env, pdbp->mutex); - - *sdbpp = sdbp; - - /* - * closeme->close() is a wrapper; call __db_close explicitly. - */ - if (closeme == NULL) - ret = 0; - else - ret = __db_close(closeme, txn, 0); - - return (ret); -} - -/* - * __db_s_done -- - * Properly decrement the refcount on a secondary database handle we're - * using, without calling __db_s_next. - * - * PUBLIC: int __db_s_done __P((DB *, DB_TXN *)); - */ -int -__db_s_done(sdbp, txn) - DB *sdbp; - DB_TXN *txn; -{ - DB *pdbp; - ENV *env; - int doclose, ret; - - pdbp = sdbp->s_primary; - env = pdbp->env; - doclose = 0; - - MUTEX_LOCK(env, pdbp->mutex); - DB_ASSERT(env, sdbp->s_refcnt != 0); - if (--sdbp->s_refcnt == 0) { - LIST_REMOVE(sdbp, s_links); - doclose = 1; - } - MUTEX_UNLOCK(env, pdbp->mutex); - - if (doclose == 0) - ret = 0; - else - ret = __db_close(sdbp, txn, 0); - return (ret); -} - -/* - * __db_s_count -- - * Count the number of secondaries associated with a given primary. 
- */ -static int -__db_s_count(pdbp) - DB *pdbp; -{ - DB *sdbp; - ENV *env; - int count; - - env = pdbp->env; - count = 0; - - MUTEX_LOCK(env, pdbp->mutex); - for (sdbp = LIST_FIRST(&pdbp->s_secondaries); - sdbp != NULL; - sdbp = LIST_NEXT(sdbp, s_links)) - ++count; - MUTEX_UNLOCK(env, pdbp->mutex); - - return (count); -} - -/* - * __db_buildpartial -- - * Build the record that will result after a partial put is applied to - * an existing record. - * - * This should probably be merged with __bam_build, but that requires - * a little trickery if we plan to keep the overflow-record optimization - * in that function. - * - * PUBLIC: int __db_buildpartial __P((DB *, DBT *, DBT *, DBT *)); - */ -int -__db_buildpartial(dbp, oldrec, partial, newrec) - DB *dbp; - DBT *oldrec, *partial, *newrec; -{ - ENV *env; - u_int32_t len, nbytes; - u_int8_t *buf; - int ret; - - env = dbp->env; - - DB_ASSERT(env, F_ISSET(partial, DB_DBT_PARTIAL)); - - memset(newrec, 0, sizeof(DBT)); - - nbytes = __db_partsize(oldrec->size, partial); - newrec->size = nbytes; - - if ((ret = __os_malloc(env, nbytes, &buf)) != 0) - return (ret); - newrec->data = buf; - - /* Nul or pad out the buffer, for any part that isn't specified. */ - memset(buf, - F_ISSET(dbp, DB_AM_FIXEDLEN) ? ((BTREE *)dbp->bt_internal)->re_pad : - 0, nbytes); - - /* Copy in any leading data from the original record. */ - memcpy(buf, oldrec->data, - partial->doff > oldrec->size ? oldrec->size : partial->doff); - - /* Copy the data from partial. */ - memcpy(buf + partial->doff, partial->data, partial->size); - - /* Copy any trailing data from the original record. */ - len = partial->doff + partial->dlen; - if (oldrec->size > len) - memcpy(buf + partial->doff + partial->size, - (u_int8_t *)oldrec->data + len, oldrec->size - len); - - return (0); -} - -/* - * __db_partsize -- - * Given the number of bytes in an existing record and a DBT that - * is about to be partial-put, calculate the size of the record - * after the put. 
- * - * This code is called from __bam_partsize. - * - * PUBLIC: u_int32_t __db_partsize __P((u_int32_t, DBT *)); - */ -u_int32_t -__db_partsize(nbytes, data) - u_int32_t nbytes; - DBT *data; -{ - - /* - * There are really two cases here: - * - * Case 1: We are replacing some bytes that do not exist (i.e., they - * are past the end of the record). In this case the number of bytes - * we are replacing is irrelevant and all we care about is how many - * bytes we are going to add from offset. So, the new record length - * is going to be the size of the new bytes (size) plus wherever those - * new bytes begin (doff). - * - * Case 2: All the bytes we are replacing exist. Therefore, the new - * size is the oldsize (nbytes) minus the bytes we are replacing (dlen) - * plus the bytes we are adding (size). - */ - if (nbytes < data->doff + data->dlen) /* Case 1 */ - return (data->doff + data->size); - - return (nbytes + data->size - data->dlen); /* Case 2 */ -} - -#ifdef DIAGNOSTIC -/* - * __db_check_skeyset -- - * Diagnostic check that the application's callback returns a set of - * secondary keys without repeats. - * - * PUBLIC: #ifdef DIAGNOSTIC - * PUBLIC: void __db_check_skeyset __P((DB *, DBT *)); - * PUBLIC: #endif - */ -void -__db_check_skeyset(sdbp, skeyp) - DB *sdbp; - DBT *skeyp; -{ - DBT *firstkey, *lastkey, *key1, *key2; - ENV *env; - - env = sdbp->env; - - firstkey = (DBT *)skeyp->data; - lastkey = firstkey + skeyp->size; - for (key1 = firstkey; key1 < lastkey; key1++) - for (key2 = key1 + 1; key2 < lastkey; key2++) - DB_ASSERT(env, - ((BTREE *)sdbp->bt_internal)->bt_compare(sdbp, - key1, key2) != 0); -} -#endif diff --git a/db/db_cds.c b/db/db_cds.c deleted file mode 100644 index 5efda31..0000000 --- a/db/db_cds.c +++ /dev/null @@ -1,177 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2000-2009 Oracle. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_am.h" -#include "dbinc/lock.h" -#include "dbinc/txn.h" - -static int __cdsgroup_abort __P((DB_TXN *txn)); -static int __cdsgroup_commit __P((DB_TXN *txn, u_int32_t flags)); -static int __cdsgroup_discard __P((DB_TXN *txn, u_int32_t flags)); -static u_int32_t __cdsgroup_id __P((DB_TXN *txn)); -static int __cdsgroup_notsup __P((ENV *env, const char *meth)); -static int __cdsgroup_prepare __P((DB_TXN *txn, u_int8_t *gid)); -static int __cdsgroup_set_name __P((DB_TXN *txn, const char *name)); -static int __cdsgroup_set_timeout - __P((DB_TXN *txn, db_timeout_t timeout, u_int32_t flags)); - -/* - * __cdsgroup_notsup -- - * Error when CDS groups don't support a method. - */ -static int -__cdsgroup_notsup(env, meth) - ENV *env; - const char *meth; -{ - __db_errx(env, "CDS groups do not support %s", meth); - return (DB_OPNOTSUP); -} - -static int -__cdsgroup_abort(txn) - DB_TXN *txn; -{ - return (__cdsgroup_notsup(txn->mgrp->env, "abort")); -} - -static int -__cdsgroup_commit(txn, flags) - DB_TXN *txn; - u_int32_t flags; -{ - DB_LOCKER *locker; - DB_LOCKREQ lreq; - ENV *env; - int ret, t_ret; - - COMPQUIET(flags, 0); - env = txn->mgrp->env; - - /* Check for live cursors. */ - if (txn->cursors != 0) { - __db_errx(env, "CDS group has active cursors"); - return (EINVAL); - } - - /* We may be holding handle locks; release them. 
*/ - lreq.op = DB_LOCK_PUT_ALL; - lreq.obj = NULL; - ret = __lock_vec(env, txn->locker, 0, &lreq, 1, NULL); - - env = txn->mgrp->env; - locker = txn->locker; - __os_free(env, txn->mgrp); - __os_free(env, txn); - if ((t_ret = __lock_id_free(env, locker)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -static int __cdsgroup_discard(txn, flags) - DB_TXN *txn; - u_int32_t flags; -{ - COMPQUIET(flags, 0); - return (__cdsgroup_notsup(txn->mgrp->env, "discard")); -} - -static u_int32_t __cdsgroup_id(txn) - DB_TXN *txn; -{ - return (txn->txnid); -} - -static int __cdsgroup_prepare(txn, gid) - DB_TXN *txn; - u_int8_t *gid; -{ - COMPQUIET(gid, NULL); - return (__cdsgroup_notsup(txn->mgrp->env, "prepare")); -} - -static int __cdsgroup_set_name(txn, name) - DB_TXN *txn; - const char *name; -{ - COMPQUIET(name, NULL); - return (__cdsgroup_notsup(txn->mgrp->env, "set_name")); -} - -static int __cdsgroup_set_timeout(txn, timeout, flags) - DB_TXN *txn; - db_timeout_t timeout; - u_int32_t flags; -{ - COMPQUIET(timeout, 0); - COMPQUIET(flags, 0); - return (__cdsgroup_notsup(txn->mgrp->env, "set_timeout")); -} - -/* - * __cds_txn_begin -- - * ENV->cdsgroup_begin - * - * PUBLIC: int __cdsgroup_begin __P((DB_ENV *, DB_TXN **)); - */ -int -__cdsgroup_begin(dbenv, txnpp) - DB_ENV *dbenv; - DB_TXN **txnpp; -{ - DB_THREAD_INFO *ip; - DB_TXN *txn; - ENV *env; - int ret; - - env = dbenv->env; - - ENV_ILLEGAL_BEFORE_OPEN(env, "cdsgroup_begin"); - if (!CDB_LOCKING(env)) - return (__env_not_config(env, "cdsgroup_begin", DB_INIT_CDB)); - - ENV_ENTER(env, ip); - *txnpp = txn = NULL; - if ((ret = __os_calloc(env, 1, sizeof(DB_TXN), &txn)) != 0) - goto err; - /* - * We need a dummy DB_TXNMGR -- it's the only way to get from a - * transaction handle to the environment handle. 
- */ - if ((ret = __os_calloc(env, 1, sizeof(DB_TXNMGR), &txn->mgrp)) != 0) - goto err; - txn->mgrp->env = env; - - if ((ret = __lock_id(env, &txn->txnid, &txn->locker)) != 0) - goto err; - - txn->flags = TXN_CDSGROUP; - txn->abort = __cdsgroup_abort; - txn->commit = __cdsgroup_commit; - txn->discard = __cdsgroup_discard; - txn->id = __cdsgroup_id; - txn->prepare = __cdsgroup_prepare; - txn->set_name = __cdsgroup_set_name; - txn->set_timeout = __cdsgroup_set_timeout; - - *txnpp = txn; - - if (0) { -err: if (txn != NULL) { - if (txn->mgrp != NULL) - __os_free(env, txn->mgrp); - __os_free(env, txn); - } - } - ENV_LEAVE(env, ip); - return (ret); -} diff --git a/db/db_conv.c b/db/db_conv.c deleted file mode 100644 index 4572683..0000000 --- a/db/db_conv.c +++ /dev/null @@ -1,733 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. 
- * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/hmac.h" -#include "dbinc/db_page.h" -#include "dbinc/db_swap.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/log.h" -#include "dbinc/qam.h" - -/* - * __db_pgin -- - * Primary page-swap routine. - * - * PUBLIC: int __db_pgin __P((DB_ENV *, db_pgno_t, void *, DBT *)); - */ -int -__db_pgin(dbenv, pg, pp, cookie) - DB_ENV *dbenv; - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - DB dummydb, *dbp; - DB_CIPHER *db_cipher; - DB_LSN not_used; - DB_PGINFO *pginfo; - ENV *env; - PAGE *pagep; - size_t sum_len; - int is_hmac, ret; - u_int8_t *chksum; - - pginfo = (DB_PGINFO *)cookie->data; - env = dbenv->env; - pagep = (PAGE *)pp; - - ret = is_hmac = 0; - chksum = NULL; - memset(&dummydb, 0, sizeof(DB)); - dbp = &dummydb; - dbp->dbenv = dbenv; - dbp->env = env; - dbp->flags = pginfo->flags; - dbp->pgsize = pginfo->db_pagesize; - db_cipher = env->crypto_handle; - switch (pagep->type) { - case P_HASHMETA: - case P_BTREEMETA: - case P_QAMMETA: - /* - * If checksumming is set on the meta-page, we must set - * it in the dbp. 
- */ - if (FLD_ISSET(((DBMETA *)pp)->metaflags, DBMETA_CHKSUM)) - F_SET(dbp, DB_AM_CHKSUM); - else - F_CLR(dbp, DB_AM_CHKSUM); - if (((DBMETA *)pp)->encrypt_alg != 0 || - F_ISSET(dbp, DB_AM_ENCRYPT)) - is_hmac = 1; - /* - * !!! - * For all meta pages it is required that the chksum - * be at the same location. Use BTMETA to get to it - * for any meta type. - */ - chksum = ((BTMETA *)pp)->chksum; - sum_len = DBMETASIZE; - break; - case P_INVALID: - /* - * We assume that we've read a file hole if we have - * a zero LSN, zero page number and P_INVALID. Otherwise - * we have an invalid page that might contain real data. - */ - if (IS_ZERO_LSN(LSN(pagep)) && pagep->pgno == PGNO_INVALID) { - sum_len = 0; - break; - } - /* FALLTHROUGH */ - default: - chksum = P_CHKSUM(dbp, pagep); - sum_len = pginfo->db_pagesize; - /* - * If we are reading in a non-meta page, then if we have - * a db_cipher then we are using hmac. - */ - is_hmac = CRYPTO_ON(env) ? 1 : 0; - break; - } - - /* - * We expect a checksum error if there was a configuration problem. - * If there is no configuration problem and we don't get a match, - * it's fatal: panic the system. 
- */ - if (F_ISSET(dbp, DB_AM_CHKSUM) && sum_len != 0) { - if (F_ISSET(dbp, DB_AM_SWAP) && is_hmac == 0) - P_32_SWAP(chksum); - switch (ret = __db_check_chksum( - env, NULL, db_cipher, chksum, pp, sum_len, is_hmac)) { - case 0: - break; - case -1: - if (DBENV_LOGGING(env)) - (void)__db_cksum_log( - env, NULL, ¬_used, DB_FLUSH); - __db_errx(env, - "checksum error: page %lu: catastrophic recovery required", - (u_long)pg); - return (__env_panic(env, DB_RUNRECOVERY)); - default: - return (ret); - } - } - if ((ret = __db_decrypt_pg(env, dbp, pagep)) != 0) - return (ret); - switch (pagep->type) { - case P_INVALID: - if (pginfo->type == DB_QUEUE) - return (__qam_pgin_out(env, pg, pp, cookie)); - else - return (__ham_pgin(dbp, pg, pp, cookie)); - case P_HASH_UNSORTED: - case P_HASH: - case P_HASHMETA: - return (__ham_pgin(dbp, pg, pp, cookie)); - case P_BTREEMETA: - case P_IBTREE: - case P_IRECNO: - case P_LBTREE: - case P_LDUP: - case P_LRECNO: - case P_OVERFLOW: - return (__bam_pgin(dbp, pg, pp, cookie)); - case P_QAMMETA: - case P_QAMDATA: - return (__qam_pgin_out(env, pg, pp, cookie)); - default: - break; - } - return (__db_pgfmt(env, pg)); -} - -/* - * __db_pgout -- - * Primary page-swap routine. 
- * - * PUBLIC: int __db_pgout __P((DB_ENV *, db_pgno_t, void *, DBT *)); - */ -int -__db_pgout(dbenv, pg, pp, cookie) - DB_ENV *dbenv; - db_pgno_t pg; - void *pp; - DBT *cookie; -{ - DB dummydb, *dbp; - DB_PGINFO *pginfo; - ENV *env; - PAGE *pagep; - int ret; - - pginfo = (DB_PGINFO *)cookie->data; - env = dbenv->env; - pagep = (PAGE *)pp; - - memset(&dummydb, 0, sizeof(DB)); - dbp = &dummydb; - dbp->dbenv = dbenv; - dbp->env = env; - dbp->flags = pginfo->flags; - dbp->pgsize = pginfo->db_pagesize; - ret = 0; - switch (pagep->type) { - case P_INVALID: - if (pginfo->type == DB_QUEUE) - ret = __qam_pgin_out(env, pg, pp, cookie); - else - ret = __ham_pgout(dbp, pg, pp, cookie); - break; - case P_HASH: - case P_HASH_UNSORTED: - /* - * Support pgout of unsorted hash pages - since online - * replication upgrade can cause pages of this type to be - * written out. - * - * FALLTHROUGH - */ - case P_HASHMETA: - ret = __ham_pgout(dbp, pg, pp, cookie); - break; - case P_BTREEMETA: - case P_IBTREE: - case P_IRECNO: - case P_LBTREE: - case P_LDUP: - case P_LRECNO: - case P_OVERFLOW: - ret = __bam_pgout(dbp, pg, pp, cookie); - break; - case P_QAMMETA: - case P_QAMDATA: - ret = __qam_pgin_out(env, pg, pp, cookie); - break; - default: - return (__db_pgfmt(env, pg)); - } - if (ret) - return (ret); - - return (__db_encrypt_and_checksum_pg(env, dbp, pagep)); -} - -/* - * __db_decrypt_pg -- - * Utility function to decrypt a db page. 
- * - * PUBLIC: int __db_decrypt_pg __P((ENV *, DB *, PAGE *)); - */ -int -__db_decrypt_pg (env, dbp, pagep) - ENV *env; - DB *dbp; - PAGE *pagep; -{ - DB_CIPHER *db_cipher; - size_t pg_len, pg_off; - u_int8_t *iv; - int ret; - - db_cipher = env->crypto_handle; - ret = 0; - iv = NULL; - if (F_ISSET(dbp, DB_AM_ENCRYPT)) { - DB_ASSERT(env, db_cipher != NULL); - DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM)); - - pg_off = P_OVERHEAD(dbp); - DB_ASSERT(env, db_cipher->adj_size(pg_off) == 0); - - switch (pagep->type) { - case P_HASHMETA: - case P_BTREEMETA: - case P_QAMMETA: - /* - * !!! - * For all meta pages it is required that the iv - * be at the same location. Use BTMETA to get to it - * for any meta type. - */ - iv = ((BTMETA *)pagep)->iv; - pg_len = DBMETASIZE; - break; - case P_INVALID: - if (IS_ZERO_LSN(LSN(pagep)) && - pagep->pgno == PGNO_INVALID) { - pg_len = 0; - break; - } - /* FALLTHROUGH */ - default: - iv = P_IV(dbp, pagep); - pg_len = dbp->pgsize; - break; - } - if (pg_len != 0) - ret = db_cipher->decrypt(env, db_cipher->data, - iv, ((u_int8_t *)pagep) + pg_off, - pg_len - pg_off); - } - return (ret); -} - -/* - * __db_encrypt_and_checksum_pg -- - * Utility function to encrypt and checksum a db page. - * - * PUBLIC: int __db_encrypt_and_checksum_pg - * PUBLIC: __P((ENV *, DB *, PAGE *)); - */ -int -__db_encrypt_and_checksum_pg (env, dbp, pagep) - ENV *env; - DB *dbp; - PAGE *pagep; -{ - DB_CIPHER *db_cipher; - int ret; - size_t pg_off, pg_len, sum_len; - u_int8_t *chksum, *iv, *key; - - chksum = iv = key = NULL; - db_cipher = env->crypto_handle; - - if (F_ISSET(dbp, DB_AM_ENCRYPT)) { - DB_ASSERT(env, db_cipher != NULL); - DB_ASSERT(env, F_ISSET(dbp, DB_AM_CHKSUM)); - - pg_off = P_OVERHEAD(dbp); - DB_ASSERT(env, db_cipher->adj_size(pg_off) == 0); - - key = db_cipher->mac_key; - - switch (pagep->type) { - case P_HASHMETA: - case P_BTREEMETA: - case P_QAMMETA: - /* - * !!! - * For all meta pages it is required that the iv - * be at the same location. 
Use BTMETA to get to it - * for any meta type. - */ - iv = ((BTMETA *)pagep)->iv; - pg_len = DBMETASIZE; - break; - default: - iv = P_IV(dbp, pagep); - pg_len = dbp->pgsize; - break; - } - if ((ret = db_cipher->encrypt(env, db_cipher->data, - iv, ((u_int8_t *)pagep) + pg_off, pg_len - pg_off)) != 0) - return (ret); - } - if (F_ISSET(dbp, DB_AM_CHKSUM)) { - switch (pagep->type) { - case P_HASHMETA: - case P_BTREEMETA: - case P_QAMMETA: - /* - * !!! - * For all meta pages it is required that the chksum - * be at the same location. Use BTMETA to get to it - * for any meta type. - */ - chksum = ((BTMETA *)pagep)->chksum; - sum_len = DBMETASIZE; - break; - default: - chksum = P_CHKSUM(dbp, pagep); - sum_len = dbp->pgsize; - break; - } - __db_chksum(NULL, (u_int8_t *)pagep, sum_len, key, chksum); - if (F_ISSET(dbp, DB_AM_SWAP) && !F_ISSET(dbp, DB_AM_ENCRYPT)) - P_32_SWAP(chksum); - } - return (0); -} - -/* - * __db_metaswap -- - * Byteswap the common part of the meta-data page. - * - * PUBLIC: void __db_metaswap __P((PAGE *)); - */ -void -__db_metaswap(pg) - PAGE *pg; -{ - u_int8_t *p; - - p = (u_int8_t *)pg; - - /* Swap the meta-data information. */ - SWAP32(p); /* lsn.file */ - SWAP32(p); /* lsn.offset */ - SWAP32(p); /* pgno */ - SWAP32(p); /* magic */ - SWAP32(p); /* version */ - SWAP32(p); /* pagesize */ - p += 4; /* unused, page type, unused, unused */ - SWAP32(p); /* free */ - SWAP32(p); /* alloc_lsn part 1 */ - SWAP32(p); /* alloc_lsn part 2 */ - SWAP32(p); /* cached key count */ - SWAP32(p); /* cached record count */ - SWAP32(p); /* flags */ -} - -/* - * __db_byteswap -- - * Byteswap an ordinary database page. 
- * - * PUBLIC: int __db_byteswap - * PUBLIC: __P((DB *, db_pgno_t, PAGE *, size_t, int)); - */ -int -__db_byteswap(dbp, pg, h, pagesize, pgin) - DB *dbp; - db_pgno_t pg; - PAGE *h; - size_t pagesize; - int pgin; -{ - ENV *env; - BINTERNAL *bi; - BKEYDATA *bk; - BOVERFLOW *bo; - RINTERNAL *ri; - db_indx_t i, *inp, len, tmp; - u_int8_t *end, *p, *pgend; - - if (pagesize == 0) - return (0); - - env = dbp->env; - - if (pgin) { - M_32_SWAP(h->lsn.file); - M_32_SWAP(h->lsn.offset); - M_32_SWAP(h->pgno); - M_32_SWAP(h->prev_pgno); - M_32_SWAP(h->next_pgno); - M_16_SWAP(h->entries); - M_16_SWAP(h->hf_offset); - } - - pgend = (u_int8_t *)h + pagesize; - - inp = P_INP(dbp, h); - if ((u_int8_t *)inp >= pgend) - goto out; - - switch (TYPE(h)) { - case P_HASH_UNSORTED: - case P_HASH: - for (i = 0; i < NUM_ENT(h); i++) { - if (pgin) - M_16_SWAP(inp[i]); - - if (P_ENTRY(dbp, h, i) >= pgend) - continue; - - switch (HPAGE_TYPE(dbp, h, i)) { - case H_KEYDATA: - break; - case H_DUPLICATE: - len = LEN_HKEYDATA(dbp, h, pagesize, i); - p = HKEYDATA_DATA(P_ENTRY(dbp, h, i)); - for (end = p + len; p < end;) { - if (pgin) { - P_16_SWAP(p); - memcpy(&tmp, - p, sizeof(db_indx_t)); - p += sizeof(db_indx_t); - } else { - memcpy(&tmp, - p, sizeof(db_indx_t)); - SWAP16(p); - } - p += tmp; - SWAP16(p); - } - break; - case H_OFFDUP: - p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i)); - SWAP32(p); /* pgno */ - break; - case H_OFFPAGE: - p = HOFFPAGE_PGNO(P_ENTRY(dbp, h, i)); - SWAP32(p); /* pgno */ - SWAP32(p); /* tlen */ - break; - default: - return (__db_pgfmt(env, pg)); - } - - } - - /* - * The offsets in the inp array are used to determine - * the size of entries on a page; therefore they - * cannot be converted until we've done all the - * entries. 
- */ - if (!pgin) - for (i = 0; i < NUM_ENT(h); i++) - M_16_SWAP(inp[i]); - break; - case P_LBTREE: - case P_LDUP: - case P_LRECNO: - for (i = 0; i < NUM_ENT(h); i++) { - if (pgin) - M_16_SWAP(inp[i]); - - /* - * In the case of on-page duplicates, key information - * should only be swapped once. - */ - if (h->type == P_LBTREE && i > 1) { - if (pgin) { - if (inp[i] == inp[i - 2]) - continue; - } else { - M_16_SWAP(inp[i]); - if (inp[i] == inp[i - 2]) - continue; - M_16_SWAP(inp[i]); - } - } - - bk = GET_BKEYDATA(dbp, h, i); - if ((u_int8_t *)bk >= pgend) - continue; - switch (B_TYPE(bk->type)) { - case B_KEYDATA: - M_16_SWAP(bk->len); - break; - case B_DUPLICATE: - case B_OVERFLOW: - bo = (BOVERFLOW *)bk; - M_32_SWAP(bo->pgno); - M_32_SWAP(bo->tlen); - break; - default: - return (__db_pgfmt(env, pg)); - } - - if (!pgin) - M_16_SWAP(inp[i]); - } - break; - case P_IBTREE: - for (i = 0; i < NUM_ENT(h); i++) { - if (pgin) - M_16_SWAP(inp[i]); - - bi = GET_BINTERNAL(dbp, h, i); - if ((u_int8_t *)bi >= pgend) - continue; - - M_16_SWAP(bi->len); - M_32_SWAP(bi->pgno); - M_32_SWAP(bi->nrecs); - - switch (B_TYPE(bi->type)) { - case B_KEYDATA: - break; - case B_DUPLICATE: - case B_OVERFLOW: - bo = (BOVERFLOW *)bi->data; - M_32_SWAP(bo->pgno); - M_32_SWAP(bo->tlen); - break; - default: - return (__db_pgfmt(env, pg)); - } - - if (!pgin) - M_16_SWAP(inp[i]); - } - break; - case P_IRECNO: - for (i = 0; i < NUM_ENT(h); i++) { - if (pgin) - M_16_SWAP(inp[i]); - - ri = GET_RINTERNAL(dbp, h, i); - if ((u_int8_t *)ri >= pgend) - continue; - - M_32_SWAP(ri->pgno); - M_32_SWAP(ri->nrecs); - - if (!pgin) - M_16_SWAP(inp[i]); - } - break; - case P_INVALID: - case P_OVERFLOW: - case P_QAMDATA: - /* Nothing to do. */ - break; - default: - return (__db_pgfmt(env, pg)); - } - -out: if (!pgin) { - /* Swap the header information. 
*/ - M_32_SWAP(h->lsn.file); - M_32_SWAP(h->lsn.offset); - M_32_SWAP(h->pgno); - M_32_SWAP(h->prev_pgno); - M_32_SWAP(h->next_pgno); - M_16_SWAP(h->entries); - M_16_SWAP(h->hf_offset); - } - return (0); -} - -/* - * __db_pageswap -- - * Byteswap any database page. Normally, the page to be swapped will be - * referenced by the "pp" argument and the pdata argument will be NULL. - * This function is also called by automatically generated log functions, - * where the page may be split into separate header and data parts. In - * that case, pdata is not NULL we reconsitute - * - * PUBLIC: int __db_pageswap - * PUBLIC: __P((DB *, void *, size_t, DBT *, int)); - */ -int -__db_pageswap(dbp, pp, len, pdata, pgin) - DB *dbp; - void *pp; - size_t len; - DBT *pdata; - int pgin; -{ - ENV *env; - db_pgno_t pg; - size_t pgsize; - void *pgcopy; - int ret; - u_int16_t hoffset; - - env = dbp->env; - - switch (TYPE(pp)) { - case P_BTREEMETA: - return (__bam_mswap(env, pp)); - - case P_HASHMETA: - return (__ham_mswap(env, pp)); - - case P_QAMMETA: - return (__qam_mswap(env, pp)); - - case P_INVALID: - case P_OVERFLOW: - case P_QAMDATA: - /* - * We may have been passed an invalid page, or a queue data - * page, or an overflow page where fields like hoffset have a - * special meaning. In that case, no swapping of the page data - * is required, just the fields in the page header. 
- */ - pdata = NULL; - break; - - default: - break; - } - - if (pgin) { - P_32_COPYSWAP(&PGNO(pp), &pg); - P_16_COPYSWAP(&HOFFSET(pp), &hoffset); - } else { - pg = PGNO(pp); - hoffset = HOFFSET(pp); - } - - if (pdata == NULL) - ret = __db_byteswap(dbp, pg, (PAGE *)pp, len, pgin); - else { - pgsize = hoffset + pdata->size; - if ((ret = __os_malloc(env, pgsize, &pgcopy)) != 0) - return (ret); - memset(pgcopy, 0, pgsize); - memcpy(pgcopy, pp, len); - memcpy((u_int8_t *)pgcopy + hoffset, pdata->data, pdata->size); - - ret = __db_byteswap(dbp, pg, (PAGE *)pgcopy, pgsize, pgin); - memcpy(pp, pgcopy, len); - - /* - * If we are swapping data to be written to the log, we can't - * overwrite the buffer that was passed in: it may be a pointer - * into a page in cache. We set DB_DBT_APPMALLOC here so that - * the calling code can free the memory we allocate here. - */ - if (!pgin) { - if ((ret = - __os_malloc(env, pdata->size, &pdata->data)) != 0) { - __os_free(env, pgcopy); - return (ret); - } - F_SET(pdata, DB_DBT_APPMALLOC); - } - memcpy(pdata->data, (u_int8_t *)pgcopy + hoffset, pdata->size); - __os_free(env, pgcopy); - } - - return (ret); -} diff --git a/db/db_dispatch.c b/db/db_dispatch.c deleted file mode 100644 index 65dc260..0000000 --- a/db/db_dispatch.c +++ /dev/null @@ -1,953 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1995, 1996 - * The President and Fellows of Harvard University. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Margo Seltzer. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/hash.h" -#include "dbinc/fop.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/txn.h" - -static int __db_txnlist_find_internal __P((ENV *, DB_TXNHEAD *, - db_txnlist_type, u_int32_t, DB_TXNLIST **, - int, u_int32_t *)); - -/* - * __db_dispatch -- - * - * This is the transaction dispatch function used by the db access methods. - * It is designed to handle the record format used by all the access - * methods (the one automatically generated by the db_{h,log,read}.sh - * scripts in the tools directory). An application using a different - * recovery paradigm will supply a different dispatch function to txn_open. 
- * - * PUBLIC: int __db_dispatch __P((ENV *, - * PUBLIC: DB_DISTAB *, DBT *, DB_LSN *, db_recops, DB_TXNHEAD *)); - */ -int -__db_dispatch(env, dtab, db, lsnp, redo, info) - ENV *env; /* The environment. */ - DB_DISTAB *dtab; - DBT *db; /* The log record upon which to dispatch. */ - DB_LSN *lsnp; /* The lsn of the record being dispatched. */ - db_recops redo; /* Redo this op (or undo it). */ - DB_TXNHEAD *info; /* Transaction list. */ -{ - DB_ENV *dbenv; - DB_LSN prev_lsn; - u_int32_t rectype, status, txnid, urectype; - int make_call, ret; - - dbenv = env->dbenv; - LOGCOPY_32(env, &rectype, db->data); - LOGCOPY_32(env, &txnid, (u_int8_t *)db->data + sizeof(rectype)); - - make_call = ret = 0; - - /* If we don't have a dispatch table, it's hard to dispatch. */ - DB_ASSERT(env, dtab != NULL); - - /* - * If we find a record that is in the user's number space and they - * have specified a recovery routine, let them handle it. If they - * didn't specify a recovery routine, then we expect that they've - * followed all our rules and registered new recovery functions. - */ - switch (redo) { - case DB_TXN_ABORT: - case DB_TXN_APPLY: - case DB_TXN_PRINT: - make_call = 1; - break; - case DB_TXN_OPENFILES: - /* - * We collect all the transactions that have - * "begin" records, those with no previous LSN, - * so that we do not abort partial transactions. - * These are known to be undone, otherwise the - * log would not have been freeable. 
- */ - LOGCOPY_TOLSN(env, &prev_lsn, (u_int8_t *)db->data + - sizeof(rectype) + sizeof(txnid)); - if (txnid != 0 && prev_lsn.file == 0 && (ret = - __db_txnlist_add(env, info, txnid, TXN_OK, NULL)) != 0) - return (ret); - - /* FALLTHROUGH */ - case DB_TXN_POPENFILES: - if (rectype == DB___dbreg_register || - rectype == DB___txn_child || - rectype == DB___txn_ckp || rectype == DB___txn_recycle) - return ((dtab->int_dispatch[rectype])(env, - db, lsnp, redo, info)); - break; - case DB_TXN_BACKWARD_ROLL: - /* - * Running full recovery in the backward pass. In general, - * we only process records during this pass that belong - * to aborted transactions. Unfortunately, there are several - * exceptions: - * 1. If this is a meta-record, one not associated with - * a transaction, then we must always process it. - * 2. If this is a transaction commit/abort, we must - * always process it, so that we know the status of - * every transaction. - * 3. If this is a child commit, we need to process it - * because the outcome of the child transaction depends - * on the outcome of the parent. - * 4. If this is a dbreg_register record, we must always - * process is because they contain non-transactional - * closes that must be properly handled. - * 5. If this is a noop, we must always undo it so that we - * properly handle any aborts before a file was closed. - * 6. If this a file remove, we need to process it to - * determine if the on-disk file is the same as the - * one being described. - */ - switch (rectype) { - /* - * These either do not belong to a transaction or (regop) - * must be processed regardless of the status of the - * transaction. - */ - case DB___txn_regop: - case DB___txn_recycle: - case DB___txn_ckp: - make_call = 1; - break; - /* - * These belong to a transaction whose status must be - * checked. 
- */ - case DB___txn_child: - case DB___db_noop: - case DB___fop_file_remove: - case DB___dbreg_register: - make_call = 1; - - /* FALLTHROUGH */ - default: - if (txnid == 0) - break; - - ret = __db_txnlist_find(env, info, txnid, &status); - - /* If not found, this is an incomplete abort. */ - if (ret == DB_NOTFOUND) - return (__db_txnlist_add(env, - info, txnid, TXN_IGNORE, lsnp)); - if (ret != 0) - return (ret); - - /* - * If we ignore the transaction, ignore the operation - * UNLESS this is a child commit in which case we need - * to make sure that the child also gets marked as - * ignore. - */ - if (status == TXN_IGNORE && rectype != DB___txn_child) { - make_call = 0; - break; - } - if (status == TXN_COMMIT) - break; - - /* Set make_call in case we came through default */ - make_call = 1; - if (status == TXN_OK && - (ret = __db_txnlist_update(env, - info, txnid, rectype == DB___txn_prepare ? - TXN_PREPARE : TXN_ABORT, NULL, &status, 0)) != 0) - return (ret); - } - break; - case DB_TXN_FORWARD_ROLL: - /* - * In the forward pass, if we haven't seen the transaction, - * do nothing, else recover it. - * - * We need to always redo DB___db_noop records, so that we - * properly handle any commits after the file was closed. - */ - switch (rectype) { - case DB___txn_recycle: - case DB___txn_ckp: - case DB___db_noop: - case DB___dbreg_register: - make_call = 1; - break; - - default: - if (txnid == 0) - status = 0; - else { - ret = __db_txnlist_find(env, - info, txnid, &status); - - if (ret == DB_NOTFOUND) - /* Break out out of if clause. */ - ; - else if (ret != 0) - return (ret); - else if (status == TXN_COMMIT) { - make_call = 1; - break; - } - } - - } - break; - default: - return (__db_unknown_flag( - env, "__db_dispatch", (u_int32_t)redo)); - } - - if (make_call) { - /* - * If the debug flag is set then we are logging - * records for a non-durable update so that they - * may be examined for diagnostic purposes. 
- * So only make the call if we are printing, - * otherwise we need to extract the previous - * lsn so undo will work properly. - */ - if (rectype & DB_debug_FLAG) { - if (redo == DB_TXN_PRINT) - rectype &= ~DB_debug_FLAG; - else { - LOGCOPY_TOLSN(env, lsnp, - (u_int8_t *)db->data + - sizeof(rectype) + - sizeof(txnid)); - return (0); - } - } - if (rectype >= DB_user_BEGIN) { - if (dbenv->app_dispatch != NULL) - return (dbenv->app_dispatch(dbenv, - db, lsnp, redo)); - - /* No application-specific dispatch */ - urectype = rectype - DB_user_BEGIN; - if (urectype > dtab->ext_size || - dtab->ext_dispatch[urectype] == NULL) { - __db_errx(env, - "Illegal application-specific record type %lu in log", - (u_long)rectype); - return (EINVAL); - } - return ((dtab->ext_dispatch[urectype])(dbenv, - db, lsnp, redo)); - } else { - if (rectype > dtab->int_size || - dtab->int_dispatch[rectype] == NULL) { - __db_errx(env, - "Illegal record type %lu in log", - (u_long)rectype); - return (EINVAL); - } - return ((dtab->int_dispatch[rectype])(env, - db, lsnp, redo, info)); - } - } - - return (0); -} - -/* - * __db_add_recovery -- Add recovery functions to the dispatch table. - * - * We have two versions of this, an external one and an internal one, - * because application-specific functions take different arguments - * for dispatch (ENV versus DB_ENV). - * - * This is the external version. - * - * PUBLIC: int __db_add_recovery __P((DB_ENV *, DB_DISTAB *, - * PUBLIC: int (*)(DB_ENV *, DBT *, DB_LSN *, db_recops), u_int32_t)); - */ -int -__db_add_recovery(dbenv, dtab, func, ndx) - DB_ENV *dbenv; - DB_DISTAB *dtab; - int (*func) __P((DB_ENV *, DBT *, DB_LSN *, db_recops)); - u_int32_t ndx; -{ - size_t i, nsize; - int ret; - - /* Make sure this is an application-specific record. 
*/ - if (ndx < DB_user_BEGIN) { - __db_errx(dbenv->env, - "Attempting to add application-specific record with invalid type %lu", - (u_long)ndx); - return (EINVAL); - } - ndx -= DB_user_BEGIN; - - /* Check if we have to grow the table. */ - if (ndx >= dtab->ext_size) { - nsize = ndx + 40; - if ((ret = - __os_realloc(dbenv->env, nsize * - sizeof((dtab->ext_dispatch)[0]), &dtab->ext_dispatch)) - != 0) - return (ret); - for (i = dtab->ext_size; i < nsize; ++i) - (dtab->ext_dispatch)[i] = NULL; - dtab->ext_size = nsize; - } - - (dtab->ext_dispatch)[ndx] = func; - return (0); -} - -/* - * __db_add_recovery_int -- - * - * Internal version of dispatch addition function. - * - * - * PUBLIC: int __db_add_recovery_int __P((ENV *, DB_DISTAB *, - * PUBLIC: int (*)(ENV *, DBT *, DB_LSN *, db_recops, void *), u_int32_t)); - */ -int -__db_add_recovery_int(env, dtab, func, ndx) - ENV *env; - DB_DISTAB *dtab; - int (*func) __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - u_int32_t ndx; -{ - size_t i, nsize; - int ret; - - if (ndx >= DB_user_BEGIN) { - __db_errx(env, - "Attempting to add internal record with invalid type %lu", - (u_long)ndx); - return (EINVAL); - } - - /* Check if we have to grow the table. */ - if (ndx >= dtab->int_size) { - nsize = ndx + 40; - if ((ret = - __os_realloc(env, nsize * sizeof((dtab->int_dispatch)[0]), - &dtab->int_dispatch)) != 0) - return (ret); - for (i = dtab->int_size; i < nsize; ++i) - (dtab->int_dispatch)[i] = NULL; - dtab->int_size = nsize; - } - - (dtab->int_dispatch)[ndx] = func; - return (0); -} - -/* - * __db_txnlist_init -- - * Initialize transaction linked list. 
- * - * PUBLIC: int __db_txnlist_init __P((ENV *, DB_THREAD_INFO *, - * PUBLIC: u_int32_t, u_int32_t, DB_LSN *, DB_TXNHEAD **)); - */ -int -__db_txnlist_init(env, ip, low_txn, hi_txn, trunc_lsn, retp) - ENV *env; - DB_THREAD_INFO *ip; - u_int32_t low_txn, hi_txn; - DB_LSN *trunc_lsn; - DB_TXNHEAD **retp; -{ - DB_TXNHEAD *headp; - u_int32_t size, tmp; - int ret; - - /* - * Size a hash table. - * If low is zero then we are being called during rollback - * and we need only one slot. - * Hi maybe lower than low if we have recycled txnid's. - * The numbers here are guesses about txn density, we can afford - * to look at a few entries in each slot. - */ - if (low_txn == 0) - size = 1; - else { - if (hi_txn < low_txn) { - tmp = hi_txn; - hi_txn = low_txn; - low_txn = tmp; - } - tmp = hi_txn - low_txn; - /* See if we wrapped around. */ - if (tmp > (TXN_MAXIMUM - TXN_MINIMUM) / 2) - tmp = (low_txn - TXN_MINIMUM) + (TXN_MAXIMUM - hi_txn); - size = tmp / 5; - if (size < 100) - size = 100; - } - if ((ret = __os_malloc(env, - sizeof(DB_TXNHEAD) + size * sizeof(headp->head), &headp)) != 0) - return (ret); - - memset(headp, 0, sizeof(DB_TXNHEAD) + size * sizeof(headp->head)); - headp->maxid = hi_txn; - headp->generation = 0; - headp->nslots = size; - headp->gen_alloc = 8; - headp->thread_info = ip; - if ((ret = __os_malloc(env, headp->gen_alloc * - sizeof(headp->gen_array[0]), &headp->gen_array)) != 0) { - __os_free(env, headp); - return (ret); - } - headp->gen_array[0].generation = 0; - headp->gen_array[0].txn_min = TXN_MINIMUM; - headp->gen_array[0].txn_max = TXN_MAXIMUM; - if (trunc_lsn != NULL) { - headp->trunc_lsn = *trunc_lsn; - headp->maxlsn = *trunc_lsn; - } else { - ZERO_LSN(headp->trunc_lsn); - ZERO_LSN(headp->maxlsn); - } - ZERO_LSN(headp->ckplsn); - - *retp = headp; - return (0); -} - -#define FIND_GENERATION(hp, txnid, gen) do { \ - u_int32_t __i; \ - for (__i = 0; __i <= (hp)->generation; __i++) \ - /* The range may wrap around the end. 
*/ \ - if ((hp)->gen_array[__i].txn_min < \ - (hp)->gen_array[__i].txn_max ? \ - ((txnid) >= (hp)->gen_array[__i].txn_min && \ - (txnid) <= (hp)->gen_array[__i].txn_max) : \ - ((txnid) >= (hp)->gen_array[__i].txn_min || \ - (txnid) <= (hp)->gen_array[__i].txn_max)) \ - break; \ - DB_ASSERT(env, __i <= (hp)->generation); \ - gen = (hp)->gen_array[__i].generation; \ -} while (0) - -/* - * __db_txnlist_add -- - * Add an element to our transaction linked list. - * - * PUBLIC: int __db_txnlist_add __P((ENV *, - * PUBLIC: DB_TXNHEAD *, u_int32_t, u_int32_t, DB_LSN *)); - */ -int -__db_txnlist_add(env, hp, txnid, status, lsn) - ENV *env; - DB_TXNHEAD *hp; - u_int32_t txnid, status; - DB_LSN *lsn; -{ - DB_TXNLIST *elp; - int ret; - - if ((ret = __os_malloc(env, sizeof(DB_TXNLIST), &elp)) != 0) - return (ret); - - LIST_INSERT_HEAD(&hp->head[DB_TXNLIST_MASK(hp, txnid)], elp, links); - - /* Find the most recent generation containing this ID */ - FIND_GENERATION(hp, txnid, elp->u.t.generation); - elp->type = TXNLIST_TXNID; - elp->u.t.txnid = txnid; - elp->u.t.status = status; - if (txnid > hp->maxid) - hp->maxid = txnid; - if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT) - hp->maxlsn = *lsn; - - DB_ASSERT(env, lsn == NULL || - status != TXN_COMMIT || LOG_COMPARE(&hp->maxlsn, lsn) >= 0); - - return (0); -} - -/* - * __db_txnlist_remove -- - * Remove an element from our transaction linked list. - * - * PUBLIC: int __db_txnlist_remove __P((ENV *, DB_TXNHEAD *, u_int32_t)); - */ -int -__db_txnlist_remove(env, hp, txnid) - ENV *env; - DB_TXNHEAD *hp; - u_int32_t txnid; -{ - DB_TXNLIST *entry; - u_int32_t status; - - return (__db_txnlist_find_internal(env, - hp, TXNLIST_TXNID, txnid, &entry, 1, &status)); -} - -/* - * __db_txnlist_ckp -- - * Used to record the maximum checkpoint that will be retained - * after recovery. 
Typically this is simply the max checkpoint, but - * if we are doing client replication recovery or timestamp-based - * recovery, we are going to virtually truncate the log and we need - * to retain the last checkpoint before the truncation point. - * - * PUBLIC: void __db_txnlist_ckp __P((ENV *, DB_TXNHEAD *, DB_LSN *)); - */ -void -__db_txnlist_ckp(env, hp, ckp_lsn) - ENV *env; - DB_TXNHEAD *hp; - DB_LSN *ckp_lsn; -{ - - COMPQUIET(env, NULL); - - if (IS_ZERO_LSN(hp->ckplsn) && !IS_ZERO_LSN(hp->maxlsn) && - LOG_COMPARE(&hp->maxlsn, ckp_lsn) >= 0) - hp->ckplsn = *ckp_lsn; -} - -/* - * __db_txnlist_end -- - * Discard transaction linked list. - * - * PUBLIC: void __db_txnlist_end __P((ENV *, DB_TXNHEAD *)); - */ -void -__db_txnlist_end(env, hp) - ENV *env; - DB_TXNHEAD *hp; -{ - u_int32_t i; - DB_TXNLIST *p; - - if (hp == NULL) - return; - - for (i = 0; i < hp->nslots; i++) - while (hp != NULL && (p = LIST_FIRST(&hp->head[i])) != NULL) { - switch (p->type) { - case TXNLIST_LSN: - __os_free(env, p->u.l.lsn_stack); - break; - case TXNLIST_DELETE: - case TXNLIST_TXNID: - default: - /* - * Possibly an incomplete DB_TXNLIST; just - * free it. - */ - break; - } - LIST_REMOVE(p, links); - __os_free(env, p); - } - - if (hp->gen_array != NULL) - __os_free(env, hp->gen_array); - __os_free(env, hp); -} - -/* - * __db_txnlist_find -- - * Checks to see if a txnid with the current generation is in the - * txnid list. This returns DB_NOTFOUND if the item isn't in the - * list otherwise it returns (like __db_txnlist_find_internal) - * the status of the transaction. A txnid of 0 means the record - * was generated while not in a transaction. 
- * - * PUBLIC: int __db_txnlist_find __P((ENV *, - * PUBLIC: DB_TXNHEAD *, u_int32_t, u_int32_t *)); - */ -int -__db_txnlist_find(env, hp, txnid, statusp) - ENV *env; - DB_TXNHEAD *hp; - u_int32_t txnid, *statusp; -{ - DB_TXNLIST *entry; - - if (txnid == 0) - return (DB_NOTFOUND); - - return (__db_txnlist_find_internal(env, hp, - TXNLIST_TXNID, txnid, &entry, 0, statusp)); -} - -/* - * __db_txnlist_update -- - * Change the status of an existing transaction entry. - * Returns DB_NOTFOUND if no such entry exists. - * - * PUBLIC: int __db_txnlist_update __P((ENV *, DB_TXNHEAD *, - * PUBLIC: u_int32_t, u_int32_t, DB_LSN *, u_int32_t *, int)); - */ -int -__db_txnlist_update(env, hp, txnid, status, lsn, ret_status, add_ok) - ENV *env; - DB_TXNHEAD *hp; - u_int32_t txnid, status; - DB_LSN *lsn; - u_int32_t *ret_status; - int add_ok; -{ - DB_TXNLIST *elp; - int ret; - - if (txnid == 0) - return (DB_NOTFOUND); - - ret = __db_txnlist_find_internal(env, - hp, TXNLIST_TXNID, txnid, &elp, 0, ret_status); - - if (ret == DB_NOTFOUND && add_ok) { - *ret_status = status; - return (__db_txnlist_add(env, hp, txnid, status, lsn)); - } - if (ret != 0) - return (ret); - - if (*ret_status == TXN_IGNORE) - return (0); - - elp->u.t.status = status; - - if (lsn != NULL && IS_ZERO_LSN(hp->maxlsn) && status == TXN_COMMIT) - hp->maxlsn = *lsn; - - return (ret); -} - -/* - * __db_txnlist_find_internal -- - * Find an entry on the transaction list. If the entry is not there or - * the list pointer is not initialized we return DB_NOTFOUND. If the - * item is found, we return the status. Currently we always call this - * with an initialized list pointer but checking for NULL keeps it general. 
- */ -static int -__db_txnlist_find_internal(env, - hp, type, txnid, txnlistp, delete, statusp) - ENV *env; - DB_TXNHEAD *hp; - db_txnlist_type type; - u_int32_t txnid; - DB_TXNLIST **txnlistp; - int delete; - u_int32_t *statusp; -{ - struct __db_headlink *head; - DB_TXNLIST *p; - u_int32_t generation, hash; - int ret; - - ret = 0; - - if (hp == NULL) - return (DB_NOTFOUND); - - switch (type) { - case TXNLIST_TXNID: - hash = txnid; - FIND_GENERATION(hp, txnid, generation); - break; - case TXNLIST_DELETE: - case TXNLIST_LSN: - default: - return (__env_panic(env, EINVAL)); - } - - head = &hp->head[DB_TXNLIST_MASK(hp, hash)]; - LIST_FOREACH(p, head, links) { - if (p->type != type) - continue; - switch (type) { - case TXNLIST_TXNID: - if (p->u.t.txnid != txnid || - generation != p->u.t.generation) - continue; - *statusp = p->u.t.status; - break; - - case TXNLIST_DELETE: - case TXNLIST_LSN: - default: - return (__env_panic(env, EINVAL)); - } - if (delete == 1) { - LIST_REMOVE(p, links); - __os_free(env, p); - *txnlistp = NULL; - } else if (p != LIST_FIRST(head)) { - /* Move it to head of list. */ - LIST_REMOVE(p, links); - LIST_INSERT_HEAD(head, p, links); - *txnlistp = p; - } else - *txnlistp = p; - return (ret); - } - - return (DB_NOTFOUND); -} - -/* - * __db_txnlist_gen -- - * Change the current generation number. - * - * PUBLIC: int __db_txnlist_gen __P((ENV *, - * PUBLIC: DB_TXNHEAD *, int, u_int32_t, u_int32_t)); - */ -int -__db_txnlist_gen(env, hp, incr, min, max) - ENV *env; - DB_TXNHEAD *hp; - int incr; - u_int32_t min, max; -{ - int ret; - - /* - * During recovery generation numbers keep track of "restart" - * checkpoints and recycle records. Restart checkpoints occur - * whenever we take a checkpoint and there are no outstanding - * transactions. When that happens, we can reset transaction IDs - * back to TXNID_MINIMUM. Currently we only do the reset - * at then end of recovery. Recycle records occur when txnids - * are exhausted during runtime. 
A free range of ids is identified - * and logged. This code maintains a stack of ranges. A txnid - * is given the generation number of the first range it falls into - * in the stack. - */ - if (incr < 0) { - --hp->generation; - memmove(hp->gen_array, &hp->gen_array[1], - (hp->generation + 1) * sizeof(hp->gen_array[0])); - } else { - ++hp->generation; - if (hp->generation >= hp->gen_alloc) { - hp->gen_alloc *= 2; - if ((ret = __os_realloc(env, hp->gen_alloc * - sizeof(hp->gen_array[0]), &hp->gen_array)) != 0) - return (ret); - } - memmove(&hp->gen_array[1], &hp->gen_array[0], - hp->generation * sizeof(hp->gen_array[0])); - hp->gen_array[0].generation = hp->generation; - hp->gen_array[0].txn_min = min; - hp->gen_array[0].txn_max = max; - } - return (0); -} - -/* - * __db_txnlist_lsnadd -- - * Save the prev_lsn from a txn_child record. - * - * PUBLIC: int __db_txnlist_lsnadd __P((ENV *, DB_TXNHEAD *, DB_LSN *)); - */ -int -__db_txnlist_lsnadd(env, hp, lsnp) - ENV *env; - DB_TXNHEAD *hp; - DB_LSN *lsnp; -{ - DB_TXNLIST *elp; - int ret; - - if (IS_ZERO_LSN(*lsnp)) - return (0); - - LIST_FOREACH(elp, &hp->head[0], links) - if (elp->type == TXNLIST_LSN) - break; - - if (elp == NULL) { - if ((ret = __db_txnlist_lsninit(env, hp, lsnp)) != 0) - return (ret); - return (DB_SURPRISE_KID); - } - - if (elp->u.l.stack_indx == elp->u.l.stack_size) { - elp->u.l.stack_size <<= 1; - if ((ret = __os_realloc(env, sizeof(DB_LSN) * - elp->u.l.stack_size, &elp->u.l.lsn_stack)) != 0) { - __db_txnlist_end(env, hp); - return (ret); - } - } - elp->u.l.lsn_stack[elp->u.l.stack_indx++] = *lsnp; - - return (0); -} - -/* - * __db_txnlist_lsnget -- - * - * PUBLIC: int __db_txnlist_lsnget __P((ENV *, - * PUBLIC: DB_TXNHEAD *, DB_LSN *, u_int32_t)); - * Get the lsn saved from a txn_child record. 
- */ -int -__db_txnlist_lsnget(env, hp, lsnp, flags) - ENV *env; - DB_TXNHEAD *hp; - DB_LSN *lsnp; - u_int32_t flags; -{ - DB_TXNLIST *elp; - - COMPQUIET(env, NULL); - COMPQUIET(flags, 0); - - LIST_FOREACH(elp, &hp->head[0], links) - if (elp->type == TXNLIST_LSN) - break; - - if (elp == NULL || elp->u.l.stack_indx == 0) { - ZERO_LSN(*lsnp); - return (0); - } - - *lsnp = elp->u.l.lsn_stack[--elp->u.l.stack_indx]; - - return (0); -} - -/* - * __db_txnlist_lsninit -- - * Initialize a transaction list with an lsn array entry. - * - * PUBLIC: int __db_txnlist_lsninit __P((ENV *, DB_TXNHEAD *, DB_LSN *)); - */ -int -__db_txnlist_lsninit(env, hp, lsnp) - ENV *env; - DB_TXNHEAD *hp; - DB_LSN *lsnp; -{ - DB_TXNLIST *elp; - int ret; - - elp = NULL; - - if ((ret = __os_malloc(env, sizeof(DB_TXNLIST), &elp)) != 0) - goto err; - LIST_INSERT_HEAD(&hp->head[0], elp, links); - - elp->type = TXNLIST_LSN; - if ((ret = __os_malloc(env, - sizeof(DB_LSN) * DB_LSN_STACK_SIZE, &elp->u.l.lsn_stack)) != 0) - goto err; - elp->u.l.stack_indx = 1; - elp->u.l.stack_size = DB_LSN_STACK_SIZE; - elp->u.l.lsn_stack[0] = *lsnp; - - return (0); - -err: __db_txnlist_end(env, hp); - return (ret); -} - -#ifdef DEBUG -/* - * __db_txnlist_print -- - * Print out the transaction list. 
- * - * PUBLIC: void __db_txnlist_print __P((DB_TXNHEAD *)); - */ -void -__db_txnlist_print(hp) - DB_TXNHEAD *hp; -{ - DB_TXNLIST *p; - u_int32_t i; - char *txntype; - - printf("Maxid: %lu Generation: %lu\n", - (u_long)hp->maxid, (u_long)hp->generation); - for (i = 0; i < hp->nslots; i++) - LIST_FOREACH(p, &hp->head[i], links) { - if (p->type != TXNLIST_TXNID) { - printf("Unrecognized type: %d\n", p->type); - continue; - } - switch (p->u.t.status) { - case TXN_OK: - txntype = "OK"; - break; - case TXN_COMMIT: - txntype = "commit"; - break; - case TXN_PREPARE: - txntype = "prepare"; - break; - case TXN_ABORT: - txntype = "abort"; - break; - case TXN_IGNORE: - txntype = "ignore"; - break; - case TXN_EXPECTED: - txntype = "expected"; - break; - case TXN_UNEXPECTED: - txntype = "unexpected"; - break; - default: - txntype = "UNKNOWN"; - break; - } - printf("TXNID: %lx(%lu): %s\n", - (u_long)p->u.t.txnid, - (u_long)p->u.t.generation, txntype); - } -} -#endif diff --git a/db/db_dup.c b/db/db_dup.c deleted file mode 100644 index b789e03..0000000 --- a/db/db_dup.c +++ /dev/null @@ -1,203 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/mp.h" -#include "dbinc/db_am.h" - -/* - * __db_ditem_nolog -- - * Remove an item from a page without affecting its recoverability. - * - * PUBLIC: int __db_ditem_nolog __P((DBC *, PAGE *, u_int32_t, u_int32_t)); - */ -int -__db_ditem_nolog(dbc, pagep, indx, nbytes) - DBC *dbc; - PAGE *pagep; - u_int32_t indx, nbytes; -{ - DB *dbp; - db_indx_t cnt, *inp, offset; - u_int8_t *from; - - dbp = dbc->dbp; - DB_ASSERT(dbp->env, IS_DIRTY(pagep)); - DB_ASSERT(dbp->env, indx < NUM_ENT(pagep)); - - /* - * If there's only a single item on the page, we don't have to - * work hard. 
- */ - if (NUM_ENT(pagep) == 1) { - NUM_ENT(pagep) = 0; - HOFFSET(pagep) = dbp->pgsize; - return (0); - } - - inp = P_INP(dbp, pagep); - /* - * Pack the remaining key/data items at the end of the page. Use - * memmove(3), the regions may overlap. - */ - from = (u_int8_t *)pagep + HOFFSET(pagep); - DB_ASSERT(dbp->env, inp[indx] >= HOFFSET(pagep)); - memmove(from + nbytes, from, inp[indx] - HOFFSET(pagep)); - HOFFSET(pagep) += nbytes; - - /* Adjust the indices' offsets. */ - offset = inp[indx]; - for (cnt = 0; cnt < NUM_ENT(pagep); ++cnt) - if (inp[cnt] < offset) - inp[cnt] += nbytes; - - /* Shift the indices down. */ - --NUM_ENT(pagep); - if (indx != NUM_ENT(pagep)) - memmove(&inp[indx], &inp[indx + 1], - sizeof(db_indx_t) * (NUM_ENT(pagep) - indx)); - - return (0); -} - -/* - * __db_ditem -- - * Remove an item from a page, logging it if enabled. - * - * PUBLIC: int __db_ditem __P((DBC *, PAGE *, u_int32_t, u_int32_t)); - */ -int -__db_ditem(dbc, pagep, indx, nbytes) - DBC *dbc; - PAGE *pagep; - u_int32_t indx, nbytes; -{ - DB *dbp; - DBT ldbt; - int ret; - - dbp = dbc->dbp; - - if (DBC_LOGGING(dbc)) { - ldbt.data = P_ENTRY(dbp, pagep, indx); - ldbt.size = nbytes; - if ((ret = __db_addrem_log(dbp, dbc->txn, - &LSN(pagep), 0, DB_REM_DUP, PGNO(pagep), - (u_int32_t)indx, nbytes, &ldbt, NULL, &LSN(pagep))) != 0) - return (ret); - } else - LSN_NOT_LOGGED(LSN(pagep)); - - return (__db_ditem_nolog(dbc, pagep, indx, nbytes)); -} - -/* - * __db_pitem_nolog -- - * Put an item on a page without logging. 
- * - * PUBLIC: int __db_pitem_nolog - * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); - */ -int -__db_pitem_nolog(dbc, pagep, indx, nbytes, hdr, data) - DBC *dbc; - PAGE *pagep; - u_int32_t indx; - u_int32_t nbytes; - DBT *hdr, *data; -{ - BKEYDATA bk; - DB *dbp; - DBT thdr; - db_indx_t *inp; - u_int8_t *p; - - dbp = dbc->dbp; - - DB_ASSERT(dbp->env, IS_DIRTY(pagep)); - - if (nbytes > P_FREESPACE(dbp, pagep)) { - DB_ASSERT(dbp->env, nbytes <= P_FREESPACE(dbp, pagep)); - return (EINVAL); - } - - if (hdr == NULL) { - B_TSET(bk.type, B_KEYDATA); - bk.len = data == NULL ? 0 : data->size; - - thdr.data = &bk; - thdr.size = SSZA(BKEYDATA, data); - hdr = &thdr; - } - inp = P_INP(dbp, pagep); - - /* Adjust the index table, then put the item on the page. */ - if (indx != NUM_ENT(pagep)) - memmove(&inp[indx + 1], &inp[indx], - sizeof(db_indx_t) * (NUM_ENT(pagep) - indx)); - HOFFSET(pagep) -= nbytes; - inp[indx] = HOFFSET(pagep); - ++NUM_ENT(pagep); - - p = P_ENTRY(dbp, pagep, indx); - memcpy(p, hdr->data, hdr->size); - if (data != NULL) - memcpy(p + hdr->size, data->data, data->size); - - return (0); -} - -/* - * __db_pitem -- - * Put an item on a page. - * - * PUBLIC: int __db_pitem - * PUBLIC: __P((DBC *, PAGE *, u_int32_t, u_int32_t, DBT *, DBT *)); - */ -int -__db_pitem(dbc, pagep, indx, nbytes, hdr, data) - DBC *dbc; - PAGE *pagep; - u_int32_t indx; - u_int32_t nbytes; - DBT *hdr, *data; -{ - DB *dbp; - int ret; - - dbp = dbc->dbp; - /* - * Put a single item onto a page. The logic figuring out where to - * insert and whether it fits is handled in the caller. All we do - * here is manage the page shuffling. We cheat a little bit in that - * we don't want to copy the dbt on a normal put twice. If hdr is - * NULL, we create a BKEYDATA structure on the page, otherwise, just - * copy the caller's information onto the page. - * - * This routine is also used to put entries onto the page where the - * entry is pre-built, e.g., during recovery. 
In this case, the hdr - * will point to the entry, and the data argument will be NULL. - * - * !!! - * There's a tremendous potential for off-by-one errors here, since - * the passed in header sizes must be adjusted for the structure's - * placeholder for the trailing variable-length data field. - */ - if (DBC_LOGGING(dbc)) { - if ((ret = __db_addrem_log(dbp, dbc->txn, - &LSN(pagep), 0, DB_ADD_DUP, PGNO(pagep), - (u_int32_t)indx, nbytes, hdr, data, &LSN(pagep))) != 0) - return (ret); - } else - LSN_NOT_LOGGED(LSN(pagep)); - - return (__db_pitem_nolog(dbc, pagep, indx, nbytes, hdr, data)); -} diff --git a/db/db_iface.c b/db/db_iface.c deleted file mode 100644 index 55f3e2a..0000000 --- a/db/db_iface.c +++ /dev/null @@ -1,2817 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#ifndef HAVE_QUEUE -#include "dbinc/qam.h" /* For __db_no_queue_am(). 
*/ -#endif -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/partition.h" -#include "dbinc/txn.h" - -static int __db_associate_arg __P((DB *, DB *, - int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); -static int __dbc_del_arg __P((DBC *, u_int32_t)); -static int __dbc_pget_arg __P((DBC *, DBT *, u_int32_t)); -static int __dbc_put_arg __P((DBC *, DBT *, DBT *, u_int32_t)); -static int __db_curinval __P((const ENV *)); -static int __db_cursor_arg __P((DB *, u_int32_t)); -static int __db_del_arg __P((DB *, DBT *, u_int32_t)); -static int __db_get_arg __P((const DB *, DBT *, DBT *, u_int32_t)); -static int __db_join_arg __P((DB *, DBC **, u_int32_t)); -static int __db_open_arg __P((DB *, - DB_TXN *, const char *, const char *, DBTYPE, u_int32_t)); -static int __db_pget_arg __P((DB *, DBT *, u_int32_t)); -static int __db_put_arg __P((DB *, DBT *, DBT *, u_int32_t)); -static int __dbt_ferr __P((const DB *, const char *, const DBT *, int)); -static int __db_associate_foreign_arg __P((DB *, DB *, - int (*)(DB *, const DBT *, DBT *, const DBT *, int *), - u_int32_t)); - -/* - * These functions implement the Berkeley DB API. They are organized in a - * layered fashion. The interface functions (XXX_pp) perform all generic - * error checks (for example, PANIC'd region, replication state change - * in progress, inconsistent transaction usage), call function-specific - * check routines (_arg) to check for proper flag usage, etc., do pre-amble - * processing (incrementing handle counts, handling local transactions), - * call the function and then do post-amble processing (local transactions, - * decrement handle counts). - * - * The basic structure is: - * Check for simple/generic errors (PANIC'd region) - * Check if replication is changing state (increment handle count). 
- * Call function-specific argument checking routine - * Create internal transaction if necessary - * Call underlying worker function - * Commit/abort internal transaction if necessary - * Decrement handle count - */ - -/* - * __db_associate_pp -- - * DB->associate pre/post processing. - * - * PUBLIC: int __db_associate_pp __P((DB *, DB_TXN *, DB *, - * PUBLIC: int (*)(DB *, const DBT *, const DBT *, DBT *), u_int32_t)); - */ -int -__db_associate_pp(dbp, txn, sdbp, callback, flags) - DB *dbp, *sdbp; - DB_TXN *txn; - int (*callback) __P((DB *, const DBT *, const DBT *, DBT *)); - u_int32_t flags; -{ - DBC *sdbc; - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret, txn_local; - - env = dbp->env; - txn_local = 0; - - STRIP_AUTO_COMMIT(flags); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - /* - * Secondary cursors may have the primary's lock file ID, so we need - * to make sure that no older cursors are lying around when we make - * the transition. - */ - if (TAILQ_FIRST(&sdbp->active_queue) != NULL || - TAILQ_FIRST(&sdbp->join_queue) != NULL) { - __db_errx(env, - "Databases may not become secondary indices while cursors are open"); - ret = EINVAL; - goto err; - } - - if ((ret = __db_associate_arg(dbp, sdbp, callback, flags)) != 0) - goto err; - - /* - * Create a local transaction as necessary, check for consistent - * transaction usage, and, if we have no transaction but do have - * locking on, acquire a locker id for the handle lock acquisition. - */ - if (IS_DB_AUTO_COMMIT(dbp, txn)) { - if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) - goto err; - txn_local = 1; - } - - /* Check for consistent transaction usage. 
*/ - if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) - goto err; - - while ((sdbc = TAILQ_FIRST(&sdbp->free_queue)) != NULL) - if ((ret = __dbc_destroy(sdbc)) != 0) - goto err; - - ret = __db_associate(dbp, ip, txn, sdbp, callback, flags); - -err: if (txn_local && - (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) - ret = t_ret; - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_associate_arg -- - * Check DB->associate arguments. - */ -static int -__db_associate_arg(dbp, sdbp, callback, flags) - DB *dbp, *sdbp; - int (*callback) __P((DB *, const DBT *, const DBT *, DBT *)); - u_int32_t flags; -{ - ENV *env; - int ret; - - env = dbp->env; - - if (F_ISSET(sdbp, DB_AM_SECONDARY)) { - __db_errx(env, - "Secondary index handles may not be re-associated"); - return (EINVAL); - } - if (F_ISSET(dbp, DB_AM_SECONDARY)) { - __db_errx(env, - "Secondary indices may not be used as primary databases"); - return (EINVAL); - } - if (F_ISSET(dbp, DB_AM_DUP)) { - __db_errx(env, - "Primary databases may not be configured with duplicates"); - return (EINVAL); - } - if (F_ISSET(dbp, DB_AM_RENUMBER)) { - __db_errx(env, - "Renumbering recno databases may not be used as primary databases"); - return (EINVAL); - } - - /* - * It's OK for the primary and secondary to not share an environment IFF - * the environments are local to the DB handle. (Specifically, cursor - * adjustment will work correctly in this case.) The environment being - * local implies the environment is not configured for either locking or - * transactions, as neither of those could work correctly. 
- */ - if (dbp->env != sdbp->env && - (!F_ISSET(dbp->env, ENV_DBLOCAL) || - !F_ISSET(sdbp->env, ENV_DBLOCAL))) { - __db_errx(env, - "The primary and secondary must be opened in the same environment"); - return (EINVAL); - } - if ((DB_IS_THREADED(dbp) && !DB_IS_THREADED(sdbp)) || - (!DB_IS_THREADED(dbp) && DB_IS_THREADED(sdbp))) { - __db_errx(env, - "The DB_THREAD setting must be the same for primary and secondary"); - return (EINVAL); - } - if (callback == NULL && - (!F_ISSET(dbp, DB_AM_RDONLY) || !F_ISSET(sdbp, DB_AM_RDONLY))) { - __db_errx(env, - "Callback function may be NULL only when database handles are read-only"); - return (EINVAL); - } - - if ((ret = __db_fchk(env, "DB->associate", flags, DB_CREATE | - DB_IMMUTABLE_KEY)) != 0) - return (ret); - - return (0); -} - -/* - * __db_close_pp -- - * DB->close pre/post processing. - * - * PUBLIC: int __db_close_pp __P((DB *, u_int32_t)); - */ -int -__db_close_pp(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - ret = 0; - - /* - * Close a DB handle -- as a handle destructor, we can't fail. - * - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if (flags != 0 && flags != DB_NOSYNC) - ret = __db_ferr(env, "DB->close", 0); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) { - handle_check = 0; - if (ret == 0) - ret = t_ret; - } - - if ((t_ret = __db_close(dbp, NULL, flags)) != 0 && ret == 0) - ret = t_ret; - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_cursor_pp -- - * DB->cursor pre/post processing. 
- * - * PUBLIC: int __db_cursor_pp __P((DB *, DB_TXN *, DBC **, u_int32_t)); - */ -int -__db_cursor_pp(dbp, txn, dbcp, flags) - DB *dbp; - DB_TXN *txn; - DBC **dbcp; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - REGENV *renv; - int rep_blocked, ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->cursor"); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - rep_blocked = 0; - if (txn == NULL && IS_ENV_REPLICATED(env)) { - if ((ret = __op_rep_enter(env)) != 0) - goto err; - rep_blocked = 1; - renv = env->reginfo->primary; - if (dbp->timestamp != renv->rep_timestamp) { - __db_errx(env, "%s %s", - "replication recovery unrolled committed transactions;", - "open DB and DBcursor handles must be closed"); - ret = DB_REP_HANDLE_DEAD; - goto err; - } - } - if ((ret = __db_cursor_arg(dbp, flags)) != 0) - goto err; - - /* - * Check for consistent transaction usage. For now, assume this - * cursor might be used for read operations only (in which case - * it may not require a txn). We'll check more stringently in - * c_del and c_put. (Note this means the read-op txn tests have - * to be a subset of the write-op ones.) - */ - if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0) - goto err; - - ret = __db_cursor(dbp, ip, txn, dbcp, flags); - -err: /* Release replication block on error. */ - if (ret != 0 && rep_blocked) - (void)__op_rep_exit(env); - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_cursor -- - * DB->cursor. 
- * - * PUBLIC: int __db_cursor __P((DB *, - * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBC **, u_int32_t)); - */ -int -__db_cursor(dbp, ip, txn, dbcp, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DBC **dbcp; - u_int32_t flags; -{ - DBC *dbc; - ENV *env; - db_lockmode_t mode; - int ret; - - env = dbp->env; - - if (MULTIVERSION(dbp) && txn == NULL && (LF_ISSET(DB_TXN_SNAPSHOT) || - F_ISSET(env->dbenv, DB_ENV_TXN_SNAPSHOT))) { - if ((ret = - __txn_begin(env, ip, NULL, &txn, DB_TXN_SNAPSHOT)) != 0) - return (ret); - F_SET(txn, TXN_PRIVATE); - } - - if ((ret = __db_cursor_int(dbp, ip, txn, dbp->type, PGNO_INVALID, - LF_ISSET(DB_CURSOR_BULK | DB_CURSOR_TRANSIENT), NULL, &dbc)) != 0) - return (ret); - - /* - * If this is CDB, do all the locking in the interface, which is - * right here. - */ - if (CDB_LOCKING(env)) { - mode = (LF_ISSET(DB_WRITELOCK)) ? DB_LOCK_WRITE : - ((LF_ISSET(DB_WRITECURSOR) || txn != NULL) ? - DB_LOCK_IWRITE : DB_LOCK_READ); - if ((ret = __lock_get(env, dbc->locker, 0, - &dbc->lock_dbt, mode, &dbc->mylock)) != 0) - goto err; - if (LF_ISSET(DB_WRITECURSOR)) - F_SET(dbc, DBC_WRITECURSOR); - if (LF_ISSET(DB_WRITELOCK)) - F_SET(dbc, DBC_WRITER); - } - - if (LF_ISSET(DB_READ_UNCOMMITTED) || - (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED))) - F_SET(dbc, DBC_READ_UNCOMMITTED); - - if (LF_ISSET(DB_READ_COMMITTED) || - (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED))) - F_SET(dbc, DBC_READ_COMMITTED); - - *dbcp = dbc; - return (0); - -err: (void)__dbc_close(dbc); - return (ret); -} - -/* - * __db_cursor_arg -- - * Check DB->cursor arguments. - */ -static int -__db_cursor_arg(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - ENV *env; - - env = dbp->env; - - /* - * DB_READ_COMMITTED and DB_READ_UNCOMMITTED require locking. 
- */ - if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED)) { - if (!LOCKING_ON(env)) - return (__db_fnl(env, "DB->cursor")); - } - - LF_CLR(DB_CURSOR_BULK | - DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_TXN_SNAPSHOT); - - /* Check for invalid function flags. */ - if (LF_ISSET(DB_WRITECURSOR)) { - if (DB_IS_READONLY(dbp)) - return (__db_rdonly(env, "DB->cursor")); - if (!CDB_LOCKING(env)) - return (__db_ferr(env, "DB->cursor", 0)); - LF_CLR(DB_WRITECURSOR); - } else if (LF_ISSET(DB_WRITELOCK)) { - if (DB_IS_READONLY(dbp)) - return (__db_rdonly(env, "DB->cursor")); - LF_CLR(DB_WRITELOCK); - } - - if (flags != 0) - return (__db_ferr(env, "DB->cursor", 0)); - - return (0); -} - -/* - * __db_del_pp -- - * DB->del pre/post processing. - * - * PUBLIC: int __db_del_pp __P((DB *, DB_TXN *, DBT *, u_int32_t)); - */ -int -__db_del_pp(dbp, txn, key, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret, txn_local; - - env = dbp->env; - txn_local = 0; - - STRIP_AUTO_COMMIT(flags); - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->del"); - -#ifdef CONFIG_TEST - if (IS_REP_MASTER(env)) - DB_TEST_WAIT(env, env->test_check); -#endif - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - if ((ret = __db_del_arg(dbp, key, flags)) != 0) - goto err; - - /* Create local transaction as necessary. */ - if (IS_DB_AUTO_COMMIT(dbp, txn)) { - if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) - goto err; - txn_local = 1; - } - - /* Check for consistent transaction usage. */ - if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) - goto err; - - ret = __db_del(dbp, ip, txn, key, flags); - -err: if (txn_local && - (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) - ret = t_ret; - - /* Release replication block. 
*/ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - ENV_LEAVE(env, ip); - __dbt_userfree(env, key, NULL, NULL); - return (ret); -} - -/* - * __db_del_arg -- - * Check DB->delete arguments. - */ -static int -__db_del_arg(dbp, key, flags) - DB *dbp; - DBT *key; - u_int32_t flags; -{ - ENV *env; - int ret; - - env = dbp->env; - - /* Check for changes to a read-only tree. */ - if (DB_IS_READONLY(dbp)) - return (__db_rdonly(env, "DB->del")); - - /* Check for invalid function flags. */ - switch (flags) { - case DB_CONSUME: - if (dbp->type != DB_QUEUE) - return (__db_ferr(env, "DB->del", 0)); - goto copy; - case DB_MULTIPLE: - case DB_MULTIPLE_KEY: - if (!F_ISSET(key, DB_DBT_BULK)) { - __db_errx(env, - "DB->del with DB_MULTIPLE(_KEY) requires multiple key records"); - return (EINVAL); - } - /* FALL THROUGH */ - case 0: -copy: if ((ret = __dbt_usercopy(env, key)) != 0) - return (ret); - break; - default: - return (__db_ferr(env, "DB->del", 0)); - } - - return (0); -} - -/* - * __db_exists -- - * DB->exists implementation. - * - * PUBLIC: int __db_exists __P((DB *, DB_TXN *, DBT *, u_int32_t)); - */ -int -__db_exists(dbp, txn, key, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - u_int32_t flags; -{ - DBT data; - int ret; - - /* - * Most flag checking is done in the DB->get call, we only check for - * specific incompatibilities here. This saves making __get_arg - * aware of the exist method's API constraints. - */ - STRIP_AUTO_COMMIT(flags); - if ((ret = __db_fchk(dbp->env, "DB->exists", flags, - DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) != 0) - return (ret); - - /* - * Configure a data DBT that returns no bytes so there's no copy - * of the data. - */ - memset(&data, 0, sizeof(data)); - data.dlen = 0; - data.flags = DB_DBT_PARTIAL | DB_DBT_USERMEM; - - return (dbp->get(dbp, txn, key, &data, flags)); -} - -/* - * db_fd_pp -- - * DB->fd pre/post processing. 
- * - * PUBLIC: int __db_fd_pp __P((DB *, int *)); - */ -int -__db_fd_pp(dbp, fdp) - DB *dbp; - int *fdp; -{ - DB_FH *fhp; - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->fd"); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) - goto err; - - /* - * !!! - * There's no argument checking to be done. - * - * !!! - * The actual method call is simple, do it inline. - * - * XXX - * Truly spectacular layering violation. - */ - if ((ret = __mp_xxx_fh(dbp->mpf, &fhp)) == 0) { - if (fhp == NULL) { - *fdp = -1; - __db_errx(env, - "Database does not have a valid file handle"); - ret = ENOENT; - } else - *fdp = fhp->fd; - } - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_get_pp -- - * DB->get pre/post processing. - * - * PUBLIC: int __db_get_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); - */ -int -__db_get_pp(dbp, txn, key, data, flags) - DB *dbp; - DB_TXN *txn; - DBT *key, *data; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - u_int32_t mode; - int handle_check, ignore_lease, ret, t_ret, txn_local; - - env = dbp->env; - mode = 0; - txn_local = 0; - - STRIP_AUTO_COMMIT(flags); - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get"); - - ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; - LF_CLR(DB_IGNORE_LEASE); - - if ((ret = __db_get_arg(dbp, key, data, flags)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for replication block. 
*/ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - if (LF_ISSET(DB_READ_UNCOMMITTED)) - mode = DB_READ_UNCOMMITTED; - else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME || - (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT) { - mode = DB_WRITELOCK; - if (IS_DB_AUTO_COMMIT(dbp, txn)) { - if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) - goto err; - txn_local = 1; - } - } - - /* Check for consistent transaction usage. */ - if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, - mode == DB_WRITELOCK || LF_ISSET(DB_RMW) ? 0 : 1)) != 0) - goto err; - - ret = __db_get(dbp, ip, txn, key, data, flags); - /* - * Check for master leases. - */ - if (ret == 0 && - IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) - ret = __rep_lease_check(env, 1); - -err: if (txn_local && - (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) - ret = t_ret; - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - __dbt_userfree(env, key, NULL, data); - return (ret); -} - -/* - * __db_get -- - * DB->get. - * - * PUBLIC: int __db_get __P((DB *, - * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, u_int32_t)); - */ -int -__db_get(dbp, ip, txn, key, data, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DBT *key, *data; - u_int32_t flags; -{ - DBC *dbc; - u_int32_t mode; - int ret, t_ret; - - /* - * The DB_CURSOR_TRANSIENT flag indicates that we're just doing a single - * operation with this cursor, and that in case of error we don't need - * to restore it to its old position. Thus, we can perform the get - * without duplicating the cursor, saving some cycles in this common - * case. 
- */ - mode = DB_CURSOR_TRANSIENT; - if (LF_ISSET(DB_READ_UNCOMMITTED)) { - mode |= DB_READ_UNCOMMITTED; - LF_CLR(DB_READ_UNCOMMITTED); - } else if (LF_ISSET(DB_READ_COMMITTED)) { - mode |= DB_READ_COMMITTED; - LF_CLR(DB_READ_COMMITTED); - } else if ((flags & DB_OPFLAGS_MASK) == DB_CONSUME || - (flags & DB_OPFLAGS_MASK) == DB_CONSUME_WAIT) - mode |= DB_WRITELOCK; - - if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0) - return (ret); - - DEBUG_LREAD(dbc, txn, "DB->get", key, NULL, flags); - - /* - * The semantics of bulk gets are different for DB->get vs DBC->get. - * Mark the cursor so the low-level bulk get routines know which - * behavior we want. - */ - F_SET(dbc, DBC_FROM_DB_GET); - - /* - * SET_RET_MEM indicates that if key and/or data have no DBT - * flags set and DB manages the returned-data memory, that memory - * will belong to this handle, not to the underlying cursor. - */ - SET_RET_MEM(dbc, dbp); - - if (LF_ISSET(~(DB_RMW | DB_MULTIPLE)) == 0) - LF_SET(DB_SET); - -#ifdef HAVE_PARTITION - if (F_ISSET(dbc, DBC_PARTITIONED)) - ret = __partc_get(dbc, key, data, flags); - else -#endif - ret = __dbc_get(dbc, key, data, flags); - - if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_get_arg -- - * DB->get argument checking, used by both DB->get and DB->pget. - */ -static int -__db_get_arg(dbp, key, data, flags) - const DB *dbp; - DBT *key, *data; - u_int32_t flags; -{ - ENV *env; - int dirty, multi, ret; - - env = dbp->env; - - /* - * Check for read-modify-write validity. DB_RMW doesn't make sense - * with CDB cursors since if you're going to write the cursor, you - * had to create it with DB_WRITECURSOR. Regardless, we check for - * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it. - * If this changes, confirm that DB does not itself set the DB_RMW - * flag in a path where CDB may have been configured. 
- */ - dirty = 0; - if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) { - if (!LOCKING_ON(env)) - return (__db_fnl(env, "DB->get")); - if ((ret = __db_fcchk(env, "DB->get", - flags, DB_READ_UNCOMMITTED, DB_READ_COMMITTED)) != 0) - return (ret); - if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED)) - dirty = 1; - LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); - } - - multi = 0; - if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { - if (LF_ISSET(DB_MULTIPLE_KEY)) - goto multi_err; - multi = LF_ISSET(DB_MULTIPLE) ? 1 : 0; - LF_CLR(DB_MULTIPLE); - } - - /* Check for invalid function flags. */ - switch (flags) { - case DB_GET_BOTH: - if ((ret = __dbt_usercopy(env, data)) != 0) - return (ret); - /* FALLTHROUGH */ - case 0: - if ((ret = __dbt_usercopy(env, key)) != 0) { - __dbt_userfree(env, key, NULL, data); - return (ret); - } - break; - case DB_SET_RECNO: - if (!F_ISSET(dbp, DB_AM_RECNUM)) - goto err; - if ((ret = __dbt_usercopy(env, key)) != 0) - return (ret); - break; - case DB_CONSUME: - case DB_CONSUME_WAIT: - if (dirty) { - __db_errx(env, - "%s is not supported with DB_CONSUME or DB_CONSUME_WAIT", - LF_ISSET(DB_READ_UNCOMMITTED) ? - "DB_READ_UNCOMMITTED" : "DB_READ_COMMITTED"); - return (EINVAL); - } - if (multi) -multi_err: return (__db_ferr(env, "DB->get", 1)); - if (dbp->type == DB_QUEUE) - break; - /* FALLTHROUGH */ - default: -err: return (__db_ferr(env, "DB->get", 0)); - } - - /* - * Check for invalid key/data flags. 
- */ - if ((ret = - __dbt_ferr(dbp, "key", key, DB_RETURNS_A_KEY(dbp, flags))) != 0) - return (ret); - if ((ret = __dbt_ferr(dbp, "data", data, 1)) != 0) - return (ret); - - if (multi) { - if (!F_ISSET(data, DB_DBT_USERMEM)) { - __db_errx(env, - "DB_MULTIPLE requires DB_DBT_USERMEM be set"); - return (EINVAL); - } - if (F_ISSET(key, DB_DBT_PARTIAL) || - F_ISSET(data, DB_DBT_PARTIAL)) { - __db_errx(env, - "DB_MULTIPLE does not support DB_DBT_PARTIAL"); - return (EINVAL); - } - if (data->ulen < 1024 || - data->ulen < dbp->pgsize || data->ulen % 1024 != 0) { - __db_errx(env, "%s%s", - "DB_MULTIPLE buffers must be ", - "aligned, at least page size and multiples of 1KB"); - return (EINVAL); - } - } - - return (0); -} - -/* - * __db_join_pp -- - * DB->join pre/post processing. - * - * PUBLIC: int __db_join_pp __P((DB *, DBC **, DBC **, u_int32_t)); - */ -int -__db_join_pp(primary, curslist, dbcp, flags) - DB *primary; - DBC **curslist, **dbcp; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = primary->env; - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = - __db_rep_enter(primary, 1, 0, curslist[0]->txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - if ((ret = __db_join_arg(primary, curslist, flags)) == 0) - ret = __db_join(primary, curslist, dbcp, flags); - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_join_arg -- - * Check DB->join arguments. 
- */ -static int -__db_join_arg(primary, curslist, flags) - DB *primary; - DBC **curslist; - u_int32_t flags; -{ - DB_TXN *txn; - ENV *env; - int i; - - env = primary->env; - - switch (flags) { - case 0: - case DB_JOIN_NOSORT: - break; - default: - return (__db_ferr(env, "DB->join", 0)); - } - - if (curslist == NULL || curslist[0] == NULL) { - __db_errx(env, - "At least one secondary cursor must be specified to DB->join"); - return (EINVAL); - } - - txn = curslist[0]->txn; - for (i = 1; curslist[i] != NULL; i++) - if (curslist[i]->txn != txn) { - __db_errx(env, - "All secondary cursors must share the same transaction"); - return (EINVAL); - } - - return (0); -} - -/* - * __db_key_range_pp -- - * DB->key_range pre/post processing. - * - * PUBLIC: int __db_key_range_pp - * PUBLIC: __P((DB *, DB_TXN *, DBT *, DB_KEY_RANGE *, u_int32_t)); - */ -int -__db_key_range_pp(dbp, txn, key, kr, flags) - DB *dbp; - DB_TXN *txn; - DBT *key; - DB_KEY_RANGE *kr; - u_int32_t flags; -{ - DBC *dbc; - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->key_range"); - - /* - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if (flags != 0) - return (__db_ferr(env, "DB->key_range", 0)); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - /* Check for consistent transaction usage. */ - if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 1)) != 0) - goto err; - - /* - * !!! - * The actual method call is simple, do it inline. - */ - switch (dbp->type) { - case DB_BTREE: -#ifndef HAVE_BREW - if ((ret = __dbt_usercopy(env, key)) != 0) - goto err; - - /* Acquire a cursor. 
*/ - if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) - break; - - DEBUG_LWRITE(dbc, NULL, "bam_key_range", NULL, NULL, 0); -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbp)) - ret = __part_key_range(dbc, key, kr, flags); - else -#endif - ret = __bam_key_range(dbc, key, kr, flags); - - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - __dbt_userfree(env, key, NULL, NULL); - break; -#else - COMPQUIET(dbc, NULL); - COMPQUIET(key, NULL); - COMPQUIET(kr, NULL); - /* FALLTHROUGH */ -#endif - case DB_HASH: - case DB_QUEUE: - case DB_RECNO: - ret = __dbh_am_chk(dbp, DB_OK_BTREE); - break; - case DB_UNKNOWN: - default: - ret = __db_unknown_type(env, "DB->key_range", dbp->type); - break; - } - -err: /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_open_pp -- - * DB->open pre/post processing. - * - * PUBLIC: int __db_open_pp __P((DB *, DB_TXN *, - * PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int)); - */ -int -__db_open_pp(dbp, txn, fname, dname, type, flags, mode) - DB *dbp; - DB_TXN *txn; - const char *fname, *dname; - DBTYPE type; - u_int32_t flags; - int mode; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, nosync, remove_me, ret, t_ret, txn_local; - - env = dbp->env; - nosync = 1; - handle_check = remove_me = txn_local = 0; - - ENV_ENTER(env, ip); - - /* - * Save the file and database names and flags. We do this here - * because we don't pass all of the flags down into the actual - * DB->open method call, we strip DB_AUTO_COMMIT at this layer. - */ - if ((fname != NULL && - (ret = __os_strdup(env, fname, &dbp->fname)) != 0)) - goto err; - if ((dname != NULL && - (ret = __os_strdup(env, dname, &dbp->dname)) != 0)) - goto err; - dbp->open_flags = flags; - - /* Save the current DB handle flags for refresh. */ - dbp->orig_flags = dbp->flags; - - /* Check for replication block. 
*/ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - /* - * Create local transaction as necessary, check for consistent - * transaction usage. - */ - if (IS_ENV_AUTO_COMMIT(env, txn, flags)) { - if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0) - goto err; - txn_local = 1; - } else if (txn != NULL && !TXN_ON(env) && - (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_CDSGROUP))) { - ret = __db_not_txn_env(env); - goto err; - } - LF_CLR(DB_AUTO_COMMIT); - - /* - * We check arguments after possibly creating a local transaction, - * which is unusual -- the reason is some flags are illegal if any - * kind of transaction is in effect. - */ - if ((ret = __db_open_arg(dbp, txn, fname, dname, type, flags)) == 0) - if ((ret = __db_open(dbp, ip, txn, fname, dname, type, - flags, mode, PGNO_BASE_MD)) != 0) - goto txnerr; - - /* - * You can open the database that describes the subdatabases in the - * rest of the file read-only. The content of each key's data is - * unspecified and applications should never be adding new records - * or updating existing records. However, during recovery, we need - * to open these databases R/W so we can redo/undo changes in them. - * Likewise, we need to open master databases read/write during - * rename and remove so we can be sure they're fully sync'ed, so - * we provide an override flag for the purpose. - */ - if (dname == NULL && !IS_RECOVERING(env) && !LF_ISSET(DB_RDONLY) && - !LF_ISSET(DB_RDWRMASTER) && F_ISSET(dbp, DB_AM_SUBDB)) { - __db_errx(env, - "files containing multiple databases may only be opened read-only"); - ret = EINVAL; - goto txnerr; - } - - /* - * Success: file creations have to be synchronous, otherwise we don't - * care. - */ - if (F_ISSET(dbp, DB_AM_CREATED | DB_AM_CREATED_MSTR)) - nosync = 0; - - /* Success: don't discard the file on close. 
*/ - F_CLR(dbp, DB_AM_DISCARD | DB_AM_CREATED | DB_AM_CREATED_MSTR); - - /* - * If not transactional, remove the databases/subdatabases if it is - * persistent. If we're transactional, the child transaction abort - * cleans up. - */ -txnerr: if (ret != 0 && !IS_REAL_TXN(txn)) { - remove_me = (F_ISSET(dbp, DB_AM_CREATED) && - (fname != NULL || dname != NULL)) ? 1 : 0; - if (F_ISSET(dbp, DB_AM_CREATED_MSTR) || - (dname == NULL && remove_me)) - /* Remove file. */ - (void)__db_remove_int(dbp, - ip, txn, fname, NULL, DB_FORCE); - else if (remove_me) - /* Remove subdatabase. */ - (void)__db_remove_int(dbp, - ip, txn, fname, dname, DB_FORCE); - } - - if (txn_local && (t_ret = - __db_txn_auto_resolve(env, txn, nosync, ret)) && ret == 0) - ret = t_ret; - -err: /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_open_arg -- - * Check DB->open arguments. - */ -static int -__db_open_arg(dbp, txn, fname, dname, type, flags) - DB *dbp; - DB_TXN *txn; - const char *fname, *dname; - DBTYPE type; - u_int32_t flags; -{ - ENV *env; - u_int32_t ok_flags; - int ret; - - env = dbp->env; - - /* Validate arguments. 
*/ -#undef OKFLAGS -#define OKFLAGS \ - (DB_AUTO_COMMIT | DB_CREATE | DB_EXCL | DB_FCNTL_LOCKING | \ - DB_MULTIVERSION | DB_NOMMAP | DB_NO_AUTO_COMMIT | DB_RDONLY | \ - DB_RDWRMASTER | DB_READ_UNCOMMITTED | DB_THREAD | DB_TRUNCATE) - if ((ret = __db_fchk(env, "DB->open", flags, OKFLAGS)) != 0) - return (ret); - if (LF_ISSET(DB_EXCL) && !LF_ISSET(DB_CREATE)) - return (__db_ferr(env, "DB->open", 1)); - if (LF_ISSET(DB_RDONLY) && LF_ISSET(DB_CREATE)) - return (__db_ferr(env, "DB->open", 1)); - -#ifdef HAVE_VXWORKS - if (LF_ISSET(DB_TRUNCATE)) { - __db_errx(env, "DB_TRUNCATE not supported on VxWorks"); - return (DB_OPNOTSUP); - } -#endif - switch (type) { - case DB_UNKNOWN: - if (LF_ISSET(DB_CREATE|DB_TRUNCATE)) { - __db_errx(env, - "DB_UNKNOWN type specified with DB_CREATE or DB_TRUNCATE"); - return (EINVAL); - } - ok_flags = 0; - break; - case DB_BTREE: - ok_flags = DB_OK_BTREE; - break; - case DB_HASH: -#ifndef HAVE_HASH - return (__db_no_hash_am(env)); -#endif - ok_flags = DB_OK_HASH; - break; - case DB_QUEUE: -#ifndef HAVE_QUEUE - return (__db_no_queue_am(env)); -#endif - ok_flags = DB_OK_QUEUE; - break; - case DB_RECNO: - ok_flags = DB_OK_RECNO; - break; - default: - __db_errx(env, "unknown type: %lu", (u_long)type); - return (EINVAL); - } - if (ok_flags) - DB_ILLEGAL_METHOD(dbp, ok_flags); - - /* The environment may have been created, but never opened. */ - if (!F_ISSET(env, ENV_DBLOCAL | ENV_OPEN_CALLED)) { - __db_errx(env, "database environment not yet opened"); - return (EINVAL); - } - - /* - * Historically, you could pass in an environment that didn't have a - * mpool, and DB would create a private one behind the scenes. This - * no longer works. - */ - if (!F_ISSET(env, ENV_DBLOCAL) && !MPOOL_ON(env)) { - __db_errx(env, "environment did not include a memory pool"); - return (EINVAL); - } - - /* - * You can't specify threads during DB->open if subsystems in the - * environment weren't configured with them. 
- */ - if (LF_ISSET(DB_THREAD) && !F_ISSET(env, ENV_DBLOCAL | ENV_THREAD)) { - __db_errx(env, "environment not created using DB_THREAD"); - return (EINVAL); - } - - /* DB_MULTIVERSION requires a database configured for transactions. */ - if (LF_ISSET(DB_MULTIVERSION) && !IS_REAL_TXN(txn)) { - __db_errx(env, - "DB_MULTIVERSION illegal without a transaction specified"); - return (EINVAL); - } - - if (LF_ISSET(DB_MULTIVERSION) && type == DB_QUEUE) { - __db_errx(env, - "DB_MULTIVERSION illegal with queue databases"); - return (EINVAL); - } - - /* DB_TRUNCATE is neither transaction recoverable nor lockable. */ - if (LF_ISSET(DB_TRUNCATE) && (LOCKING_ON(env) || txn != NULL)) { - __db_errx(env, - "DB_TRUNCATE illegal with %s specified", - LOCKING_ON(env) ? "locking" : "transactions"); - return (EINVAL); - } - - /* Subdatabase checks. */ - if (dname != NULL) { - /* QAM can only be done on in-memory subdatabases. */ - if (type == DB_QUEUE && fname != NULL) { - __db_errx( - env, "Queue databases must be one-per-file"); - return (EINVAL); - } - - /* - * Named in-memory databases can't support certain flags, - * so check here. - */ - if (fname == NULL) - F_CLR(dbp, DB_AM_CHKSUM | DB_AM_ENCRYPT); - } - - return (0); -} - -/* - * __db_pget_pp -- - * DB->pget pre/post processing. - * - * PUBLIC: int __db_pget_pp - * PUBLIC: __P((DB *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t)); - */ -int -__db_pget_pp(dbp, txn, skey, pkey, data, flags) - DB *dbp; - DB_TXN *txn; - DBT *skey, *pkey, *data; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ignore_lease, ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->pget"); - - ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; - LF_CLR(DB_IGNORE_LEASE); - - if ((ret = __db_pget_arg(dbp, pkey, flags)) != 0 || - (ret = __db_get_arg(dbp, skey, data, flags)) != 0) { - __dbt_userfree(env, skey, pkey, data); - return (ret); - } - - ENV_ENTER(env, ip); - - /* Check for replication block. 
*/ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - ret = __db_pget(dbp, ip, txn, skey, pkey, data, flags); - /* - * Check for master leases. - */ - if (ret == 0 && - IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) - ret = __rep_lease_check(env, 1); - -err: /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - __dbt_userfree(env, skey, pkey, data); - return (ret); -} - -/* - * __db_pget -- - * DB->pget. - * - * PUBLIC: int __db_pget __P((DB *, - * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DBT *, DBT *, DBT *, u_int32_t)); - */ -int -__db_pget(dbp, ip, txn, skey, pkey, data, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DBT *skey, *pkey, *data; - u_int32_t flags; -{ - DBC *dbc; - u_int32_t mode; - int ret, t_ret; - - mode = DB_CURSOR_TRANSIENT; - if (LF_ISSET(DB_READ_UNCOMMITTED)) { - mode |= DB_READ_UNCOMMITTED; - LF_CLR(DB_READ_UNCOMMITTED); - } else if (LF_ISSET(DB_READ_COMMITTED)) { - mode |= DB_READ_COMMITTED; - LF_CLR(DB_READ_COMMITTED); - } - - if ((ret = __db_cursor(dbp, ip, txn, &dbc, mode)) != 0) - return (ret); - - SET_RET_MEM(dbc, dbp); - - DEBUG_LREAD(dbc, txn, "__db_pget", skey, NULL, flags); - - /* - * !!! - * The actual method call is simple, do it inline. - * - * The underlying cursor pget will fill in a default DBT for null - * pkeys, and use the cursor's returned-key memory internally to - * store any intermediate primary keys. However, we've just set - * the returned-key memory to the DB handle's key memory, which - * is unsafe to use if the DB handle is threaded. If the pkey - * argument is NULL, use the DBC-owned returned-key memory - * instead; it'll go away when we close the cursor before we - * return, but in this case that's just fine, as we're not - * returning the primary key. 
- */ - if (pkey == NULL) - dbc->rkey = &dbc->my_rkey; - - /* - * The cursor is just a perfectly ordinary secondary database cursor. - * Call its c_pget() method to do the dirty work. - */ - if (flags == 0 || flags == DB_RMW) - flags |= DB_SET; - - ret = __dbc_pget(dbc, skey, pkey, data, flags); - - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_pget_arg -- - * Check DB->pget arguments. - */ -static int -__db_pget_arg(dbp, pkey, flags) - DB *dbp; - DBT *pkey; - u_int32_t flags; -{ - ENV *env; - int ret; - - env = dbp->env; - - if (!F_ISSET(dbp, DB_AM_SECONDARY)) { - __db_errx(env, - "DB->pget may only be used on secondary indices"); - return (EINVAL); - } - - if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { - __db_errx(env, - "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices"); - return (EINVAL); - } - - /* DB_CONSUME makes no sense on a secondary index. */ - LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); - switch (flags) { - case DB_CONSUME: - case DB_CONSUME_WAIT: - return (__db_ferr(env, "DB->pget", 0)); - default: - /* __db_get_arg will catch the rest. */ - break; - } - - /* - * We allow the pkey field to be NULL, so that we can make the - * two-DBT get calls into wrappers for the three-DBT ones. - */ - if (pkey != NULL && - (ret = __dbt_ferr(dbp, "primary key", pkey, 1)) != 0) - return (ret); - - if (flags == DB_GET_BOTH) { - /* The pkey field can't be NULL if we're doing a DB_GET_BOTH. */ - if (pkey == NULL) { - __db_errx(env, - "DB_GET_BOTH on a secondary index requires a primary key"); - return (EINVAL); - } - if ((ret = __dbt_usercopy(env, pkey)) != 0) - return (ret); - } - - return (0); -} - -/* - * __db_put_pp -- - * DB->put pre/post processing. 
- * - * PUBLIC: int __db_put_pp __P((DB *, DB_TXN *, DBT *, DBT *, u_int32_t)); - */ -int -__db_put_pp(dbp, txn, key, data, flags) - DB *dbp; - DB_TXN *txn; - DBT *key, *data; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, txn_local, t_ret; - - env = dbp->env; - txn_local = 0; - - STRIP_AUTO_COMMIT(flags); - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->put"); - - if ((ret = __db_put_arg(dbp, key, data, flags)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - /* Create local transaction as necessary. */ - if (IS_DB_AUTO_COMMIT(dbp, txn)) { - if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) - goto err; - txn_local = 1; - } - - /* Check for consistent transaction usage. */ - if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) - goto err; - - ret = __db_put(dbp, ip, txn, key, data, flags); - -err: if (txn_local && - (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) - ret = t_ret; - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - __dbt_userfree(env, key, NULL, data); - return (ret); -} - -/* - * __db_put_arg -- - * Check DB->put arguments. - */ -static int -__db_put_arg(dbp, key, data, flags) - DB *dbp; - DBT *key, *data; - u_int32_t flags; -{ - ENV *env; - int ret, returnkey; - - env = dbp->env; - returnkey = 0; - - /* Check for changes to a read-only tree. */ - if (DB_IS_READONLY(dbp)) - return (__db_rdonly(env, "DB->put")); - - /* Check for puts on a secondary. 
*/ - if (F_ISSET(dbp, DB_AM_SECONDARY)) { - __db_errx(env, "DB->put forbidden on secondary indices"); - return (EINVAL); - } - - if (LF_ISSET(DB_MULTIPLE_KEY | DB_MULTIPLE)) { - if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY)) - goto err; - - switch (LF_ISSET(DB_OPFLAGS_MASK)) { - case 0: - case DB_OVERWRITE_DUP: - break; - default: - __db_errx(env, - "DB->put: DB_MULTIPLE(_KEY) can only be combined with DB_OVERWRITE_DUP"); - return (EINVAL); - } - - if (!F_ISSET(key, DB_DBT_BULK)) { - __db_errx(env, - "DB->put with DB_MULTIPLE(_KEY) requires a bulk key buffer"); - return (EINVAL); - } - } - if (LF_ISSET(DB_MULTIPLE)) { - if (!F_ISSET(data, DB_DBT_BULK)) { - __db_errx(env, - "DB->put with DB_MULTIPLE requires a bulk data buffer"); - return (EINVAL); - } - } - - /* Check for invalid function flags. */ - switch (LF_ISSET(DB_OPFLAGS_MASK)) { - case 0: - case DB_NOOVERWRITE: - case DB_OVERWRITE_DUP: - break; - case DB_APPEND: - if (dbp->type != DB_RECNO && dbp->type != DB_QUEUE) - goto err; - returnkey = 1; - break; - case DB_NODUPDATA: - if (F_ISSET(dbp, DB_AM_DUPSORT)) - break; - /* FALLTHROUGH */ - default: -err: return (__db_ferr(env, "DB->put", 0)); - } - - /* - * Check for invalid key/data flags. The key may reasonably be NULL - * if DB_APPEND is set and the application doesn't care about the - * returned key. - */ - if (((returnkey && key != NULL) || !returnkey) && - (ret = __dbt_ferr(dbp, "key", key, returnkey)) != 0) - return (ret); - if (!LF_ISSET(DB_MULTIPLE_KEY) && - (ret = __dbt_ferr(dbp, "data", data, 0)) != 0) - return (ret); - - /* - * The key parameter should not be NULL or have the "partial" flag set - * in a put call unless the user doesn't care about a key value we'd - * return. The user tells us they don't care about the returned key by - * setting the key parameter to NULL or configuring the key DBT to not - * return any information. 
(Returned keys from a put are always record - * numbers, and returning part of a record number doesn't make sense: - * only accept a partial return if the length returned is 0.) - */ - if ((returnkey && - key != NULL && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) || - (!returnkey && F_ISSET(key, DB_DBT_PARTIAL))) - return (__db_ferr(env, "key DBT", 0)); - - /* Check for partial puts in the presence of duplicates. */ - if (data != NULL && F_ISSET(data, DB_DBT_PARTIAL) && - (F_ISSET(dbp, DB_AM_DUP) || F_ISSET(key, DB_DBT_DUPOK))) { - __db_errx(env, -"a partial put in the presence of duplicates requires a cursor operation"); - return (EINVAL); - } - - if ((flags != DB_APPEND && (ret = __dbt_usercopy(env, key)) != 0) || - (!LF_ISSET(DB_MULTIPLE_KEY) && - (ret = __dbt_usercopy(env, data)) != 0)) - return (ret); - - return (0); -} - -/* - * __db_compact_pp -- - * DB->compact pre/post processing. - * - * PUBLIC: int __db_compact_pp __P((DB *, DB_TXN *, - * PUBLIC: DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *)); - */ -int -__db_compact_pp(dbp, txn, start, stop, c_data, flags, end) - DB *dbp; - DB_TXN *txn; - DBT *start, *stop; - DB_COMPACT *c_data; - u_int32_t flags; - DBT *end; -{ - DB_COMPACT *dp, l_data; - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->compact"); - - /* - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if ((ret = __db_fchk( - env, "DB->compact", flags, DB_FREELIST_ONLY | DB_FREE_SPACE)) != 0) - return (ret); - - /* Check for changes to a read-only database. */ - if (DB_IS_READONLY(dbp)) - return (__db_rdonly(env, "DB->compact")); - - if (start != NULL && (ret = __dbt_usercopy(env, start)) != 0) - return (ret); - if (stop != NULL && (ret = __dbt_usercopy(env, stop)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for replication block. 
*/ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, - txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - if (c_data == NULL) { - dp = &l_data; - memset(dp, 0, sizeof(*dp)); - } else - dp = c_data; -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbp)) - ret = __part_compact(dbp, ip, txn, start, stop, dp, flags, end); - else -#endif - switch (dbp->type) { - case DB_HASH: - if (!LF_ISSET(DB_FREELIST_ONLY)) - goto err; - /* FALLTHROUGH */ - case DB_BTREE: - case DB_RECNO: - ret = __bam_compact(dbp, ip, txn, start, stop, dp, flags, end); - break; - - default: -err: ret = __dbh_am_chk(dbp, DB_OK_BTREE); - break; - } - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - __dbt_userfree(env, start, stop, NULL); - return (ret); -} - -/* - * __db_associate_foreign_pp -- - * DB->associate_foreign pre/post processing. - * - * PUBLIC: int __db_associate_foreign_pp __P((DB *, DB *, - * PUBLIC: int (*)(DB *, const DBT *, DBT *, const DBT *, int *), - * PUBLIC: u_int32_t)); - */ -int -__db_associate_foreign_pp(fdbp, dbp, callback, flags) - DB *dbp, *fdbp; - int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *)); - u_int32_t flags; -{ - /* Most of this is based on the implementation of associate */ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - PANIC_CHECK(env); - STRIP_AUTO_COMMIT(flags); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { - handle_check = 0; - goto err; - } - - if ((ret = __db_associate_foreign_arg(fdbp, dbp, callback, flags)) != 0) - goto err; - - ret = __db_associate_foreign(fdbp, dbp, callback, flags); - -err: /* Release replication block. 
*/ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_associate_foreign_arg -- - * DB->associate_foreign argument checking. - */ -static int -__db_associate_foreign_arg(fdbp, dbp, callback, flags) - DB *dbp, *fdbp; - int (*callback) __P((DB *, const DBT *, DBT *, const DBT *, int *)); - u_int32_t flags; -{ - ENV *env; - - env = fdbp->env; - - if (F_ISSET(fdbp, DB_AM_SECONDARY)) { - __db_errx(env, - "Secondary indices may not be used as foreign databases"); - return (EINVAL); - } - if (F_ISSET(fdbp, DB_AM_DUP)) { - __db_errx(env, - "Foreign databases may not be configured with duplicates"); - return (EINVAL); - } - if (F_ISSET(fdbp, DB_AM_RENUMBER)) { - __db_errx(env, - "Renumbering recno databases may not be used as foreign databases"); - return (EINVAL); - } - if (!F_ISSET(dbp, DB_AM_SECONDARY)) { - __db_errx(env, - "The associating database must be a secondary index."); - return (EINVAL); - } - if (LF_ISSET(DB_FOREIGN_NULLIFY) && callback == NULL) { - __db_errx(env, - "When specifying a delete action of nullify, a callback%s", - " function needs to be configured"); - return (EINVAL); - } else if (!LF_ISSET(DB_FOREIGN_NULLIFY) && callback != NULL) { - __db_errx(env, - "When not specifying a delete action of nullify, a%s", - " callback function cannot be configured"); - return (EINVAL); - } - - return (0); -} - -/* - * __db_sync_pp -- - * DB->sync pre/post processing. - * - * PUBLIC: int __db_sync_pp __P((DB *, u_int32_t)); - */ -int -__db_sync_pp(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->sync"); - - /* - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if (flags != 0) - return (__db_ferr(env, "DB->sync", 0)); - - ENV_ENTER(env, ip); - - /* Check for replication block. 
*/ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { - handle_check = 0; - goto err; - } - - ret = __db_sync(dbp); - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __dbc_close_pp -- - * DBC->close pre/post processing. - * - * PUBLIC: int __dbc_close_pp __P((DBC *)); - */ -int -__dbc_close_pp(dbc) - DBC *dbc; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - - /* - * If the cursor is already closed we have a serious problem, and we - * assume that the cursor isn't on the active queue. Don't do any of - * the remaining cursor close processing. - */ - if (!F_ISSET(dbc, DBC_ACTIVE)) { - __db_errx(env, "Closing already-closed cursor"); - return (EINVAL); - } - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = dbc->txn == NULL && IS_ENV_REPLICATED(env); - ret = __dbc_close(dbc); - - /* Release replication block. */ - if (handle_check && - (t_ret = __op_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __dbc_cmp_pp -- - * DBC->cmp pre/post processing. 
- * - * PUBLIC: int __dbc_cmp_pp __P((DBC *, DBC *, int*, u_int32_t)); - */ -int -__dbc_cmp_pp(dbc, other_cursor, result, flags) - DBC *dbc, *other_cursor; - int *result; - u_int32_t flags; -{ - DB *dbp, *odbp; - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - dbp = dbc->dbp; - odbp = other_cursor->dbp; - env = dbp->env; - - if (flags != 0) - return (__db_ferr(env, "DBcursor->cmp", 0)); - - if (other_cursor == NULL) { - __db_errx(env, "DBcursor->cmp dbc pointer must not be null"); - return (EINVAL); - } - - if (dbp != odbp) { - __db_errx(env, -"DBcursor->cmp both cursors must refer to the same database."); - return (EINVAL); - } - - ENV_ENTER(env, ip); - ret = __dbc_cmp(dbc, other_cursor, result); - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __dbc_count_pp -- - * DBC->count pre/post processing. - * - * PUBLIC: int __dbc_count_pp __P((DBC *, db_recno_t *, u_int32_t)); - */ -int -__dbc_count_pp(dbc, recnop, flags) - DBC *dbc; - db_recno_t *recnop; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - dbp = dbc->dbp; - env = dbp->env; - - /* - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - * - * The cursor must be initialized, return EINVAL for an invalid cursor. - */ - if (flags != 0) - return (__db_ferr(env, "DBcursor->count", 0)); - - if (!IS_INITIALIZED(dbc)) - return (__db_curinval(env)); - - ENV_ENTER(env, ip); - ret = __dbc_count(dbc, recnop); - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __dbc_del_pp -- - * DBC->del pre/post processing. - * - * PUBLIC: int __dbc_del_pp __P((DBC *, u_int32_t)); - */ -int -__dbc_del_pp(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - dbp = dbc->dbp; - env = dbp->env; - - if ((ret = __dbc_del_arg(dbc, flags)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for consistent transaction usage. 
*/ - if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) - goto err; - - DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->del", NULL, NULL, flags); - ret = __dbc_del(dbc, flags); - -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __dbc_del_arg -- - * Check DBC->del arguments. - */ -static int -__dbc_del_arg(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - DB *dbp; - ENV *env; - - dbp = dbc->dbp; - env = dbp->env; - - /* Check for changes to a read-only tree. */ - if (DB_IS_READONLY(dbp)) - return (__db_rdonly(env, "DBcursor->del")); - - /* Check for invalid function flags. */ - switch (flags) { - case 0: - break; - case DB_CONSUME: - if (dbp->type != DB_QUEUE) - return (__db_ferr(env, "DBC->del", 0)); - break; - case DB_UPDATE_SECONDARY: - DB_ASSERT(env, F_ISSET(dbp, DB_AM_SECONDARY)); - break; - default: - return (__db_ferr(env, "DBcursor->del", 0)); - } - - /* - * The cursor must be initialized, return EINVAL for an invalid cursor, - * otherwise 0. - */ - if (!IS_INITIALIZED(dbc)) - return (__db_curinval(env)); - - return (0); -} - -/* - * __dbc_dup_pp -- - * DBC->dup pre/post processing. - * - * PUBLIC: int __dbc_dup_pp __P((DBC *, DBC **, u_int32_t)); - */ -int -__dbc_dup_pp(dbc, dbcp, flags) - DBC *dbc, **dbcp; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - dbp = dbc->dbp; - env = dbp->env; - - /* - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if (flags != 0 && flags != DB_POSITION) - return (__db_ferr(env, "DBcursor->dup", 0)); - - ENV_ENTER(env, ip); - ret = __dbc_dup(dbc, dbcp, flags); - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __dbc_get_pp -- - * DBC->get pre/post processing. 
- * - * PUBLIC: int __dbc_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_get_pp(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int ignore_lease, ret; - - dbp = dbc->dbp; - env = dbp->env; - - ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; - LF_CLR(DB_IGNORE_LEASE); - if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get", - flags == DB_SET || flags == DB_SET_RANGE ? key : NULL, NULL, flags); - ret = __dbc_get(dbc, key, data, flags); - - /* - * Check for master leases. - */ - if (ret == 0 && - IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) - ret = __rep_lease_check(env, 1); - - ENV_LEAVE(env, ip); - __dbt_userfree(env, key, NULL, data); - return (ret); -} - -/* - * __dbc_get_arg -- - * Common DBC->get argument checking, used by both DBC->get and DBC->pget. - * PUBLIC: int __dbc_get_arg __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_get_arg(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - ENV *env; - int dirty, multi, ret; - - dbp = dbc->dbp; - env = dbp->env; - - /* - * Typically in checking routines that modify the flags, we have - * to save them and restore them, because the checking routine - * calls the work routine. However, this is a pure-checking - * routine which returns to a function that calls the work routine, - * so it's OK that we do not save and restore the flags, even though - * we modify them. - * - * Check for read-modify-write validity. DB_RMW doesn't make sense - * with CDB cursors since if you're going to write the cursor, you - * had to create it with DB_WRITECURSOR. Regardless, we check for - * LOCKING_ON and not STD_LOCKING, as we don't want to disallow it. - * If this changes, confirm that DB does not itself set the DB_RMW - * flag in a path where CDB may have been configured. 
- */ - dirty = 0; - if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) { - if (!LOCKING_ON(env)) - return (__db_fnl(env, "DBcursor->get")); - if (LF_ISSET(DB_READ_UNCOMMITTED)) - dirty = 1; - LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); - } - - multi = 0; - if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { - multi = 1; - if (LF_ISSET(DB_MULTIPLE) && LF_ISSET(DB_MULTIPLE_KEY)) - goto multi_err; - LF_CLR(DB_MULTIPLE | DB_MULTIPLE_KEY); - } - - /* Check for invalid function flags. */ - switch (flags) { - case DB_CONSUME: - case DB_CONSUME_WAIT: - if (dirty) { - __db_errx(env, - "DB_READ_UNCOMMITTED is not supported with DB_CONSUME or DB_CONSUME_WAIT"); - return (EINVAL); - } - if (dbp->type != DB_QUEUE) - goto err; - break; - case DB_CURRENT: - case DB_FIRST: - case DB_NEXT: - case DB_NEXT_DUP: - case DB_NEXT_NODUP: - break; - case DB_LAST: - case DB_PREV: - case DB_PREV_DUP: - case DB_PREV_NODUP: - if (multi) -multi_err: return (__db_ferr(env, "DBcursor->get", 1)); - break; - case DB_GET_BOTHC: - if (dbp->type == DB_QUEUE) - goto err; - /* FALLTHROUGH */ - case DB_GET_BOTH: - case DB_GET_BOTH_RANGE: - if ((ret = __dbt_usercopy(env, data)) != 0) - goto err; - /* FALLTHROUGH */ - case DB_SET: - case DB_SET_RANGE: - if ((ret = __dbt_usercopy(env, key)) != 0) - goto err; - break; - case DB_GET_RECNO: - /* - * The one situation in which this might be legal with a - * non-RECNUM dbp is if dbp is a secondary and its primary is - * DB_AM_RECNUM. - */ - if (!F_ISSET(dbp, DB_AM_RECNUM) && - (!F_ISSET(dbp, DB_AM_SECONDARY) || - !F_ISSET(dbp->s_primary, DB_AM_RECNUM))) - goto err; - break; - case DB_SET_RECNO: - if (!F_ISSET(dbp, DB_AM_RECNUM)) - goto err; - if ((ret = __dbt_usercopy(env, key)) != 0) - goto err; - break; - default: -err: __dbt_userfree(env, key, NULL, data); - return (__db_ferr(env, "DBcursor->get", 0)); - } - - /* Check for invalid key/data flags. 
*/ - if ((ret = __dbt_ferr(dbp, "key", key, 0)) != 0) - return (ret); - if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) - return (ret); - - if (multi) { - if (!F_ISSET(data, DB_DBT_USERMEM)) { - __db_errx(env, - "DB_MULTIPLE/DB_MULTIPLE_KEY require DB_DBT_USERMEM be set"); - return (EINVAL); - } - if (F_ISSET(key, DB_DBT_PARTIAL) || - F_ISSET(data, DB_DBT_PARTIAL)) { - __db_errx(env, - "DB_MULTIPLE/DB_MULTIPLE_KEY do not support DB_DBT_PARTIAL"); - return (EINVAL); - } - if (data->ulen < 1024 || - data->ulen < dbp->pgsize || data->ulen % 1024 != 0) { - __db_errx(env, "%s%s", - "DB_MULTIPLE/DB_MULTIPLE_KEY buffers must be ", - "aligned, at least page size and multiples of 1KB"); - return (EINVAL); - } - } - - /* - * The cursor must be initialized for DB_CURRENT, DB_GET_RECNO, - * DB_PREV_DUP and DB_NEXT_DUP. Return EINVAL for an invalid - * cursor, otherwise 0. - */ - if (!IS_INITIALIZED(dbc) && (flags == DB_CURRENT || - flags == DB_GET_RECNO || - flags == DB_NEXT_DUP || flags == DB_PREV_DUP)) - return (__db_curinval(env)); - - /* Check for consistent transaction usage. */ - if (LF_ISSET(DB_RMW) && - (ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) - return (ret); - - return (0); -} - -/* - * __db_secondary_close_pp -- - * DB->close for secondaries - * - * PUBLIC: int __db_secondary_close_pp __P((DB *, u_int32_t)); - */ -int -__db_secondary_close_pp(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - ret = 0; - - /* - * As a DB handle destructor, we can't fail. - * - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if (flags != 0 && flags != DB_NOSYNC) - ret = __db_ferr(env, "DB->close", 0); - - ENV_ENTER(env, ip); - - /* Check for replication block. 
*/ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (t_ret = __db_rep_enter(dbp, 0, 0, 0)) != 0) { - handle_check = 0; - if (ret == 0) - ret = t_ret; - } - - if ((t_ret = __db_secondary_close(dbp, flags)) != 0 && ret == 0) - ret = t_ret; - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __dbc_pget_pp -- - * DBC->pget pre/post processing. - * - * PUBLIC: int __dbc_pget_pp __P((DBC *, DBT *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_pget_pp(dbc, skey, pkey, data, flags) - DBC *dbc; - DBT *skey, *pkey, *data; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int ignore_lease, ret; - - dbp = dbc->dbp; - env = dbp->env; - - ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; - LF_CLR(DB_IGNORE_LEASE); - if ((ret = __dbc_pget_arg(dbc, pkey, flags)) != 0 || - (ret = __dbc_get_arg(dbc, skey, data, flags)) != 0) - return (ret); - - ENV_ENTER(env, ip); - ret = __dbc_pget(dbc, skey, pkey, data, flags); - /* - * Check for master leases. - */ - if (ret == 0 && - IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) - ret = __rep_lease_check(env, 1); - - ENV_LEAVE(env, ip); - - __dbt_userfree(env, skey, pkey, data); - return (ret); -} - -/* - * __dbc_pget_arg -- - * Check DBC->pget arguments. - */ -static int -__dbc_pget_arg(dbc, pkey, flags) - DBC *dbc; - DBT *pkey; - u_int32_t flags; -{ - DB *dbp; - ENV *env; - int ret; - - dbp = dbc->dbp; - env = dbp->env; - - if (!F_ISSET(dbp, DB_AM_SECONDARY)) { - __db_errx(env, - "DBcursor->pget may only be used on secondary indices"); - return (EINVAL); - } - - if (LF_ISSET(DB_MULTIPLE | DB_MULTIPLE_KEY)) { - __db_errx(env, - "DB_MULTIPLE and DB_MULTIPLE_KEY may not be used on secondary indices"); - return (EINVAL); - } - - switch (LF_ISSET(DB_OPFLAGS_MASK)) { - case DB_CONSUME: - case DB_CONSUME_WAIT: - /* These flags make no sense on a secondary index. 
*/ - return (__db_ferr(env, "DBcursor->pget", 0)); - case DB_GET_BOTH: - case DB_GET_BOTH_RANGE: - /* BOTH is "get both the primary and the secondary". */ - if (pkey == NULL) { - __db_errx(env, - "%s requires both a secondary and a primary key", - LF_ISSET(DB_GET_BOTH) ? - "DB_GET_BOTH" : "DB_GET_BOTH_RANGE"); - return (EINVAL); - } - if ((ret = __dbt_usercopy(env, pkey)) != 0) - return (ret); - break; - default: - /* __dbc_get_arg will catch the rest. */ - break; - } - - /* - * We allow the pkey field to be NULL, so that we can make the - * two-DBT get calls into wrappers for the three-DBT ones. - */ - if (pkey != NULL && - (ret = __dbt_ferr(dbp, "primary key", pkey, 0)) != 0) - return (ret); - - /* But the pkey field can't be NULL if we're doing a DB_GET_BOTH. */ - if (pkey == NULL && (flags & DB_OPFLAGS_MASK) == DB_GET_BOTH) { - __db_errx(env, - "DB_GET_BOTH on a secondary index requires a primary key"); - return (EINVAL); - } - return (0); -} - -/* - * __dbc_put_pp -- - * DBC->put pre/post processing. - * - * PUBLIC: int __dbc_put_pp __P((DBC *, DBT *, DBT *, u_int32_t)); - */ -int -__dbc_put_pp(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - dbp = dbc->dbp; - env = dbp->env; - - if ((ret = __dbc_put_arg(dbc, key, data, flags)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for consistent transaction usage. */ - if ((ret = __db_check_txn(dbp, dbc->txn, dbc->locker, 0)) != 0) - goto err; - - DEBUG_LWRITE(dbc, dbc->txn, "DBcursor->put", - flags == DB_KEYFIRST || flags == DB_KEYLAST || - flags == DB_NODUPDATA || flags == DB_UPDATE_SECONDARY ? - key : NULL, data, flags); - ret = __dbc_put(dbc, key, data, flags); - -err: ENV_LEAVE(env, ip); - __dbt_userfree(env, key, NULL, data); - return (ret); -} - -/* - * __dbc_put_arg -- - * Check DBC->put arguments. 
- */ -static int -__dbc_put_arg(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - ENV *env; - int key_flags, ret; - - dbp = dbc->dbp; - env = dbp->env; - key_flags = 0; - - /* Check for changes to a read-only tree. */ - if (DB_IS_READONLY(dbp)) - return (__db_rdonly(env, "DBcursor->put")); - - /* Check for puts on a secondary. */ - if (F_ISSET(dbp, DB_AM_SECONDARY)) { - if (flags == DB_UPDATE_SECONDARY) - flags = 0; - else { - __db_errx(env, - "DBcursor->put forbidden on secondary indices"); - return (EINVAL); - } - } - - if ((ret = __dbt_usercopy(env, data)) != 0) - return (ret); - - /* Check for invalid function flags. */ - switch (flags) { - case DB_AFTER: - case DB_BEFORE: - switch (dbp->type) { - case DB_BTREE: - case DB_HASH: /* Only with unsorted duplicates. */ - if (!F_ISSET(dbp, DB_AM_DUP)) - goto err; - if (dbp->dup_compare != NULL) - goto err; - break; - case DB_QUEUE: /* Not permitted. */ - goto err; - case DB_RECNO: /* Only with mutable record numbers. */ - if (!F_ISSET(dbp, DB_AM_RENUMBER)) - goto err; - key_flags = key == NULL ? 0 : 1; - break; - case DB_UNKNOWN: - default: - goto err; - } - break; - case DB_CURRENT: - /* - * If there is a comparison function, doing a DB_CURRENT - * must not change the part of the data item that is used - * for the comparison. - */ - break; - case DB_NODUPDATA: - if (!F_ISSET(dbp, DB_AM_DUPSORT)) - goto err; - /* FALLTHROUGH */ - case DB_KEYFIRST: - case DB_KEYLAST: - case DB_OVERWRITE_DUP: - key_flags = 1; - if ((ret = __dbt_usercopy(env, key)) != 0) - return (ret); - break; - default: -err: return (__db_ferr(env, "DBcursor->put", 0)); - } - - /* - * Check for invalid key/data flags. The key may reasonably be NULL - * if DB_AFTER or DB_BEFORE is set and the application doesn't care - * about the returned key, or if the DB_CURRENT flag is set. 
- */ - if (key_flags && (ret = __dbt_ferr(dbp, "key", key, 0)) != 0) - return (ret); - if ((ret = __dbt_ferr(dbp, "data", data, 0)) != 0) - return (ret); - - /* - * The key parameter should not be NULL or have the "partial" flag set - * in a put call unless the user doesn't care about a key value we'd - * return. The user tells us they don't care about the returned key by - * setting the key parameter to NULL or configuring the key DBT to not - * return any information. (Returned keys from a put are always record - * numbers, and returning part of a record number doesn't make sense: - * only accept a partial return if the length returned is 0.) - */ - if (key_flags && F_ISSET(key, DB_DBT_PARTIAL) && key->dlen != 0) - return (__db_ferr(env, "key DBT", 0)); - - /* - * The cursor must be initialized for anything other than DB_KEYFIRST, - * DB_KEYLAST or zero: return EINVAL for an invalid cursor, otherwise 0. - */ - if (!IS_INITIALIZED(dbc) && flags != 0 && flags != DB_KEYFIRST && - flags != DB_KEYLAST && flags != DB_NODUPDATA && - flags != DB_OVERWRITE_DUP) - return (__db_curinval(env)); - - return (0); -} - -/* - * __dbt_ferr -- - * Check a DBT for flag errors. - */ -static int -__dbt_ferr(dbp, name, dbt, check_thread) - const DB *dbp; - const char *name; - const DBT *dbt; - int check_thread; -{ - ENV *env; - int ret; - - env = dbp->env; - - /* - * Check for invalid DBT flags. We allow any of the flags to be - * specified to any DB or DBcursor call so that applications can - * set DB_DBT_MALLOC when retrieving a data item from a secondary - * database and then specify that same DBT as a key to a primary - * database, without having to clear flags. 
- */ - if ((ret = __db_fchk(env, name, dbt->flags, DB_DBT_APPMALLOC | - DB_DBT_BULK | DB_DBT_DUPOK | DB_DBT_MALLOC | DB_DBT_REALLOC | - DB_DBT_USERCOPY | DB_DBT_USERMEM | DB_DBT_PARTIAL)) != 0) - return (ret); - switch (F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC | - DB_DBT_USERCOPY | DB_DBT_USERMEM)) { - case 0: - case DB_DBT_MALLOC: - case DB_DBT_REALLOC: - case DB_DBT_USERCOPY: - case DB_DBT_USERMEM: - break; - default: - return (__db_ferr(env, name, 1)); - } - - if (F_ISSET(dbt, DB_DBT_BULK) && F_ISSET(dbt, DB_DBT_PARTIAL)) { - __db_errx(env, - "Bulk and partial operations cannot be combined on %s DBT", name); - return (EINVAL); - } - - if (check_thread && DB_IS_THREADED(dbp) && - !F_ISSET(dbt, DB_DBT_MALLOC | DB_DBT_REALLOC | - DB_DBT_USERCOPY | DB_DBT_USERMEM)) { - __db_errx(env, - "DB_THREAD mandates memory allocation flag on %s DBT", - name); - return (EINVAL); - } - return (0); -} - -/* - * __db_curinval - * Report that a cursor is in an invalid state. - */ -static int -__db_curinval(env) - const ENV *env; -{ - __db_errx(env, - "Cursor position must be set before performing this operation"); - return (EINVAL); -} - -/* - * __db_txn_auto_init -- - * Handle DB_AUTO_COMMIT initialization. - * - * PUBLIC: int __db_txn_auto_init __P((ENV *, DB_THREAD_INFO *, DB_TXN **)); - */ -int -__db_txn_auto_init(env, ip, txnidp) - ENV *env; - DB_THREAD_INFO *ip; - DB_TXN **txnidp; -{ - /* - * Method calls where applications explicitly specify DB_AUTO_COMMIT - * require additional validation: the DB_AUTO_COMMIT flag cannot be - * specified if a transaction cookie is also specified, nor can the - * flag be specified in a non-transactional environment. 
- */ - if (*txnidp != NULL) { - __db_errx(env, - "DB_AUTO_COMMIT may not be specified along with a transaction handle"); - return (EINVAL); - } - - if (!TXN_ON(env)) { - __db_errx(env, - "DB_AUTO_COMMIT may not be specified in non-transactional environment"); - return (EINVAL); - } - - /* - * Our caller checked to see if replication is making a state change. - * Don't call the user-level API (which would repeat that check). - */ - return (__txn_begin(env, ip, NULL, txnidp, 0)); -} - -/* - * __db_txn_auto_resolve -- - * Resolve local transactions. - * - * PUBLIC: int __db_txn_auto_resolve __P((ENV *, DB_TXN *, int, int)); - */ -int -__db_txn_auto_resolve(env, txn, nosync, ret) - ENV *env; - DB_TXN *txn; - int nosync, ret; -{ - int t_ret; - - /* - * We're resolving a transaction for the user, and must decrement the - * replication handle count. Call the user-level API. - */ - if (ret == 0) - return (__txn_commit(txn, nosync ? DB_TXN_NOSYNC : 0)); - - if ((t_ret = __txn_abort(txn)) != 0) - return (__env_panic(env, t_ret)); - - return (ret); -} diff --git a/db/db_join.c b/db/db_join.c deleted file mode 100644 index 05c11a4..0000000 --- a/db/db_join.c +++ /dev/null @@ -1,940 +0,0 @@ -/* - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1998-2009 Oracle. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_join.h" -#include "dbinc/btree.h" -#include "dbinc/lock.h" - -static int __db_join_close_pp __P((DBC *)); -static int __db_join_cmp __P((const void *, const void *)); -static int __db_join_del __P((DBC *, u_int32_t)); -static int __db_join_get __P((DBC *, DBT *, DBT *, u_int32_t)); -static int __db_join_get_pp __P((DBC *, DBT *, DBT *, u_int32_t)); -static int __db_join_getnext __P((DBC *, DBT *, DBT *, u_int32_t, u_int32_t)); -static int __db_join_primget __P((DB *, DB_THREAD_INFO *, - DB_TXN *, DB_LOCKER *, DBT *, DBT *, u_int32_t)); -static int __db_join_put __P((DBC *, DBT *, DBT *, u_int32_t)); - -/* - * Check to see if the Nth secondary cursor of join cursor jc is pointing - * to a sorted duplicate set. - */ -#define SORTED_SET(jc, n) ((jc)->j_curslist[(n)]->dbp->dup_compare != NULL) - -/* - * This is the duplicate-assisted join functionality. Right now we're - * going to write it such that we return one item at a time, although - * I think we may need to optimize it to return them all at once. - * It should be easier to get it working this way, and I believe that - * changing it should be fairly straightforward. - * - * We optimize the join by sorting cursors from smallest to largest - * cardinality. In most cases, this is indeed optimal. However, if - * a cursor with large cardinality has very few data in common with the - * first cursor, it is possible that the join will be made faster by - * putting it earlier in the cursor list. Since we have no way to detect - * cases like this, we simply provide a flag, DB_JOIN_NOSORT, which retains - * the sort order specified by the caller, who may know more about the - * structure of the data. - * - * The first cursor moves sequentially through the duplicate set while - * the others search explicitly for the duplicate in question. 
- * - */ - -/* - * __db_join -- - * This is the interface to the duplicate-assisted join functionality. - * In the same way that cursors mark a position in a database, a cursor - * can mark a position in a join. While most cursors are created by the - * cursor method of a DB, join cursors are created through an explicit - * call to DB->join. - * - * The curslist is an array of existing, initialized cursors and primary - * is the DB of the primary file. The data item that joins all the - * cursors in the curslist is used as the key into the primary and that - * key and data are returned. When no more items are left in the join - * set, the c_next operation off the join cursor will return DB_NOTFOUND. - * - * PUBLIC: int __db_join __P((DB *, DBC **, DBC **, u_int32_t)); - */ -int -__db_join(primary, curslist, dbcp, flags) - DB *primary; - DBC **curslist, **dbcp; - u_int32_t flags; -{ - DBC *dbc; - ENV *env; - JOIN_CURSOR *jc; - size_t ncurs, nslots; - u_int32_t i; - int ret; - - env = primary->env; - dbc = NULL; - jc = NULL; - - if ((ret = __os_calloc(env, 1, sizeof(DBC), &dbc)) != 0) - goto err; - - if ((ret = __os_calloc(env, 1, sizeof(JOIN_CURSOR), &jc)) != 0) - goto err; - - if ((ret = __os_malloc(env, 256, &jc->j_key.data)) != 0) - goto err; - jc->j_key.ulen = 256; - F_SET(&jc->j_key, DB_DBT_USERMEM); - - F_SET(&jc->j_rdata, DB_DBT_REALLOC); - - for (jc->j_curslist = curslist; - *jc->j_curslist != NULL; jc->j_curslist++) - ; - - /* - * The number of cursor slots we allocate is one greater than - * the number of cursors involved in the join, because the - * list is NULL-terminated. - */ - ncurs = (size_t)(jc->j_curslist - curslist); - nslots = ncurs + 1; - - /* - * !!! -- A note on the various lists hanging off jc. - * - * j_curslist is the initial NULL-terminated list of cursors passed - * into __db_join. 
The original cursors are not modified; pristine - * copies are required because, in databases with unsorted dups, we - * must reset all of the secondary cursors after the first each - * time the first one is incremented, or else we will lose data - * which happen to be sorted differently in two different cursors. - * - * j_workcurs is where we put those copies that we're planning to - * work with. They're lazily c_dup'ed from j_curslist as we need - * them, and closed when the join cursor is closed or when we need - * to reset them to their original values (in which case we just - * c_dup afresh). - * - * j_fdupcurs is an array of cursors which point to the first - * duplicate in the duplicate set that contains the data value - * we're currently interested in. We need this to make - * __db_join_get correctly return duplicate duplicates; i.e., if a - * given data value occurs twice in the set belonging to cursor #2, - * and thrice in the set belonging to cursor #3, and once in all - * the other cursors, successive calls to __db_join_get need to - * return that data item six times. To make this happen, each time - * cursor N is allowed to advance to a new datum, all cursors M - * such that M > N have to be reset to the first duplicate with - * that datum, so __db_join_get will return all the dup-dups again. - * We could just reset them to the original cursor from j_curslist, - * but that would be a bit slower in the unsorted case and a LOT - * slower in the sorted one. - * - * j_exhausted is a list of boolean values which represent - * whether or not their corresponding cursors are "exhausted", - * i.e. whether the datum under the corresponding cursor has - * been found not to exist in any unreturned combinations of - * later secondary cursors, in which case they are ready to be - * incremented. - */ - - /* We don't want to free regions whose callocs have failed. 
*/ - jc->j_curslist = NULL; - jc->j_workcurs = NULL; - jc->j_fdupcurs = NULL; - jc->j_exhausted = NULL; - - if ((ret = __os_calloc(env, nslots, sizeof(DBC *), - &jc->j_curslist)) != 0) - goto err; - if ((ret = __os_calloc(env, nslots, sizeof(DBC *), - &jc->j_workcurs)) != 0) - goto err; - if ((ret = __os_calloc(env, nslots, sizeof(DBC *), - &jc->j_fdupcurs)) != 0) - goto err; - if ((ret = __os_calloc(env, nslots, sizeof(u_int8_t), - &jc->j_exhausted)) != 0) - goto err; - for (i = 0; curslist[i] != NULL; i++) { - jc->j_curslist[i] = curslist[i]; - jc->j_workcurs[i] = NULL; - jc->j_fdupcurs[i] = NULL; - jc->j_exhausted[i] = 0; - } - jc->j_ncurs = (u_int32_t)ncurs; - - /* - * If DB_JOIN_NOSORT is not set, optimize secondary cursors by - * sorting in order of increasing cardinality. - */ - if (!LF_ISSET(DB_JOIN_NOSORT)) - qsort(jc->j_curslist, ncurs, sizeof(DBC *), __db_join_cmp); - - /* - * We never need to reset the 0th cursor, so there's no - * solid reason to use workcurs[0] rather than curslist[0] in - * join_get. Nonetheless, it feels cleaner to do it for symmetry, - * and this is the most logical place to copy it. - * - * !!! - * There's no need to close the new cursor if we goto err only - * because this is the last thing that can fail. Modifier of this - * function beware! - */ - if ((ret = - __dbc_dup(jc->j_curslist[0], jc->j_workcurs, DB_POSITION)) != 0) - goto err; - - dbc->close = dbc->c_close = __db_join_close_pp; - dbc->del = dbc->c_del = __db_join_del; - dbc->get = dbc->c_get = __db_join_get_pp; - dbc->put = dbc->c_put = __db_join_put; - dbc->internal = (DBC_INTERNAL *)jc; - dbc->dbp = primary; - jc->j_primary = primary; - - /* Stash the first cursor's transaction here for easy access. 
*/ - dbc->txn = curslist[0]->txn; - - *dbcp = dbc; - - MUTEX_LOCK(env, primary->mutex); - TAILQ_INSERT_TAIL(&primary->join_queue, dbc, links); - MUTEX_UNLOCK(env, primary->mutex); - - return (0); - -err: if (jc != NULL) { - if (jc->j_curslist != NULL) - __os_free(env, jc->j_curslist); - if (jc->j_workcurs != NULL) { - if (jc->j_workcurs[0] != NULL) - (void)__dbc_close(jc->j_workcurs[0]); - __os_free(env, jc->j_workcurs); - } - if (jc->j_fdupcurs != NULL) - __os_free(env, jc->j_fdupcurs); - if (jc->j_exhausted != NULL) - __os_free(env, jc->j_exhausted); - __os_free(env, jc); - } - if (dbc != NULL) - __os_free(env, dbc); - return (ret); -} - -/* - * __db_join_close_pp -- - * DBC->close pre/post processing for join cursors. - */ -static int -__db_join_close_pp(dbc) - DBC *dbc; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - - ENV_ENTER(env, ip); - - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, dbc->txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - ret = __db_join_close(dbc); - - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - return (ret); -} - -static int -__db_join_put(dbc, key, data, flags) - DBC *dbc; - DBT *key; - DBT *data; - u_int32_t flags; -{ - COMPQUIET(dbc, NULL); - COMPQUIET(key, NULL); - COMPQUIET(data, NULL); - COMPQUIET(flags, 0); - return (EINVAL); -} - -static int -__db_join_del(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - COMPQUIET(dbc, NULL); - COMPQUIET(flags, 0); - return (EINVAL); -} - -/* - * __db_join_get_pp -- - * DBjoin->get pre/post processing. - */ -static int -__db_join_get_pp(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - u_int32_t handle_check, save_flags; - int ret, t_ret; - - dbp = dbc->dbp; - env = dbp->env; - - /* Save the original flags value. 
*/ - save_flags = flags; - - if (LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW)) { - if (!LOCKING_ON(env)) - return (__db_fnl(env, "DBC->get")); - LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); - } - - switch (flags) { - case 0: - case DB_JOIN_ITEM: - break; - default: - return (__db_ferr(env, "DBC->get", 0)); - } - - /* - * A partial get of the key of a join cursor don't make much sense; - * the entire key is necessary to query the primary database - * and find the datum, and so regardless of the size of the key - * it would not be a performance improvement. Since it would require - * special handling, we simply disallow it. - * - * A partial get of the data, however, potentially makes sense (if - * all possible data are a predictable large structure, for instance) - * and causes us no headaches, so we permit it. - */ - if (F_ISSET(key, DB_DBT_PARTIAL)) { - __db_errx(env, - "DB_DBT_PARTIAL may not be set on key during join_get"); - return (EINVAL); - } - - ENV_ENTER(env, ip); - - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, dbc->txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - /* Restore the original flags value. */ - flags = save_flags; - - ret = __db_join_get(dbc, key, data, flags); - - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - __dbt_userfree(env, key, NULL, NULL); - return (ret); -} - -static int -__db_join_get(dbc, key_arg, data_arg, flags) - DBC *dbc; - DBT *key_arg, *data_arg; - u_int32_t flags; -{ - DB *dbp; - DBC *cp; - DBT *key_n, key_n_mem; - ENV *env; - JOIN_CURSOR *jc; - int db_manage_data, ret; - u_int32_t i, j, operation, opmods; - - dbp = dbc->dbp; - env = dbp->env; - jc = (JOIN_CURSOR *)dbc->internal; - - operation = LF_ISSET(DB_OPFLAGS_MASK); - - /* !!! - * If the set of flags here changes, check that __db_join_primget - * is updated to handle them properly. 
- */ - opmods = LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); - - /* - * Since we are fetching the key as a datum in the secondary indices, - * we must be careful of caller-specified DB_DBT_* memory - * management flags. If necessary, use a stack-allocated DBT; - * we'll appropriately copy and/or allocate the data later. - */ - if (F_ISSET(key_arg, - DB_DBT_MALLOC | DB_DBT_USERCOPY | DB_DBT_USERMEM)) { - /* We just use the default buffer; no need to go malloc. */ - key_n = &key_n_mem; - memset(key_n, 0, sizeof(DBT)); - } else { - /* - * Either DB_DBT_REALLOC or the default buffer will work - * fine if we have to reuse it, as we do. - */ - key_n = key_arg; - } - if (F_ISSET(key_arg, DB_DBT_USERCOPY)) - key_arg->data = NULL; - - /* - * If our last attempt to do a get on the primary key failed, - * short-circuit the join and try again with the same key. - */ - if (F_ISSET(jc, JOIN_RETRY)) - goto samekey; - F_CLR(jc, JOIN_RETRY); - -retry: ret = __dbc_get(jc->j_workcurs[0], &jc->j_key, key_n, - opmods | (jc->j_exhausted[0] ? DB_NEXT_DUP : DB_CURRENT)); - - if (ret == DB_BUFFER_SMALL) { - jc->j_key.ulen <<= 1; - if ((ret = __os_realloc(env, - jc->j_key.ulen, &jc->j_key.data)) != 0) - goto mem_err; - goto retry; - } - - /* - * If ret == DB_NOTFOUND, we're out of elements of the first - * secondary cursor. This is how we finally finish the join - * if all goes well. - */ - if (ret != 0) - goto err; - - /* - * If jc->j_exhausted[0] == 1, we've just advanced the first cursor, - * and we're going to want to advance all the cursors that point to - * the first member of a duplicate duplicate set (j_fdupcurs[1..N]). - * Close all the cursors in j_fdupcurs; we'll reopen them the - * first time through the upcoming loop. - */ - for (i = 1; i < jc->j_ncurs; i++) { - if (jc->j_fdupcurs[i] != NULL && - (ret = __dbc_close(jc->j_fdupcurs[i])) != 0) - goto err; - jc->j_fdupcurs[i] = NULL; - } - - /* - * If jc->j_curslist[1] == NULL, we have only one cursor in the join. 
- * Thus, we can safely increment that one cursor on each call - * to __db_join_get, and we signal this by setting jc->j_exhausted[0] - * right away. - * - * Otherwise, reset jc->j_exhausted[0] to 0, so that we don't - * increment it until we know we're ready to. - */ - if (jc->j_curslist[1] == NULL) - jc->j_exhausted[0] = 1; - else - jc->j_exhausted[0] = 0; - - /* We have the first element; now look for it in the other cursors. */ - for (i = 1; i < jc->j_ncurs; i++) { - DB_ASSERT(env, jc->j_curslist[i] != NULL); - if (jc->j_workcurs[i] == NULL) - /* If this is NULL, we need to dup curslist into it. */ - if ((ret = __dbc_dup(jc->j_curslist[i], - &jc->j_workcurs[i], DB_POSITION)) != 0) - goto err; - -retry2: cp = jc->j_workcurs[i]; - - if ((ret = __db_join_getnext(cp, &jc->j_key, key_n, - jc->j_exhausted[i], opmods)) == DB_NOTFOUND) { - /* - * jc->j_workcurs[i] has no more of the datum we're - * interested in. Go back one cursor and get - * a new dup. We can't just move to a new - * element of the outer relation, because that way - * we might miss duplicate duplicates in cursor i-1. - * - * If this takes us back to the first cursor, - * -then- we can move to a new element of the outer - * relation. - */ - --i; - jc->j_exhausted[i] = 1; - - if (i == 0) { - for (j = 1; jc->j_workcurs[j] != NULL; j++) { - /* - * We're moving to a new element of - * the first secondary cursor. If - * that cursor is sorted, then any - * other sorted cursors can be safely - * reset to the first duplicate - * duplicate in the current set if we - * have a pointer to it (we can't just - * leave them be, or we'll miss - * duplicate duplicates in the outer - * relation). - * - * If the first cursor is unsorted, or - * if cursor j is unsorted, we can - * make no assumptions about what - * we're looking for next or where it - * will be, so we reset to the very - * beginning (setting workcurs NULL - * will achieve this next go-round). 
- * - * XXX: This is likely to break - * horribly if any two cursors are - * both sorted, but have different - * specified sort functions. For, - * now, we dismiss this as pathology - * and let strange things happen--we - * can't make rope childproof. - */ - if ((ret = __dbc_close( - jc->j_workcurs[j])) != 0) - goto err; - if (!SORTED_SET(jc, 0) || - !SORTED_SET(jc, j) || - jc->j_fdupcurs[j] == NULL) - /* - * Unsafe conditions; - * reset fully. - */ - jc->j_workcurs[j] = NULL; - else - /* Partial reset suffices. */ - if ((__dbc_dup( - jc->j_fdupcurs[j], - &jc->j_workcurs[j], - DB_POSITION)) != 0) - goto err; - jc->j_exhausted[j] = 0; - } - goto retry; - /* NOTREACHED */ - } - - /* - * We're about to advance the cursor and need to - * reset all of the workcurs[j] where j>i, so that - * we don't miss any duplicate duplicates. - */ - for (j = i + 1; - jc->j_workcurs[j] != NULL; - j++) { - if ((ret = - __dbc_close(jc->j_workcurs[j])) != 0) - goto err; - jc->j_exhausted[j] = 0; - if (jc->j_fdupcurs[j] == NULL) - jc->j_workcurs[j] = NULL; - else if ((ret = __dbc_dup(jc->j_fdupcurs[j], - &jc->j_workcurs[j], DB_POSITION)) != 0) - goto err; - } - goto retry2; - /* NOTREACHED */ - } - - if (ret == DB_BUFFER_SMALL) { - jc->j_key.ulen <<= 1; - if ((ret = __os_realloc(env, jc->j_key.ulen, - &jc->j_key.data)) != 0) { -mem_err: __db_errx(env, - "Allocation failed for join key, len = %lu", - (u_long)jc->j_key.ulen); - goto err; - } - goto retry2; - } - - if (ret != 0) - goto err; - - /* - * If we made it this far, we've found a matching - * datum in cursor i. Mark the current cursor - * unexhausted, so we don't miss any duplicate - * duplicates the next go-round--unless this is the - * very last cursor, in which case there are none to - * miss, and we'll need that exhausted flag to finally - * get a DB_NOTFOUND and move on to the next datum in - * the outermost cursor. 
- */ - if (i + 1 != jc->j_ncurs) - jc->j_exhausted[i] = 0; - else - jc->j_exhausted[i] = 1; - - /* - * If jc->j_fdupcurs[i] is NULL and the ith cursor's dups are - * sorted, then we're here for the first time since advancing - * cursor 0, and we have a new datum of interest. - * jc->j_workcurs[i] points to the beginning of a set of - * duplicate duplicates; store this into jc->j_fdupcurs[i]. - */ - if (SORTED_SET(jc, i) && jc->j_fdupcurs[i] == NULL && (ret = - __dbc_dup(cp, &jc->j_fdupcurs[i], DB_POSITION)) != 0) - goto err; - } - -err: if (ret != 0) - return (ret); - - if (0) { -samekey: /* - * Get the key we tried and failed to return last time; - * it should be the current datum of all the secondary cursors. - */ - if ((ret = __dbc_get(jc->j_workcurs[0], - &jc->j_key, key_n, DB_CURRENT | opmods)) != 0) - return (ret); - F_CLR(jc, JOIN_RETRY); - } - - /* - * ret == 0; we have a key to return. - * - * If DB_DBT_USERMEM or DB_DBT_MALLOC is set, we need to copy the key - * back into the dbt we were given for the key; call __db_retcopy. - * Otherwise, assert that we do not need to copy anything and proceed. - */ - DB_ASSERT(env, F_ISSET(key_arg, DB_DBT_USERMEM | DB_DBT_MALLOC | - DB_DBT_USERCOPY) || key_n == key_arg); - - if ((F_ISSET(key_arg, DB_DBT_USERMEM | DB_DBT_MALLOC | - DB_DBT_USERCOPY)) && - (ret = __db_retcopy(env, - key_arg, key_n->data, key_n->size, NULL, NULL)) != 0) { - /* - * The retcopy failed, most commonly because we have a user - * buffer for the key which is too small. Set things up to - * retry next time, and return. - */ - F_SET(jc, JOIN_RETRY); - return (ret); - } - - /* - * If DB_JOIN_ITEM is set, we return it; otherwise we do the lookup - * in the primary and then return. 
- */ - if (operation == DB_JOIN_ITEM) - return (0); - - /* - * If data_arg->flags == 0--that is, if DB is managing the - * data DBT's memory--it's not safe to just pass the DBT - * through to the primary get call, since we don't want that - * memory to belong to the primary DB handle (and if the primary - * is free-threaded, it can't anyway). - * - * Instead, use memory that is managed by the join cursor, in - * jc->j_rdata. - */ - if (!F_ISSET(data_arg, DB_DBT_MALLOC | DB_DBT_REALLOC | - DB_DBT_USERMEM | DB_DBT_USERCOPY)) - db_manage_data = 1; - else - db_manage_data = 0; - if ((ret = __db_join_primget(jc->j_primary, dbc->thread_info, - jc->j_curslist[0]->txn, jc->j_curslist[0]->locker, key_n, - db_manage_data ? &jc->j_rdata : data_arg, opmods)) != 0) { - if (ret == DB_NOTFOUND) { - if (LF_ISSET(DB_READ_UNCOMMITTED) || - (jc->j_curslist[0]->txn != NULL && F_ISSET( - jc->j_curslist[0]->txn, TXN_READ_UNCOMMITTED))) - goto retry; - /* - * If ret == DB_NOTFOUND, the primary and secondary - * are out of sync; every item in each secondary - * should correspond to something in the primary, - * or we shouldn't have done the join this way. - * Wail. - */ - ret = __db_secondary_corrupt(jc->j_primary); - } else - /* - * The get on the primary failed for some other - * reason, most commonly because we're using a user - * buffer that's not big enough. Flag our failure - * so we can return the same key next time. - */ - F_SET(jc, JOIN_RETRY); - } - if (db_manage_data && ret == 0) { - data_arg->data = jc->j_rdata.data; - data_arg->size = jc->j_rdata.size; - } - - return (ret); -} - -/* - * __db_join_close -- - * DBC->close for join cursors. - * - * PUBLIC: int __db_join_close __P((DBC *)); - */ -int -__db_join_close(dbc) - DBC *dbc; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - JOIN_CURSOR *jc; - int ret, t_ret; - u_int32_t i; - - jc = (JOIN_CURSOR *)dbc->internal; - dbp = dbc->dbp; - env = dbp->env; - ret = t_ret = 0; - - /* - * Remove from active list of join cursors. 
Note that this - * must happen before any action that can fail and return, or else - * __db_close may loop indefinitely. - */ - MUTEX_LOCK(env, dbp->mutex); - TAILQ_REMOVE(&dbp->join_queue, dbc, links); - MUTEX_UNLOCK(env, dbp->mutex); - - ENV_ENTER(env, ip); - /* - * Close any open scratch cursors. In each case, there may - * not be as many outstanding as there are cursors in - * curslist, but we want to close whatever's there. - * - * If any close fails, there's no reason not to close everything else; - * we'll just return the error code of the last one to fail. There's - * not much the caller can do anyway, since these cursors only exist - * hanging off a db-internal data structure that they shouldn't be - * mucking with. - */ - for (i = 0; i < jc->j_ncurs; i++) { - if (jc->j_workcurs[i] != NULL && - (t_ret = __dbc_close(jc->j_workcurs[i])) != 0) - ret = t_ret; - if (jc->j_fdupcurs[i] != NULL && - (t_ret = __dbc_close(jc->j_fdupcurs[i])) != 0) - ret = t_ret; - } - ENV_LEAVE(env, ip); - - __os_free(env, jc->j_exhausted); - __os_free(env, jc->j_curslist); - __os_free(env, jc->j_workcurs); - __os_free(env, jc->j_fdupcurs); - __os_free(env, jc->j_key.data); - if (jc->j_rdata.data != NULL) - __os_ufree(env, jc->j_rdata.data); - __os_free(env, jc); - __os_free(env, dbc); - - return (ret); -} - -/* - * __db_join_getnext -- - * This function replaces the DBC_CONTINUE and DBC_KEYSET - * functionality inside the various cursor get routines. - * - * If exhausted == 0, we're not done with the current datum; - * return it if it matches "matching", otherwise search - * using DB_GET_BOTHC (which is faster than iteratively doing - * DB_NEXT_DUP) forward until we find one that does. - * - * If exhausted == 1, we are done with the current datum, so just - * leap forward to searching NEXT_DUPs. - * - * If no matching datum exists, returns DB_NOTFOUND, else 0. 
- */ -static int -__db_join_getnext(dbc, key, data, exhausted, opmods) - DBC *dbc; - DBT *key, *data; - u_int32_t exhausted, opmods; -{ - int ret, cmp; - DB *dbp; - DBT ldata; - int (*func) __P((DB *, const DBT *, const DBT *)); - - dbp = dbc->dbp; - func = (dbp->dup_compare == NULL) ? __bam_defcmp : dbp->dup_compare; - - switch (exhausted) { - case 0: - /* - * We don't want to step on data->data; use a new - * DBT and malloc so we don't step on dbc's rdata memory. - */ - memset(&ldata, 0, sizeof(DBT)); - F_SET(&ldata, DB_DBT_MALLOC); - if ((ret = __dbc_get(dbc, - key, &ldata, opmods | DB_CURRENT)) != 0) - break; - cmp = func(dbp, data, &ldata); - if (cmp == 0) { - /* - * We have to return the real data value. Copy - * it into data, then free the buffer we malloc'ed - * above. - */ - if ((ret = __db_retcopy(dbp->env, data, ldata.data, - ldata.size, &data->data, &data->size)) != 0) - return (ret); - __os_ufree(dbp->env, ldata.data); - return (0); - } - - /* - * Didn't match--we want to fall through and search future - * dups. We just forget about ldata and free - * its buffer--data contains the value we're searching for. - */ - __os_ufree(dbp->env, ldata.data); - /* FALLTHROUGH */ - case 1: - ret = __dbc_get(dbc, key, data, opmods | DB_GET_BOTHC); - break; - default: - ret = EINVAL; - break; - } - - return (ret); -} - -/* - * __db_join_cmp -- - * Comparison function for sorting DBCs in cardinality order. - */ -static int -__db_join_cmp(a, b) - const void *a, *b; -{ - DBC *dbca, *dbcb; - db_recno_t counta, countb; - - dbca = *((DBC * const *)a); - dbcb = *((DBC * const *)b); - - if (__dbc_count(dbca, &counta) != 0 || - __dbc_count(dbcb, &countb) != 0) - return (0); - - return ((long)counta - (long)countb); -} - -/* - * __db_join_primget -- - * Perform a DB->get in the primary, being careful not to use a new - * locker ID if we're doing CDB locking. 
- */ -static int -__db_join_primget(dbp, ip, txn, locker, key, data, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DB_LOCKER *locker; - DBT *key, *data; - u_int32_t flags; -{ - DBC *dbc; - u_int32_t rmw; - int ret, t_ret; - - if ((ret = __db_cursor_int(dbp, ip, - txn, dbp->type, PGNO_INVALID, 0, locker, &dbc)) != 0) - return (ret); - - /* - * The only allowable flags here are the two flags copied into "opmods" - * in __db_join_get, DB_RMW and DB_READ_UNCOMMITTED. The former is an - * op on the c_get call, the latter on the cursor call. It's a DB bug - * if we allow any other flags down in here. - */ - rmw = LF_ISSET(DB_RMW); - if (LF_ISSET(DB_READ_UNCOMMITTED) || - (txn != NULL && F_ISSET(txn, TXN_READ_UNCOMMITTED))) - F_SET(dbc, DBC_READ_UNCOMMITTED); - - if (LF_ISSET(DB_READ_COMMITTED) || - (txn != NULL && F_ISSET(txn, TXN_READ_COMMITTED))) - F_SET(dbc, DBC_READ_COMMITTED); - - LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED | DB_RMW); - DB_ASSERT(dbp->env, flags == 0); - - F_SET(dbc, DBC_TRANSIENT); - - /* - * This shouldn't be necessary, thanks to the fact that join cursors - * swap in their own DB_DBT_REALLOC'ed buffers, but just for form's - * sake, we mirror what __db_get does. - */ - SET_RET_MEM(dbc, dbp); - - ret = __dbc_get(dbc, key, data, DB_SET | rmw); - - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_secondary_corrupt -- - * Report primary/secondary inconsistencies. - * - * PUBLIC: int __db_secondary_corrupt __P((DB *)); - */ -int -__db_secondary_corrupt(dbp) - DB *dbp; -{ - __db_err(dbp->env, DB_SECONDARY_BAD, "%s%s%s", - dbp->fname == NULL ? "unnamed" : dbp->fname, - dbp->dname == NULL ? "" : "/", - dbp->dname == NULL ? "" : dbp->dname); - return (DB_SECONDARY_BAD); -} diff --git a/db/db_meta.c b/db/db_meta.c deleted file mode 100644 index ef42e44..0000000 --- a/db/db_meta.c +++ /dev/null @@ -1,1299 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. 
- * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/txn.h" -#include "dbinc/db_am.h" -#include "dbinc/hash.h" - -static void __db_init_meta __P((DB *, void *, db_pgno_t, u_int32_t)); -#ifdef HAVE_FTRUNCATE -static int __db_pglistcmp __P((const void *, const void *)); -static int __db_truncate_freelist __P((DBC *, DBMETA *, - PAGE *, db_pgno_t *, u_int32_t, u_int32_t)); -#endif - -/* - * __db_init_meta -- - * Helper function for __db_new that initializes the important fields in - * a meta-data page (used instead of P_INIT). We need to make sure that we - * retain the page number and LSN of the existing page. - */ -static void -__db_init_meta(dbp, p, pgno, pgtype) - DB *dbp; - void *p; - db_pgno_t pgno; - u_int32_t pgtype; -{ - DBMETA *meta; - DB_LSN save_lsn; - - meta = (DBMETA *)p; - save_lsn = meta->lsn; - memset(meta, 0, sizeof(DBMETA)); - meta->lsn = save_lsn; - meta->pagesize = dbp->pgsize; - if (F_ISSET(dbp, DB_AM_CHKSUM)) - FLD_SET(meta->metaflags, DBMETA_CHKSUM); - meta->pgno = pgno; - meta->type = (u_int8_t)pgtype; -} - -/* - * __db_new -- - * Get a new page, preferably from the freelist. 
- * - * PUBLIC: int __db_new __P((DBC *, u_int32_t, DB_LOCK *, PAGE **)); - */ -int -__db_new(dbc, type, lockp, pagepp) - DBC *dbc; - u_int32_t type; - DB_LOCK *lockp; - PAGE **pagepp; -{ - DB *dbp; - DBMETA *meta; - DB_LOCK metalock; - DB_LSN lsn; - DB_MPOOLFILE *mpf; - ENV *env; - PAGE *h; - db_pgno_t last, *list, pgno, newnext; - int extend, hash, ret, t_ret; - - meta = NULL; - dbp = dbc->dbp; - env = dbp->env; - mpf = dbp->mpf; - h = NULL; - newnext = PGNO_INVALID; - if (lockp != NULL) - LOCK_INIT(*lockp); - - hash = 0; - ret = 0; - LOCK_INIT(metalock); - -#ifdef HAVE_HASH - if (dbp->type == DB_HASH) { - if ((ret = __ham_return_meta(dbc, DB_MPOOL_DIRTY, &meta)) != 0) - goto err; - if (meta != NULL) - hash = 1; - } -#endif - if (meta == NULL) { - pgno = PGNO_BASE_MD; - if ((ret = __db_lget(dbc, - LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, - DB_MPOOL_DIRTY, &meta)) != 0) - goto err; - } - - last = meta->last_pgno; - if (meta->free == PGNO_INVALID) { - if (FLD_ISSET(type, P_DONTEXTEND)) { - *pagepp = NULL; - goto err; - } - last = pgno = meta->last_pgno + 1; - ZERO_LSN(lsn); - extend = 1; - } else { - pgno = meta->free; - /* - * Lock the new page. Do this here because we must do it - * before getting the page and the caller may need the lock - * to keep readers from seeing the page before the transaction - * commits. We can do this because no one will hold a free - * page locked. - */ - if (lockp != NULL && (ret = - __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, lockp)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, - DB_MPOOL_DIRTY, &h)) != 0) - goto err; - - /* - * We want to take the first page off the free list and - * then set meta->free to the that page's next_pgno, but - * we need to log the change first. 
- */ - newnext = h->next_pgno; - lsn = h->lsn; - extend = 0; - DB_ASSERT(env, TYPE(h) == P_INVALID); - - if (TYPE(h) != P_INVALID) { - __db_errx(env, - "%s page %lu is on free list with type %lu", - dbp->fname, (u_long)PGNO(h), (u_long)TYPE(h)); - return (__env_panic(env, EINVAL)); - } - - } - - FLD_CLR(type, P_DONTEXTEND); - - /* - * Log the allocation before fetching the new page. If we - * don't have room in the log then we don't want to tell - * mpool to extend the file. - */ - if (DBC_LOGGING(dbc)) { - if ((ret = __db_pg_alloc_log(dbp, dbc->txn, &LSN(meta), 0, - &LSN(meta), PGNO_BASE_MD, &lsn, - pgno, (u_int32_t)type, newnext, meta->last_pgno)) != 0) - goto err; - } else - LSN_NOT_LOGGED(LSN(meta)); - - meta->free = newnext; - - if (extend == 1) { - if (lockp != NULL && (ret = - __db_lget(dbc, 0, pgno, DB_LOCK_WRITE, 0, lockp)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, - DB_MPOOL_NEW, &h)) != 0) - goto err; - DB_ASSERT(env, last == pgno); - meta->last_pgno = pgno; - ZERO_LSN(h->lsn); - h->pgno = pgno; - } - LSN(h) = LSN(meta); - - if (hash == 0) - ret = __memp_fput(mpf, dbc->thread_info, meta, dbc->priority); - meta = NULL; - if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) - ret = t_ret; - if (ret != 0) - goto err; - - switch (type) { - case P_BTREEMETA: - case P_HASHMETA: - case P_QAMMETA: - __db_init_meta(dbp, h, h->pgno, type); - break; - default: - P_INIT(h, dbp->pgsize, - h->pgno, PGNO_INVALID, PGNO_INVALID, 0, type); - break; - } - - /* Fix up the sorted free list if necessary. 
*/ -#ifdef HAVE_FTRUNCATE - if (extend == 0) { - u_int32_t nelems = 0; - - if ((ret = __memp_get_freelist(dbp->mpf, &nelems, &list)) != 0) - goto err; - if (nelems != 0) { - DB_ASSERT(env, h->pgno == list[0]); - memmove(list, &list[1], (nelems - 1) * sizeof(*list)); - if ((ret = __memp_extend_freelist( - dbp->mpf, nelems - 1, &list)) != 0) - goto err; - } - } -#else - COMPQUIET(list, NULL); -#endif - - *pagepp = h; - return (0); - -err: if (h != NULL) - (void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority); - if (meta != NULL && hash == 0) - (void)__memp_fput(mpf, dbc->thread_info, meta, dbc->priority); - (void)__TLPUT(dbc, metalock); - if (lockp != NULL) - (void)__LPUT(dbc, *lockp); - return (ret); -} - -/* - * __db_free -- - * Add a page to the head of the freelist. - * - * PUBLIC: int __db_free __P((DBC *, PAGE *)); - */ -int -__db_free(dbc, h) - DBC *dbc; - PAGE *h; -{ - DB *dbp; - DBMETA *meta; - DBT ddbt, ldbt; - DB_LOCK metalock; - DB_LSN *lsnp; - DB_MPOOLFILE *mpf; - PAGE *prev; - db_pgno_t last_pgno, next_pgno, pgno, prev_pgno; - u_int32_t lflag; - int hash, ret, t_ret; -#ifdef HAVE_FTRUNCATE - db_pgno_t *list, *lp; - u_int32_t nelem, position, start; - int do_truncate; -#endif - - dbp = dbc->dbp; - mpf = dbp->mpf; - prev_pgno = PGNO_INVALID; - meta = NULL; - prev = NULL; - LOCK_INIT(metalock); -#ifdef HAVE_FTRUNCATE - lp = NULL; - nelem = 0; - do_truncate = 0; -#endif - - /* - * Retrieve the metadata page. If we are not keeping a sorted - * free list put the page at the head of the the free list. - * If we are keeping a sorted free list, for truncation, - * then figure out where this page belongs and either - * link it in or truncate the file as much as possible. - * If either the lock get or page get routines - * fail, then we need to put the page with which we were called - * back because our caller assumes we take care of it. 
- */ - hash = 0; - - pgno = PGNO_BASE_MD; -#ifdef HAVE_HASH - if (dbp->type == DB_HASH) { - if ((ret = __ham_return_meta(dbc, -#ifdef HAVE_FTRUNCATE - 0, -#else - DB_MPOOL_DIRTY, -#endif - &meta)) != 0) - goto err; - if (meta != NULL) - hash = 1; - } -#endif - if (meta == NULL) { - if ((ret = __db_lget(dbc, - LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - - /* If we support truncate, we might not dirty the meta page. */ - if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, -#ifdef HAVE_FTRUNCATE - 0, -#else - DB_MPOOL_DIRTY, -#endif - &meta)) != 0) - goto err1; - } - - last_pgno = meta->last_pgno; - next_pgno = meta->free; - /* - * Assign lsnp here so it always initialized when - * HAVE_FTRUNCATE is not defined. - */ - lsnp = &LSN(meta); - - DB_ASSERT(dbp->env, h->pgno != next_pgno); - -#ifdef HAVE_FTRUNCATE - /* - * If we are maintaining a sorted free list see if we either have a - * new truncation point or the page goes somewhere in the middle of - * the list. If it goes in the middle of the list, we will drop the - * meta page and get the previous page. - */ - if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0) - goto err1; - if (list == NULL) - goto no_sort; - - if (h->pgno != last_pgno) { - /* - * Put the page number in the sorted list. - * Finds its position and the previous page, - * extend the list, make room and insert. - */ - position = 0; - if (nelem != 0) { - __db_freelist_pos(h->pgno, list, nelem, &position); - - DB_ASSERT(dbp->env, h->pgno != list[position]); - - /* Get the previous page if this is not the smallest. */ - if (position != 0 || h->pgno > list[0]) - prev_pgno = list[position]; - } - - } else if (nelem != 0) { - /* Find the truncation point. 
*/ - for (lp = &list[nelem - 1]; lp >= list; lp--) - if (--last_pgno != *lp) - break; - if (lp < list || last_pgno < h->pgno - 1) - do_truncate = 1; - last_pgno = meta->last_pgno; - } - -no_sort: - if (prev_pgno == PGNO_INVALID) { -#ifdef HAVE_HASH - if (hash) { - if ((ret = - __ham_return_meta(dbc, DB_MPOOL_DIRTY, &meta)) != 0) - goto err1; - } else -#endif - if ((ret = __memp_dirty(mpf, - &meta, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) - goto err1; - lsnp = &LSN(meta); - } else { - pgno = prev_pgno; - if ((ret = __memp_fget(mpf, &pgno, - dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &prev)) != 0) - goto err1; - next_pgno = NEXT_PGNO(prev); - lsnp = &LSN(prev); - } -#endif - - /* - * Log the change. - * We are either logging an update to the metapage or to the - * previous page in the sorted list. - */ - if (DBC_LOGGING(dbc)) { - memset(&ldbt, 0, sizeof(ldbt)); - ldbt.data = h; - ldbt.size = P_OVERHEAD(dbp); - /* - * If we are truncating the file, we need to make sure - * the logging happens before the truncation. If we - * are truncating multiple pages we don't need to flush the - * log here as it will be flushed by __db_truncate_freelist. - * If we are zeroing pages rather than truncating we still - * need to flush since they will not have valid LSNs. 
- */ - lflag = 0; - - if (h->pgno == last_pgno -#ifdef HAVE_FTRUNCATE - && do_truncate == 0 -#endif - ) - lflag = DB_FLUSH; - switch (h->type) { - case P_HASH: - case P_IBTREE: - case P_IRECNO: - case P_LBTREE: - case P_LRECNO: - case P_LDUP: - if (h->entries > 0) { - ldbt.size += h->entries * sizeof(db_indx_t); - ddbt.data = (u_int8_t *)h + HOFFSET(h); - ddbt.size = dbp->pgsize - HOFFSET(h); - if ((ret = __db_pg_freedata_log(dbp, dbc->txn, - lsnp, lflag, - h->pgno, lsnp, pgno, - &ldbt, next_pgno, last_pgno, &ddbt)) != 0) - goto err1; - goto logged; - } - break; - case P_HASHMETA: - ldbt.size = sizeof(HMETA); - break; - case P_BTREEMETA: - ldbt.size = sizeof(BTMETA); - break; - case P_OVERFLOW: - ldbt.size += OV_LEN(h); - break; - default: - DB_ASSERT(dbp->env, h->type != P_QAMDATA); - } - - if ((ret = __db_pg_free_log(dbp, - dbc->txn, lsnp, lflag, h->pgno, - lsnp, pgno, &ldbt, next_pgno, last_pgno)) != 0) - goto err1; - } else - LSN_NOT_LOGGED(*lsnp); - -logged: -#ifdef HAVE_FTRUNCATE - if (do_truncate) { - start = (u_int32_t) (lp - list) + 1; - meta->last_pgno--; - ret = __db_truncate_freelist( - dbc, meta, h, list, start, nelem); - h = NULL; - } else -#endif - if (h->pgno == last_pgno) { - /* - * We are going to throw this page away, but if we are - * using MVCC then this version may stick around and we - * might have to make a copy. - */ - if (mpf->mfp->multiversion && (ret = __memp_dirty(mpf, - &h, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) - goto err1; - LSN(h) = *lsnp; - P_INIT(h, dbp->pgsize, - h->pgno, PGNO_INVALID, next_pgno, 0, P_INVALID); - if ((ret = __memp_fput(mpf, - dbc->thread_info, h, DB_PRIORITY_VERY_LOW)) != 0) - goto err1; - h = NULL; - /* Give the page back to the OS. 
*/ - if ((ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info, - last_pgno, 0)) != 0) - goto err1; - DB_ASSERT(dbp->env, meta->pgno == PGNO_BASE_MD); - meta->last_pgno--; - h = NULL; - } else { -#ifdef HAVE_FTRUNCATE - if (list != NULL) { - /* Put the page number into the list. */ - if ((ret = - __memp_extend_freelist(mpf, nelem + 1, &list)) != 0) - goto err1; - if (prev_pgno != PGNO_INVALID) - lp = &list[position + 1]; - else - lp = list; - if (nelem != 0 && position != nelem) - memmove(lp + 1, lp, (size_t) - ((u_int8_t*)&list[nelem] - (u_int8_t*)lp)); - *lp = h->pgno; - } -#endif - /* - * If we are not truncating the page then we - * reinitialize it and put it at the head of - * the free list. - */ - if ((ret = __memp_dirty(mpf, - &h, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) - goto err1; - LSN(h) = *lsnp; - P_INIT(h, dbp->pgsize, - h->pgno, PGNO_INVALID, next_pgno, 0, P_INVALID); -#ifdef DIAGNOSTIC - memset((u_int8_t *) h + P_OVERHEAD(dbp), - CLEAR_BYTE, dbp->pgsize - P_OVERHEAD(dbp)); -#endif - if (prev_pgno == PGNO_INVALID) - meta->free = h->pgno; - else - NEXT_PGNO(prev) = h->pgno; - } - - /* Discard the metadata or previous page. */ -err1: if (hash == 0 && meta != NULL && (t_ret = __memp_fput(mpf, - dbc->thread_info, (PAGE *)meta, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) - ret = t_ret; - if (prev != (PAGE*) meta && prev != NULL && (t_ret = __memp_fput(mpf, - dbc->thread_info, prev, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - - /* Discard the caller's page reference. */ -err: if (h != NULL && (t_ret = __memp_fput(mpf, - dbc->thread_info, h, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - - /* - * XXX - * We have to unlock the caller's page in the caller! - */ - return (ret); -} - -#ifdef HAVE_FTRUNCATE -/* - * __db_freelist_pos -- find the position of a page in the freelist. - * The list is sorted, we do a binary search. 
- * - * PUBLIC: #ifdef HAVE_FTRUNCATE - * PUBLIC: void __db_freelist_pos __P((db_pgno_t, - * PUBLIC: db_pgno_t *, u_int32_t, u_int32_t *)); - * PUBLIC: #endif - */ -void -__db_freelist_pos(pgno, list, nelem, posp) - db_pgno_t pgno; - db_pgno_t *list; - u_int32_t nelem; - u_int32_t *posp; -{ - u_int32_t base, indx, lim; - - indx = 0; - for (base = 0, lim = nelem; lim != 0; lim >>= 1) { - indx = base + (lim >> 1); - if (pgno == list[indx]) { - *posp = indx; - return; - } - if (pgno > list[indx]) { - base = indx + 1; - --lim; - } - } - if (base != 0) - base--; - *posp = base; - return; -} - -static int -__db_pglistcmp(a, b) - const void *a, *b; -{ - db_pglist_t *ap, *bp; - - ap = (db_pglist_t *)a; - bp = (db_pglist_t *)b; - - return ((ap->pgno > bp->pgno) ? 1 : (ap->pgno < bp->pgno) ? -1: 0); -} - -/* - * __db_freelist_sort -- sort a list of free pages. - * PUBLIC: void __db_freelist_sort __P((db_pglist_t *, u_int32_t)); - */ -void -__db_freelist_sort(list, nelems) - db_pglist_t *list; - u_int32_t nelems; -{ - qsort(list, (size_t)nelems, sizeof(db_pglist_t), __db_pglistcmp); -} - -/* - * __db_pg_truncate -- find the truncation point in a sorted freelist. 
- * - * PUBLIC: #ifdef HAVE_FTRUNCATE - * PUBLIC: int __db_pg_truncate __P((DBC *, DB_TXN *, - * PUBLIC: db_pglist_t *, DB_COMPACT *, u_int32_t *, - * PUBLIC: db_pgno_t , db_pgno_t *, DB_LSN *, int)); - * PUBLIC: #endif - */ -int -__db_pg_truncate(dbc, txn, - list, c_data, nelemp, free_pgno, last_pgno, lsnp, in_recovery) - DBC *dbc; - DB_TXN *txn; - db_pglist_t *list; - DB_COMPACT *c_data; - u_int32_t *nelemp; - db_pgno_t free_pgno, *last_pgno; - DB_LSN *lsnp; - int in_recovery; -{ - DB *dbp; - DBT ddbt; - DB_LSN null_lsn; - DB_MPOOLFILE *mpf; - PAGE *h; - db_pglist_t *lp, *slp; - db_pgno_t lpgno, pgno; - u_int32_t elems, log_size, tpoint; - int last, ret; - - ret = 0; - h = NULL; - - dbp = dbc->dbp; - mpf = dbp->mpf; - elems = tpoint = *nelemp; - - /* - * Figure out what (if any) pages can be truncated immediately and - * record the place from which we can truncate, so we can do the - * memp_ftruncate below. We also use this to avoid ever putting - * these pages on the freelist, which we are about to relink. - */ - pgno = *last_pgno; - lp = &list[elems - 1]; - last = 1; - while (tpoint != 0) { - if (lp->pgno != pgno) - break; - pgno--; - tpoint--; - lp--; - } - - lp = list; - slp = &list[elems]; - /* - * Log the sorted list. We log the whole list so it can be rebuilt. - * Don't overflow the log file. - */ -again: if (DBC_LOGGING(dbc)) { - last = 1; - lpgno = *last_pgno; - ddbt.size = elems * sizeof(*lp); - ddbt.data = lp; - log_size = ((LOG *)dbc->env-> - lg_handle->reginfo.primary)->log_size; - if (ddbt.size > log_size / 2) { - elems = (log_size / 2) / sizeof(*lp); - ddbt.size = elems * sizeof(*lp); - last = 0; - /* - * If we stopped after the truncation point - * then we need to truncate from here. - */ - if (lp + elems >= &list[tpoint]) - lpgno = lp[elems - 1].pgno; - } - /* - * If this is not the begining of the list fetch the end - * of the previous segment. This page becomes the last_free - * page and will link to this segment if it is not truncated. 
- */ - if (lp != list) { - if ((ret = __memp_fget(mpf, &lp[-1].pgno, - dbc->thread_info, txn, 0, &h)) != 0) - goto err; - } - - slp = &lp[elems]; - - ZERO_LSN(null_lsn); - if ((ret = __db_pg_trunc_log(dbp, dbc->txn, - lsnp, last == 1 ? DB_FLUSH : 0, PGNO_BASE_MD, - lsnp, h != NULL ? PGNO(h) : PGNO_INVALID, - h != NULL ? &LSN(h) : &null_lsn, - free_pgno, lpgno, &ddbt)) != 0) - goto err; - if (h != NULL) { - LSN(h) = *lsnp; - if ((ret = __memp_fput(mpf, - dbc->thread_info, h, dbc->priority)) != 0) - goto err; - } - h = NULL; - } else if (!in_recovery) - LSN_NOT_LOGGED(*lsnp); - - for (; lp < slp && lp < &list[tpoint]; lp++) { - if ((ret = __memp_fget(mpf, &lp->pgno, dbc->thread_info, - txn, !in_recovery ? DB_MPOOL_DIRTY : 0, &h)) != 0) { - /* Page may have been truncated later. */ - if (in_recovery && ret == DB_PAGE_NOTFOUND) { - ret = 0; - continue; - } - goto err; - } - if (in_recovery) { - if (LOG_COMPARE(&LSN(h), &lp->lsn) == 0) { - if ((ret = __memp_dirty(mpf, &h, - dbc->thread_info, - txn, dbp->priority, 0)) != 0) { - (void)__memp_fput(mpf, - dbc->thread_info, h, dbp->priority); - goto err; - } - } else - goto skip; - } - - if (lp == &list[tpoint - 1]) - NEXT_PGNO(h) = PGNO_INVALID; - else - NEXT_PGNO(h) = lp[1].pgno; - DB_ASSERT(mpf->env, NEXT_PGNO(h) < *last_pgno); - - LSN(h) = *lsnp; -skip: if ((ret = __memp_fput(mpf, - dbc->thread_info, h, dbp->priority)) != 0) - goto err; - h = NULL; - } - - /* - * If we did not log everything try again. We start from slp and - * try to go to the end of the list. - */ - if (last == 0) { - elems = (u_int32_t)(&list[*nelemp] - slp); - lp = slp; - goto again; - } - - /* - * Truncate the file. Its possible that the last page is the - * only one that got truncated and that's done in the caller. - */ - if (pgno != *last_pgno) { - if (tpoint != *nelemp && - (ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info, - pgno + 1, in_recovery ? 
MP_TRUNC_RECOVER : 0)) != 0) - goto err; - if (c_data) - c_data->compact_pages_truncated += *last_pgno - pgno; - *last_pgno = pgno; - } - *nelemp = tpoint; - - if (0) { -err: if (h != NULL) - (void)__memp_fput(mpf, - dbc->thread_info, h, dbc->priority); - } - return (ret); -} - -/* - * __db_free_truncate -- - * Build a sorted free list and truncate free pages at the end - * of the file. - * - * PUBLIC: #ifdef HAVE_FTRUNCATE - * PUBLIC: int __db_free_truncate __P((DB *, DB_THREAD_INFO *, DB_TXN *, - * PUBLIC: u_int32_t, DB_COMPACT *, db_pglist_t **, u_int32_t *, - * PUBLIC: db_pgno_t *)); - * PUBLIC: #endif - */ -int -__db_free_truncate(dbp, ip, txn, flags, c_data, listp, nelemp, last_pgnop) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - u_int32_t flags; - DB_COMPACT *c_data; - db_pglist_t **listp; - u_int32_t *nelemp; - db_pgno_t *last_pgnop; -{ - DBC *dbc; - DBMETA *meta; - DB_LOCK metalock; - DB_MPOOLFILE *mpf; - ENV *env; - PAGE *h; - db_pglist_t *list, *lp; - db_pgno_t pgno; - u_int32_t nelems; - int ret, t_ret; - size_t size; - - COMPQUIET(flags, 0); - list = NULL; - meta = NULL; - env = dbp->env; - mpf = dbp->mpf; - h = NULL; - nelems = 0; - if (listp != NULL) { - *listp = NULL; - DB_ASSERT(env, nelemp != NULL); - *nelemp = 0; - } - - if ((ret = __db_cursor(dbp, ip, txn, &dbc, DB_WRITELOCK)) != 0) - return (ret); - - pgno = PGNO_BASE_MD; - if ((ret = __db_lget(dbc, - LCK_ALWAYS, pgno, DB_LOCK_WRITE, 0, &metalock)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, dbc->txn, 0, - &meta)) != 0) - goto err; - - if (last_pgnop != NULL) - *last_pgnop = meta->last_pgno; - if ((pgno = meta->free) == PGNO_INVALID) - goto done; - - size = 128; - if ((ret = __os_malloc(env, size * sizeof(*list), &list)) != 0) - goto err; - lp = list; - - do { - if (lp == &list[size]) { - size *= 2; - if ((ret = __os_realloc(env, - size * sizeof(*list), &list)) != 0) - goto err; - lp = &list[size / 2]; - } - if ((ret = __memp_fget(mpf, &pgno, - dbc->thread_info, 
dbc->txn, 0, &h)) != 0) - goto err; - - lp->pgno = pgno; - lp->next_pgno = NEXT_PGNO(h); - lp->lsn = LSN(h); - pgno = NEXT_PGNO(h); - if ((ret = __memp_fput(mpf, - dbc->thread_info, h, dbc->priority)) != 0) - goto err; - lp++; - } while (pgno != PGNO_INVALID); - nelems = (u_int32_t)(lp - list); - - if ((ret = __memp_dirty(mpf, - &meta, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) - goto err; - - /* Sort the list */ - __db_freelist_sort(list, nelems); - - if ((ret = __db_pg_truncate(dbc, txn, list, c_data, - &nelems, meta->free, &meta->last_pgno, &LSN(meta), 0)) != 0) - goto err; - - if (nelems == 0) - meta->free = PGNO_INVALID; - else - meta->free = list[0].pgno; - -done: if (last_pgnop != NULL) - *last_pgnop = meta->last_pgno; - - /* - * The truncate point is the number of pages in the free - * list back from the last page. The number of pages - * in the free list are the number that we can swap in. - */ - if (c_data) - c_data->compact_truncate = (u_int32_t)meta->last_pgno - nelems; - - if (nelems != 0 && listp != NULL) { - *listp = list; - *nelemp = nelems; - list = NULL; - } - -err: if (list != NULL) - __os_free(env, list); - if (meta != NULL && (t_ret = __memp_fput(mpf, - dbc->thread_info, (PAGE *)meta, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __TLPUT(dbc, metalock)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -static int -__db_truncate_freelist(dbc, meta, h, list, start, nelem) - DBC *dbc; - DBMETA *meta; - PAGE *h; - db_pgno_t *list; - u_int32_t start, nelem; -{ - DB *dbp; - DBT ddbt; - DB_LSN null_lsn; - DB_MPOOLFILE *mpf; - PAGE *last_free, *pg; - db_pgno_t *lp, free_pgno, lpgno; - db_pglist_t *plist, *pp, *spp; - u_int32_t elem, log_size; - int last, ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - plist = NULL; - last_free = NULL; - pg = NULL; - - if (start != 0 && - (ret = __memp_fget(mpf, &list[start - 1], - dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, 
&last_free)) != 0) - goto err; - - if (DBC_LOGGING(dbc)) { - if ((ret = __os_malloc(dbp->env, - (nelem - start) * sizeof(*pp), &plist)) != 0) - goto err; - - pp = plist; - for (lp = &list[start]; lp < &list[nelem]; lp++) { - pp->pgno = *lp; - if ((ret = __memp_fget(mpf, lp, - dbc->thread_info, dbc->txn, 0, &pg)) != 0) - goto err; - pp->lsn = LSN(pg); - pp->next_pgno = NEXT_PGNO(pg); - if ((ret = __memp_fput(mpf, - dbc->thread_info, pg, DB_PRIORITY_VERY_LOW)) != 0) - goto err; - pg = NULL; - pp++; - } - ZERO_LSN(null_lsn); - pp = plist; - elem = nelem - start; - log_size = ((LOG *)dbc->env-> - lg_handle->reginfo.primary)->log_size; -again: ddbt.data = spp = pp; - free_pgno = pp->pgno; - lpgno = meta->last_pgno; - ddbt.size = elem * sizeof(*pp); - if (ddbt.size > log_size / 2) { - elem = (log_size / 2) / (u_int32_t)sizeof(*pp); - ddbt.size = elem * sizeof(*pp); - pp += elem; - elem = (nelem - start) - (u_int32_t)(pp - plist); - lpgno = pp[-1].pgno; - last = 0; - } else - last = 1; - /* - * Get the page which will link to this section if we abort. - * If this is the first segment then its last_free. - */ - if (spp == plist) - pg = last_free; - else if ((ret = __memp_fget(mpf, &spp[-1].pgno, - dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &pg)) != 0) - goto err; - - if ((ret = __db_pg_trunc_log(dbp, dbc->txn, - &LSN(meta), last == 1 ? DB_FLUSH : 0, - PGNO(meta), &LSN(meta), - pg != NULL ? PGNO(pg) : PGNO_INVALID, - pg != NULL ? 
&LSN(pg) : &null_lsn, - free_pgno, lpgno, &ddbt)) != 0) - goto err; - if (pg != NULL) { - LSN(pg) = LSN(meta); - if (pg != last_free && (ret = __memp_fput(mpf, - dbc->thread_info, pg, DB_PRIORITY_VERY_LOW)) != 0) - goto err; - pg = NULL; - } - if (last == 0) - goto again; - } else - LSN_NOT_LOGGED(LSN(meta)); - - if ((ret = __memp_fput(mpf, - dbc->thread_info, h, DB_PRIORITY_VERY_LOW)) != 0) - goto err; - h = NULL; - if ((ret = __memp_ftruncate(mpf, dbc->txn, dbc->thread_info, - list[start], 0)) != 0) - goto err; - meta->last_pgno = list[start] - 1; - - if (start == 0) - meta->free = PGNO_INVALID; - else { - NEXT_PGNO(last_free) = PGNO_INVALID; - if ((ret = __memp_fput(mpf, - dbc->thread_info, last_free, dbc->priority)) != 0) - goto err; - last_free = NULL; - } - - /* Shrink the number of elements in the list. */ - ret = __memp_extend_freelist(mpf, start, &list); - -err: if (plist != NULL) - __os_free(dbp->env, plist); - - /* We need to put the page on error. */ - if (h != NULL) - (void)__memp_fput(mpf, dbc->thread_info, h, dbc->priority); - if (pg != NULL && pg != last_free) - (void)__memp_fput(mpf, dbc->thread_info, pg, dbc->priority); - if (last_free != NULL) - (void)__memp_fput(mpf, - dbc->thread_info, last_free, dbc->priority); - - return (ret); -} -#endif - -#ifdef DEBUG -/* - * __db_lprint -- - * Print out the list of locks currently held by a cursor. - * - * PUBLIC: int __db_lprint __P((DBC *)); - */ -int -__db_lprint(dbc) - DBC *dbc; -{ - DB *dbp; - DB_LOCKREQ req; - ENV *env; - - dbp = dbc->dbp; - env = dbp->env; - - if (LOCKING_ON(env)) { - req.op = DB_LOCK_DUMP; - (void)__lock_vec(env, dbc->locker, 0, &req, 1, NULL); - } - return (0); -} -#endif - -/* - * __db_lget -- - * The standard lock get call. 
- * - * PUBLIC: int __db_lget __P((DBC *, - * PUBLIC: int, db_pgno_t, db_lockmode_t, u_int32_t, DB_LOCK *)); - */ -int -__db_lget(dbc, action, pgno, mode, lkflags, lockp) - DBC *dbc; - int action; - db_pgno_t pgno; - db_lockmode_t mode; - u_int32_t lkflags; - DB_LOCK *lockp; -{ - DB *dbp; - DB_LOCKREQ couple[3], *reqp; - DB_TXN *txn; - ENV *env; - int has_timeout, i, ret; - - dbp = dbc->dbp; - env = dbp->env; - txn = dbc->txn; - - /* - * We do not always check if we're configured for locking before - * calling __db_lget to acquire the lock. - */ - if (CDB_LOCKING(env) || !LOCKING_ON(env) || - (MULTIVERSION(dbp) && mode == DB_LOCK_READ && - dbc->txn != NULL && F_ISSET(dbc->txn, TXN_SNAPSHOT)) || - F_ISSET(dbc, DBC_DONTLOCK) || (F_ISSET(dbc, DBC_RECOVER) && - (action != LCK_ROLLBACK || IS_REP_CLIENT(env))) || - (action != LCK_ALWAYS && F_ISSET(dbc, DBC_OPD))) { - LOCK_INIT(*lockp); - return (0); - } - - dbc->lock.pgno = pgno; - if (lkflags & DB_LOCK_RECORD) - dbc->lock.type = DB_RECORD_LOCK; - else - dbc->lock.type = DB_PAGE_LOCK; - lkflags &= ~DB_LOCK_RECORD; - - /* - * If the transaction enclosing this cursor has DB_LOCK_NOWAIT set, - * pass that along to the lock call. - */ - if (DB_NONBLOCK(dbc)) - lkflags |= DB_LOCK_NOWAIT; - - if (F_ISSET(dbc, DBC_READ_UNCOMMITTED) && mode == DB_LOCK_READ) - mode = DB_LOCK_READ_UNCOMMITTED; - - has_timeout = F_ISSET(dbc, DBC_RECOVER) || - (txn != NULL && F_ISSET(txn, TXN_LOCKTIMEOUT)); - - /* - * Transactional locking. - * Hold on to the previous read lock only if we are in full isolation. - * COUPLE_ALWAYS indicates we are holding an interior node which need - * not be isolated. - * Downgrade write locks if we are supporting dirty readers. 
- */ - if ((action != LCK_COUPLE && action != LCK_COUPLE_ALWAYS) || - !LOCK_ISSET(*lockp)) - action = 0; - else if (dbc->txn == NULL || action == LCK_COUPLE_ALWAYS) - action = LCK_COUPLE; - else if (F_ISSET(dbc, DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED) && - lockp->mode == DB_LOCK_READ) - action = LCK_COUPLE; - else if (lockp->mode == DB_LOCK_READ_UNCOMMITTED) - action = LCK_COUPLE; - else if (F_ISSET(dbc->dbp, - DB_AM_READ_UNCOMMITTED) && lockp->mode == DB_LOCK_WRITE) - action = LCK_DOWNGRADE; - else - action = 0; - - i = 0; - switch (action) { - default: - if (has_timeout) - goto do_couple; - ret = __lock_get(env, - dbc->locker, lkflags, &dbc->lock_dbt, mode, lockp); - break; - - case LCK_DOWNGRADE: - couple[0].op = DB_LOCK_GET; - couple[0].obj = NULL; - couple[0].lock = *lockp; - couple[0].mode = DB_LOCK_WWRITE; - UMRW_SET(couple[0].timeout); - i++; - /* FALLTHROUGH */ - case LCK_COUPLE: -do_couple: couple[i].op = has_timeout? DB_LOCK_GET_TIMEOUT : DB_LOCK_GET; - couple[i].obj = &dbc->lock_dbt; - couple[i].mode = mode; - UMRW_SET(couple[i].timeout); - i++; - if (has_timeout) - couple[0].timeout = - F_ISSET(dbc, DBC_RECOVER) ? 0 : txn->lock_timeout; - if (action == LCK_COUPLE || action == LCK_DOWNGRADE) { - couple[i].op = DB_LOCK_PUT; - couple[i].lock = *lockp; - i++; - } - - ret = __lock_vec(env, - dbc->locker, lkflags, couple, i, &reqp); - if (ret == 0 || reqp == &couple[i - 1]) - *lockp = i == 1 ? couple[0].lock : couple[i - 2].lock; - break; - } - - if (txn != NULL && ret == DB_LOCK_DEADLOCK) - F_SET(txn, TXN_DEADLOCK); - return ((ret == DB_LOCK_NOTGRANTED && !F_ISSET(env->dbenv, - DB_ENV_TIME_NOTGRANTED)) ? DB_LOCK_DEADLOCK : ret); -} - -/* - * __db_lput -- - * The standard lock put call. - * - * PUBLIC: int __db_lput __P((DBC *, DB_LOCK *)); - */ -int -__db_lput(dbc, lockp) - DBC *dbc; - DB_LOCK *lockp; -{ - DB_LOCKREQ couple[2], *reqp; - ENV *env; - int action, ret; - - /* - * Transactional locking. 
- * Hold on to the read locks only if we are in full isolation. - * Downgrade write locks if we are supporting dirty readers. - */ - if (F_ISSET(dbc->dbp, - DB_AM_READ_UNCOMMITTED) && lockp->mode == DB_LOCK_WRITE) - action = LCK_DOWNGRADE; - else if (dbc->txn == NULL) - action = LCK_COUPLE; - else if (F_ISSET(dbc, DBC_READ_COMMITTED | DBC_WAS_READ_COMMITTED) && - lockp->mode == DB_LOCK_READ) - action = LCK_COUPLE; - else if (lockp->mode == DB_LOCK_READ_UNCOMMITTED) - action = LCK_COUPLE; - else - action = 0; - - env = dbc->env; - switch (action) { - case LCK_COUPLE: - ret = __lock_put(env, lockp); - break; - case LCK_DOWNGRADE: - couple[0].op = DB_LOCK_GET; - couple[0].obj = NULL; - couple[0].mode = DB_LOCK_WWRITE; - couple[0].lock = *lockp; - UMRW_SET(couple[0].timeout); - couple[1].op = DB_LOCK_PUT; - couple[1].lock = *lockp; - ret = __lock_vec(env, dbc->locker, 0, couple, 2, &reqp); - if (ret == 0 || reqp == &couple[1]) - *lockp = couple[0].lock; - break; - default: - ret = 0; - break; - } - - return (ret); -} diff --git a/db/db_method.c b/db/db_method.c deleted file mode 100644 index 1182f97..0000000 --- a/db/db_method.c +++ /dev/null @@ -1,1052 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1999-2009 Oracle. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/crypto.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/mp.h" -#include "dbinc/qam.h" -#include "dbinc/txn.h" - -#ifdef HAVE_RPC -#ifdef HAVE_SYSTEM_INCLUDE_FILES -#include <rpc/rpc.h> -#endif -#include "db_server.h" -#include "dbinc_auto/rpc_client_ext.h" -#endif - -static int __db_get_byteswapped __P((DB *, int *)); -static int __db_get_dbname __P((DB *, const char **, const char **)); -static DB_ENV *__db_get_env __P((DB *)); -static void __db_get_msgcall - __P((DB *, void (**)(const DB_ENV *, const char *))); -static DB_MPOOLFILE *__db_get_mpf __P((DB *)); -static int __db_get_multiple __P((DB *)); -static int __db_get_transactional __P((DB *)); -static int __db_get_type __P((DB *, DBTYPE *dbtype)); -static int __db_init __P((DB *, u_int32_t)); -static int __db_get_alloc __P((DB *, void *(**)(size_t), - void *(**)(void *, size_t), void (**)(void *))); -static int __db_set_alloc __P((DB *, void *(*)(size_t), - void *(*)(void *, size_t), void (*)(void *))); -static int __db_get_append_recno __P((DB *, - int (**)(DB *, DBT *, db_recno_t))); -static int __db_set_append_recno __P((DB *, int (*)(DB *, DBT *, db_recno_t))); -static int __db_get_cachesize __P((DB *, u_int32_t *, u_int32_t *, int *)); -static int __db_set_cachesize __P((DB *, u_int32_t, u_int32_t, int)); -static int __db_get_create_dir __P((DB *, const char **)); -static int __db_set_create_dir __P((DB *, const char *)); -static int __db_get_dup_compare - __P((DB *, int (**)(DB *, const DBT *, const DBT *))); -static int __db_set_dup_compare - __P((DB *, int (*)(DB *, const DBT *, const DBT *))); -static int __db_get_encrypt_flags __P((DB *, u_int32_t *)); -static int __db_set_encrypt __P((DB *, const char *, u_int32_t)); -static int __db_get_feedback __P((DB *, void (**)(DB *, int, int))); -static int __db_set_feedback __P((DB *, void (*)(DB *, 
int, int))); -static void __db_map_flags __P((DB *, u_int32_t *, u_int32_t *)); -static int __db_get_pagesize __P((DB *, u_int32_t *)); -static int __db_set_paniccall __P((DB *, void (*)(DB_ENV *, int))); -static int __db_set_priority __P((DB *, DB_CACHE_PRIORITY)); -static int __db_get_priority __P((DB *, DB_CACHE_PRIORITY *)); -static void __db_get_errcall __P((DB *, - void (**)(const DB_ENV *, const char *, const char *))); -static void __db_set_errcall - __P((DB *, void (*)(const DB_ENV *, const char *, const char *))); -static void __db_get_errfile __P((DB *, FILE **)); -static void __db_set_errfile __P((DB *, FILE *)); -static void __db_get_errpfx __P((DB *, const char **)); -static void __db_set_errpfx __P((DB *, const char *)); -static void __db_set_msgcall - __P((DB *, void (*)(const DB_ENV *, const char *))); -static void __db_get_msgfile __P((DB *, FILE **)); -static void __db_set_msgfile __P((DB *, FILE *)); -static void __dbh_err __P((DB *, int, const char *, ...)); -static void __dbh_errx __P((DB *, const char *, ...)); - -/* - * db_create -- - * DB constructor. - * - * EXTERN: int db_create __P((DB **, DB_ENV *, u_int32_t)); - */ -int -db_create(dbpp, dbenv, flags) - DB **dbpp; - DB_ENV *dbenv; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - ip = NULL; - env = dbenv == NULL ? NULL : dbenv->env; - - /* Check for invalid function flags. */ - if (flags != 0) - return (__db_ferr(env, "db_create", 0)); - - if (env != NULL) - ENV_ENTER(env, ip); - ret = __db_create_internal(dbpp, env, flags); - if (env != NULL) - ENV_LEAVE(env, ip); - - return (ret); -} - -/* - * __db_create_internal -- - * DB constructor internal routine. - * - * PUBLIC: int __db_create_internal __P((DB **, ENV *, u_int32_t)); - */ -int -__db_create_internal(dbpp, env, flags) - DB **dbpp; - ENV *env; - u_int32_t flags; -{ - DB *dbp; - DB_ENV *dbenv; - DB_REP *db_rep; - int ret; - - *dbpp = NULL; - - /* If we don't have an environment yet, allocate a local one. 
*/ - if (env == NULL) { - if ((ret = db_env_create(&dbenv, 0)) != 0) - return (ret); - env = dbenv->env; - F_SET(env, ENV_DBLOCAL); - } else - dbenv = env->dbenv; - - /* Allocate and initialize the DB handle. */ - if ((ret = __os_calloc(env, 1, sizeof(*dbp), &dbp)) != 0) - goto err; - - dbp->dbenv = env->dbenv; - dbp->env = env; - if ((ret = __db_init(dbp, flags)) != 0) - goto err; - - MUTEX_LOCK(env, env->mtx_dblist); - ++env->db_ref; - MUTEX_UNLOCK(env, env->mtx_dblist); - - /* - * Set the replication timestamp; it's 0 if we're not in a replicated - * environment. Don't acquire a lock to read the value, even though - * it's opaque: all we check later is value equality, nothing else. - */ - dbp->timestamp = REP_ON(env) ? - ((REGENV *)env->reginfo->primary)->rep_timestamp : 0; - /* - * Set the replication generation number for fid management; valid - * replication generations start at 1. Don't acquire a lock to - * read the value. All we check later is value equality. - */ - db_rep = env->rep_handle; - dbp->fid_gen = REP_ON(env) ? ((REP *)db_rep->region)->gen : 0; - - /* If not RPC, open a backing DB_MPOOLFILE handle in the memory pool. */ - if (!RPC_ON(dbenv) && (ret = __memp_fcreate(env, &dbp->mpf)) != 0) - goto err; - - dbp->type = DB_UNKNOWN; - - *dbpp = dbp; - return (0); - -err: if (dbp != NULL) { - if (dbp->mpf != NULL) - (void)__memp_fclose(dbp->mpf, 0); - __os_free(env, dbp); - } - - if (F_ISSET(env, ENV_DBLOCAL)) - (void)__env_close(dbp->dbenv, 0); - - return (ret); -} - -/* - * __db_init -- - * Initialize a DB structure. 
- */ -static int -__db_init(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - int ret; - - dbp->locker = NULL; - LOCK_INIT(dbp->handle_lock); - - TAILQ_INIT(&dbp->free_queue); - TAILQ_INIT(&dbp->active_queue); - TAILQ_INIT(&dbp->join_queue); - LIST_INIT(&dbp->s_secondaries); - - FLD_SET(dbp->am_ok, - DB_OK_BTREE | DB_OK_HASH | DB_OK_QUEUE | DB_OK_RECNO); - - /* DB PUBLIC HANDLE LIST BEGIN */ - dbp->associate = __db_associate_pp; - dbp->associate_foreign = __db_associate_foreign_pp; - dbp->close = __db_close_pp; - dbp->compact = __db_compact_pp; - dbp->cursor = __db_cursor_pp; - dbp->del = __db_del_pp; - dbp->dump = __db_dump_pp; - dbp->err = __dbh_err; - dbp->errx = __dbh_errx; - dbp->exists = __db_exists; - dbp->fd = __db_fd_pp; - dbp->get = __db_get_pp; - dbp->get_alloc = __db_get_alloc; - dbp->get_append_recno = __db_get_append_recno; - dbp->get_byteswapped = __db_get_byteswapped; - dbp->get_cachesize = __db_get_cachesize; - dbp->get_create_dir = __db_get_create_dir; - dbp->get_dbname = __db_get_dbname; - dbp->get_dup_compare = __db_get_dup_compare; - dbp->get_encrypt_flags = __db_get_encrypt_flags; - dbp->get_env = __db_get_env; - dbp->get_errcall = __db_get_errcall; - dbp->get_errfile = __db_get_errfile; - dbp->get_errpfx = __db_get_errpfx; - dbp->get_feedback = __db_get_feedback; - dbp->get_flags = __db_get_flags; - dbp->get_lorder = __db_get_lorder; - dbp->get_mpf = __db_get_mpf; - dbp->get_msgcall = __db_get_msgcall; - dbp->get_msgfile = __db_get_msgfile; - dbp->get_multiple = __db_get_multiple; - dbp->get_open_flags = __db_get_open_flags; - dbp->get_partition_dirs = __partition_get_dirs; - dbp->get_partition_callback = __partition_get_callback; - dbp->get_partition_keys = __partition_get_keys; - dbp->get_pagesize = __db_get_pagesize; - dbp->get_priority = __db_get_priority; - dbp->get_transactional = __db_get_transactional; - dbp->get_type = __db_get_type; - dbp->join = __db_join_pp; - dbp->key_range = __db_key_range_pp; - dbp->open = __db_open_pp; - 
dbp->pget = __db_pget_pp; - dbp->put = __db_put_pp; - dbp->remove = __db_remove_pp; - dbp->rename = __db_rename_pp; - dbp->set_alloc = __db_set_alloc; - dbp->set_append_recno = __db_set_append_recno; - dbp->set_cachesize = __db_set_cachesize; - dbp->set_create_dir = __db_set_create_dir; - dbp->set_dup_compare = __db_set_dup_compare; - dbp->set_encrypt = __db_set_encrypt; - dbp->set_errcall = __db_set_errcall; - dbp->set_errfile = __db_set_errfile; - dbp->set_errpfx = __db_set_errpfx; - dbp->set_feedback = __db_set_feedback; - dbp->set_flags = __db_set_flags; - dbp->set_lorder = __db_set_lorder; - dbp->set_msgcall = __db_set_msgcall; - dbp->set_msgfile = __db_set_msgfile; - dbp->set_pagesize = __db_set_pagesize; - dbp->set_paniccall = __db_set_paniccall; - dbp->set_partition = __partition_set; - dbp->set_partition_dirs = __partition_set_dirs; - dbp->set_priority = __db_set_priority; - dbp->sort_multiple = __db_sort_multiple; - dbp->stat = __db_stat_pp; - dbp->stat_print = __db_stat_print_pp; - dbp->sync = __db_sync_pp; - dbp->truncate = __db_truncate_pp; - dbp->upgrade = __db_upgrade_pp; - dbp->verify = __db_verify_pp; - /* DB PUBLIC HANDLE LIST END */ - - /* Access method specific. */ - if ((ret = __bam_db_create(dbp)) != 0) - return (ret); - if ((ret = __ham_db_create(dbp)) != 0) - return (ret); - if ((ret = __qam_db_create(dbp)) != 0) - return (ret); - -#ifdef HAVE_RPC - /* - * RPC specific: must be last, as we replace methods set by the - * access methods. - */ - if (RPC_ON(dbp->dbenv)) { - __dbcl_dbp_init(dbp); - /* - * !!! - * We wrap the DB->open method for RPC, and the rpc.src file - * can't handle that. - */ - dbp->open = __dbcl_db_open_wrap; - if ((ret = __dbcl_db_create(dbp, dbp->dbenv, flags)) != 0) - return (ret); - } -#else - COMPQUIET(flags, 0); -#endif - - return (0); -} - -/* - * __dbh_am_chk -- - * Error if an unreasonable method is called. 
- * - * PUBLIC: int __dbh_am_chk __P((DB *, u_int32_t)); - */ -int -__dbh_am_chk(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - /* - * We start out allowing any access methods to be called, and as the - * application calls the methods the options become restricted. The - * idea is to quit as soon as an illegal method combination is called. - */ - if ((LF_ISSET(DB_OK_BTREE) && FLD_ISSET(dbp->am_ok, DB_OK_BTREE)) || - (LF_ISSET(DB_OK_HASH) && FLD_ISSET(dbp->am_ok, DB_OK_HASH)) || - (LF_ISSET(DB_OK_QUEUE) && FLD_ISSET(dbp->am_ok, DB_OK_QUEUE)) || - (LF_ISSET(DB_OK_RECNO) && FLD_ISSET(dbp->am_ok, DB_OK_RECNO))) { - FLD_CLR(dbp->am_ok, ~flags); - return (0); - } - - __db_errx(dbp->env, - "call implies an access method which is inconsistent with previous calls"); - return (EINVAL); -} - -/* - * __dbh_err -- - * Db.err method. - */ -static void -#ifdef STDC_HEADERS -__dbh_err(DB *dbp, int error, const char *fmt, ...) -#else -__dbh_err(dbp, error, fmt, va_alist) - DB *dbp; - int error; - const char *fmt; - va_dcl -#endif -{ - /* Message with error string, to stderr by default. */ - DB_REAL_ERR(dbp->dbenv, error, DB_ERROR_SET, 1, fmt); -} - -/* - * __dbh_errx -- - * Db.errx method. - */ -static void -#ifdef STDC_HEADERS -__dbh_errx(DB *dbp, const char *fmt, ...) -#else -__dbh_errx(dbp, fmt, va_alist) - DB *dbp; - const char *fmt; - va_dcl -#endif -{ - /* Message without error string, to stderr by default. */ - DB_REAL_ERR(dbp->dbenv, 0, DB_ERROR_NOT_SET, 1, fmt); -} - -/* - * __db_get_byteswapped -- - * Return if database requires byte swapping. - */ -static int -__db_get_byteswapped(dbp, isswapped) - DB *dbp; - int *isswapped; -{ - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_byteswapped"); - - *isswapped = F_ISSET(dbp, DB_AM_SWAP) ? 1 : 0; - return (0); -} - -/* - * __db_get_dbname -- - * Get the name of the database as passed to DB->open. 
- */ -static int -__db_get_dbname(dbp, fnamep, dnamep) - DB *dbp; - const char **fnamep, **dnamep; -{ - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_dbname"); - - if (fnamep != NULL) - *fnamep = dbp->fname; - if (dnamep != NULL) - *dnamep = dbp->dname; - return (0); -} - -/* - * __db_get_env -- - * Get the DB_ENV handle that was passed to db_create. - */ -static DB_ENV * -__db_get_env(dbp) - DB *dbp; -{ - return (dbp->dbenv); -} - -/* - * __db_get_mpf -- - * Get the underlying DB_MPOOLFILE handle. - */ -static DB_MPOOLFILE * -__db_get_mpf(dbp) - DB *dbp; -{ - return (dbp->mpf); -} - -/* - * get_multiple -- - * Return whether this DB handle references a physical file with multiple - * databases. - */ -static int -__db_get_multiple(dbp) - DB *dbp; -{ - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_multiple"); - - /* - * Only return TRUE if the handle is for the master database, not for - * any subdatabase in the physical file. If it's a Btree, with the - * subdatabases flag set, and the meta-data page has the right value, - * return TRUE. (We don't need to check it's a Btree, I suppose, but - * it doesn't hurt.) - */ - return (dbp->type == DB_BTREE && - F_ISSET(dbp, DB_AM_SUBDB) && - dbp->meta_pgno == PGNO_BASE_MD ? 1 : 0); -} - -/* - * get_transactional -- - * Return whether this database was created in a transaction. - */ -static int -__db_get_transactional(dbp) - DB *dbp; -{ - return (F_ISSET(dbp, DB_AM_TXN) ? 1 : 0); -} - -/* - * __db_get_type -- - * Return type of underlying database. - */ -static int -__db_get_type(dbp, dbtype) - DB *dbp; - DBTYPE *dbtype; -{ - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_type"); - - *dbtype = dbp->type; - return (0); -} - -/* - * __db_get_append_recno -- - * Get record number append routine. 
- */ -static int -__db_get_append_recno(dbp, funcp) - DB *dbp; - int (**funcp) __P((DB *, DBT *, db_recno_t)); -{ - DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO); - if (funcp) - *funcp = dbp->db_append_recno; - - return (0); -} -/* - * __db_set_append_recno -- - * Set record number append routine. - */ -static int -__db_set_append_recno(dbp, func) - DB *dbp; - int (*func) __P((DB *, DBT *, db_recno_t)); -{ - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_append_recno"); - DB_ILLEGAL_METHOD(dbp, DB_OK_QUEUE | DB_OK_RECNO); - - dbp->db_append_recno = func; - - return (0); -} - -/* - * __db_get_cachesize -- - * Get underlying cache size. - */ -static int -__db_get_cachesize(dbp, cache_gbytesp, cache_bytesp, ncachep) - DB *dbp; - u_int32_t *cache_gbytesp, *cache_bytesp; - int *ncachep; -{ - DB_ILLEGAL_IN_ENV(dbp, "DB->get_cachesize"); - - return (__memp_get_cachesize(dbp->dbenv, - cache_gbytesp, cache_bytesp, ncachep)); -} - -/* - * __db_set_cachesize -- - * Set underlying cache size. - */ -static int -__db_set_cachesize(dbp, cache_gbytes, cache_bytes, ncache) - DB *dbp; - u_int32_t cache_gbytes, cache_bytes; - int ncache; -{ - DB_ILLEGAL_IN_ENV(dbp, "DB->set_cachesize"); - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_cachesize"); - - return (__memp_set_cachesize( - dbp->dbenv, cache_gbytes, cache_bytes, ncache)); -} - -static int -__db_set_create_dir(dbp, dir) - DB *dbp; - const char *dir; -{ - DB_ENV *dbenv; - int i; - - dbenv = dbp->dbenv; - - for (i = 0; i < dbenv->data_next; i++) - if (strcmp(dir, dbenv->db_data_dir[i]) == 0) - break; - - if (i == dbenv->data_next) { - __db_errx(dbp->env, - "Directory %s not in environment list.", dir); - return (EINVAL); - } - - dbp->dirname = dbenv->db_data_dir[i]; - return (0); -} - -static int -__db_get_create_dir(dbp, dirp) - DB *dbp; - const char **dirp; -{ - *dirp = dbp->dirname; - return (0); -} - -/* - * __db_get_dup_compare -- - * Get duplicate comparison routine. 
- */ -static int -__db_get_dup_compare(dbp, funcp) - DB *dbp; - int (**funcp) __P((DB *, const DBT *, const DBT *)); -{ - - DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH); - - if (funcp != NULL) { -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbp)) { - *funcp = - ((BTREE *)dbp->bt_internal)->compress_dup_compare; - } else -#endif - *funcp = dbp->dup_compare; - } - - return (0); -} - -/* - * __db_set_dup_compare -- - * Set duplicate comparison routine. - */ -static int -__db_set_dup_compare(dbp, func) - DB *dbp; - int (*func) __P((DB *, const DBT *, const DBT *)); -{ - int ret; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_dup_compare"); - DB_ILLEGAL_METHOD(dbp, DB_OK_BTREE | DB_OK_HASH); - - if ((ret = __db_set_flags(dbp, DB_DUPSORT)) != 0) - return (ret); - -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbp)) { - dbp->dup_compare = __bam_compress_dupcmp; - ((BTREE *)dbp->bt_internal)->compress_dup_compare = func; - } else -#endif - dbp->dup_compare = func; - - return (0); -} - -/* - * __db_get_encrypt_flags -- - */ -static int -__db_get_encrypt_flags(dbp, flagsp) - DB *dbp; - u_int32_t *flagsp; -{ - DB_ILLEGAL_IN_ENV(dbp, "DB->get_encrypt_flags"); - - return (__env_get_encrypt_flags(dbp->dbenv, flagsp)); -} - -/* - * __db_set_encrypt -- - * Set database passwd. - */ -static int -__db_set_encrypt(dbp, passwd, flags) - DB *dbp; - const char *passwd; - u_int32_t flags; -{ - DB_CIPHER *db_cipher; - int ret; - - DB_ILLEGAL_IN_ENV(dbp, "DB->set_encrypt"); - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_encrypt"); - - if ((ret = __env_set_encrypt(dbp->dbenv, passwd, flags)) != 0) - return (ret); - - /* - * In a real env, this gets initialized with the region. In a local - * env, we must do it here. 
- */ - db_cipher = dbp->env->crypto_handle; - if (!F_ISSET(db_cipher, CIPHER_ANY) && - (ret = db_cipher->init(dbp->env, db_cipher)) != 0) - return (ret); - - return (__db_set_flags(dbp, DB_ENCRYPT)); -} - -static void -__db_get_errcall(dbp, errcallp) - DB *dbp; - void (**errcallp) __P((const DB_ENV *, const char *, const char *)); -{ - __env_get_errcall(dbp->dbenv, errcallp); -} - -static void -__db_set_errcall(dbp, errcall) - DB *dbp; - void (*errcall) __P((const DB_ENV *, const char *, const char *)); -{ - __env_set_errcall(dbp->dbenv, errcall); -} - -static void -__db_get_errfile(dbp, errfilep) - DB *dbp; - FILE **errfilep; -{ - __env_get_errfile(dbp->dbenv, errfilep); -} - -static void -__db_set_errfile(dbp, errfile) - DB *dbp; - FILE *errfile; -{ - __env_set_errfile(dbp->dbenv, errfile); -} - -static void -__db_get_errpfx(dbp, errpfxp) - DB *dbp; - const char **errpfxp; -{ - __env_get_errpfx(dbp->dbenv, errpfxp); -} - -static void -__db_set_errpfx(dbp, errpfx) - DB *dbp; - const char *errpfx; -{ - __env_set_errpfx(dbp->dbenv, errpfx); -} - -static int -__db_get_feedback(dbp, feedbackp) - DB *dbp; - void (**feedbackp) __P((DB *, int, int)); -{ - if (feedbackp != NULL) - *feedbackp = dbp->db_feedback; - return (0); -} - -static int -__db_set_feedback(dbp, feedback) - DB *dbp; - void (*feedback) __P((DB *, int, int)); -{ - dbp->db_feedback = feedback; - return (0); -} - -/* - * __db_map_flags -- - * Maps between public and internal flag values. - * This function doesn't check for validity, so it can't fail. 
- */ -static void -__db_map_flags(dbp, inflagsp, outflagsp) - DB *dbp; - u_int32_t *inflagsp, *outflagsp; -{ - COMPQUIET(dbp, NULL); - - if (FLD_ISSET(*inflagsp, DB_CHKSUM)) { - FLD_SET(*outflagsp, DB_AM_CHKSUM); - FLD_CLR(*inflagsp, DB_CHKSUM); - } - if (FLD_ISSET(*inflagsp, DB_ENCRYPT)) { - FLD_SET(*outflagsp, DB_AM_ENCRYPT | DB_AM_CHKSUM); - FLD_CLR(*inflagsp, DB_ENCRYPT); - } - if (FLD_ISSET(*inflagsp, DB_TXN_NOT_DURABLE)) { - FLD_SET(*outflagsp, DB_AM_NOT_DURABLE); - FLD_CLR(*inflagsp, DB_TXN_NOT_DURABLE); - } -} - -/* - * __db_get_flags -- - * The DB->get_flags method. - * - * PUBLIC: int __db_get_flags __P((DB *, u_int32_t *)); - */ -int -__db_get_flags(dbp, flagsp) - DB *dbp; - u_int32_t *flagsp; -{ - static const u_int32_t db_flags[] = { - DB_CHKSUM, - DB_DUP, - DB_DUPSORT, - DB_ENCRYPT, -#ifdef HAVE_QUEUE - DB_INORDER, -#endif - DB_RECNUM, - DB_RENUMBER, - DB_REVSPLITOFF, - DB_SNAPSHOT, - DB_TXN_NOT_DURABLE, - 0 - }; - u_int32_t f, flags, mapped_flag; - int i; - - flags = 0; - for (i = 0; (f = db_flags[i]) != 0; i++) { - mapped_flag = 0; - __db_map_flags(dbp, &f, &mapped_flag); - __bam_map_flags(dbp, &f, &mapped_flag); - __ram_map_flags(dbp, &f, &mapped_flag); -#ifdef HAVE_QUEUE - __qam_map_flags(dbp, &f, &mapped_flag); -#endif - DB_ASSERT(dbp->env, f == 0); - if (F_ISSET(dbp, mapped_flag) == mapped_flag) - LF_SET(db_flags[i]); - } - - *flagsp = flags; - return (0); -} - -/* - * __db_set_flags -- - * DB->set_flags. 
- * - * PUBLIC: int __db_set_flags __P((DB *, u_int32_t)); - */ -int -__db_set_flags(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - ENV *env; - int ret; - - env = dbp->env; - - if (LF_ISSET(DB_ENCRYPT) && !CRYPTO_ON(env)) { - __db_errx(env, - "Database environment not configured for encryption"); - return (EINVAL); - } - if (LF_ISSET(DB_TXN_NOT_DURABLE)) - ENV_REQUIRES_CONFIG(env, - env->tx_handle, "DB_NOT_DURABLE", DB_INIT_TXN); - - __db_map_flags(dbp, &flags, &dbp->flags); - - if ((ret = __bam_set_flags(dbp, &flags)) != 0) - return (ret); - if ((ret = __ram_set_flags(dbp, &flags)) != 0) - return (ret); -#ifdef HAVE_QUEUE - if ((ret = __qam_set_flags(dbp, &flags)) != 0) - return (ret); -#endif - - return (flags == 0 ? 0 : __db_ferr(env, "DB->set_flags", 0)); -} - -/* - * __db_get_lorder -- - * Get whether lorder is swapped or not. - * - * PUBLIC: int __db_get_lorder __P((DB *, int *)); - */ -int -__db_get_lorder(dbp, db_lorderp) - DB *dbp; - int *db_lorderp; -{ - int ret; - - /* Flag if the specified byte order requires swapping. */ - switch (ret = __db_byteorder(dbp->env, 1234)) { - case 0: - *db_lorderp = F_ISSET(dbp, DB_AM_SWAP) ? 4321 : 1234; - break; - case DB_SWAPBYTES: - *db_lorderp = F_ISSET(dbp, DB_AM_SWAP) ? 1234 : 4321; - break; - default: - return (ret); - /* NOTREACHED */ - } - - return (0); -} - -/* - * __db_set_lorder -- - * Set whether lorder is swapped or not. - * - * PUBLIC: int __db_set_lorder __P((DB *, int)); - */ -int -__db_set_lorder(dbp, db_lorder) - DB *dbp; - int db_lorder; -{ - int ret; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_lorder"); - - /* Flag if the specified byte order requires swapping. 
*/ - switch (ret = __db_byteorder(dbp->env, db_lorder)) { - case 0: - F_CLR(dbp, DB_AM_SWAP); - break; - case DB_SWAPBYTES: - F_SET(dbp, DB_AM_SWAP); - break; - default: - return (ret); - /* NOTREACHED */ - } - return (0); -} - -static int -__db_get_alloc(dbp, mal_funcp, real_funcp, free_funcp) - DB *dbp; - void *(**mal_funcp) __P((size_t)); - void *(**real_funcp) __P((void *, size_t)); - void (**free_funcp) __P((void *)); -{ - DB_ILLEGAL_IN_ENV(dbp, "DB->get_alloc"); - - return (__env_get_alloc(dbp->dbenv, mal_funcp, - real_funcp, free_funcp)); -} - -static int -__db_set_alloc(dbp, mal_func, real_func, free_func) - DB *dbp; - void *(*mal_func) __P((size_t)); - void *(*real_func) __P((void *, size_t)); - void (*free_func) __P((void *)); -{ - DB_ILLEGAL_IN_ENV(dbp, "DB->set_alloc"); - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_alloc"); - - return (__env_set_alloc(dbp->dbenv, mal_func, real_func, free_func)); -} - -static void -__db_get_msgcall(dbp, msgcallp) - DB *dbp; - void (**msgcallp) __P((const DB_ENV *, const char *)); -{ - __env_get_msgcall(dbp->dbenv, msgcallp); -} - -static void -__db_set_msgcall(dbp, msgcall) - DB *dbp; - void (*msgcall) __P((const DB_ENV *, const char *)); -{ - __env_set_msgcall(dbp->dbenv, msgcall); -} - -static void -__db_get_msgfile(dbp, msgfilep) - DB *dbp; - FILE **msgfilep; -{ - __env_get_msgfile(dbp->dbenv, msgfilep); -} - -static void -__db_set_msgfile(dbp, msgfile) - DB *dbp; - FILE *msgfile; -{ - __env_set_msgfile(dbp->dbenv, msgfile); -} - -static int -__db_get_pagesize(dbp, db_pagesizep) - DB *dbp; - u_int32_t *db_pagesizep; -{ - *db_pagesizep = dbp->pgsize; - return (0); -} - -/* - * __db_set_pagesize -- - * DB->set_pagesize - * - * PUBLIC: int __db_set_pagesize __P((DB *, u_int32_t)); - */ -int -__db_set_pagesize(dbp, db_pagesize) - DB *dbp; - u_int32_t db_pagesize; -{ - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_pagesize"); - - if (db_pagesize < DB_MIN_PGSIZE) { - __db_errx(dbp->env, "page sizes may not be smaller than %lu", - 
(u_long)DB_MIN_PGSIZE); - return (EINVAL); - } - if (db_pagesize > DB_MAX_PGSIZE) { - __db_errx(dbp->env, "page sizes may not be larger than %lu", - (u_long)DB_MAX_PGSIZE); - return (EINVAL); - } - - /* - * We don't want anything that's not a power-of-2, as we rely on that - * for alignment of various types on the pages. - */ - if (!POWER_OF_TWO(db_pagesize)) { - __db_errx(dbp->env, "page sizes must be a power-of-2"); - return (EINVAL); - } - - /* - * XXX - * Should we be checking for a page size that's not a multiple of 512, - * so that we never try and write less than a disk sector? - */ - dbp->pgsize = db_pagesize; - - return (0); -} - -static int -__db_set_paniccall(dbp, paniccall) - DB *dbp; - void (*paniccall) __P((DB_ENV *, int)); -{ - return (__env_set_paniccall(dbp->dbenv, paniccall)); -} - -static int -__db_set_priority(dbp, priority) - DB *dbp; - DB_CACHE_PRIORITY priority; -{ - dbp->priority = priority; - return (0); -} - -static int -__db_get_priority(dbp, priority) - DB *dbp; - DB_CACHE_PRIORITY *priority; -{ - *priority = dbp->priority; - return (0); -} diff --git a/db/db_open.c b/db/db_open.c deleted file mode 100644 index 5c5db09..0000000 --- a/db/db_open.c +++ /dev/null @@ -1,628 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_swap.h" -#include "dbinc/btree.h" -#include "dbinc/crypto.h" -#include "dbinc/hmac.h" -#include "dbinc/fop.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/qam.h" -#include "dbinc/txn.h" - -/* - * __db_open -- - * DB->open method. - * - * This routine gets called in three different ways: - * - * 1. It can be called to open a file/database. In this case, subdb will - * be NULL and meta_pgno will be PGNO_BASE_MD. - * 2. 
It can be called to open a subdatabase during normal operation. In - * this case, name and subname will both be non-NULL and meta_pgno will - * be PGNO_BASE_MD (also PGNO_INVALID). - * 3. It can be called to open an in-memory database (name == NULL; - * subname = name). - * 4. It can be called during recovery to open a file/database, in which case - * name will be non-NULL, subname will be NULL, and meta-pgno will be - * PGNO_BASE_MD. - * 5. It can be called during recovery to open a subdatabase, in which case - * name will be non-NULL, subname may be NULL and meta-pgno will be - * a valid pgno (i.e., not PGNO_BASE_MD). - * 6. It can be called during recovery to open an in-memory database. - * - * PUBLIC: int __db_open __P((DB *, DB_THREAD_INFO *, DB_TXN *, - * PUBLIC: const char *, const char *, DBTYPE, u_int32_t, int, db_pgno_t)); - */ -int -__db_open(dbp, ip, txn, fname, dname, type, flags, mode, meta_pgno) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *fname, *dname; - DBTYPE type; - u_int32_t flags; - int mode; - db_pgno_t meta_pgno; -{ - DB *tdbp; - ENV *env; - int ret; - u_int32_t id; - - env = dbp->env; - id = TXN_INVALID; - - /* - * We must flush any existing pages before truncating the file - * since they could age out of mpool and overwrite new pages. - */ - if (LF_ISSET(DB_TRUNCATE)) { - if ((ret = __db_create_internal(&tdbp, dbp->env, 0)) != 0) - goto err; - ret = __db_open(tdbp, ip, txn, fname, dname, DB_UNKNOWN, - DB_NOERROR | (flags & ~(DB_TRUNCATE|DB_CREATE)), - mode, meta_pgno); - if (ret == 0) - ret = __memp_ftruncate(tdbp->mpf, txn, ip, 0, 0); - (void)__db_close(tdbp, txn, DB_NOSYNC); - if (ret != 0 && ret != ENOENT && ret != EINVAL) - goto err; - ret = 0; - } - - DB_TEST_RECOVERY(dbp, DB_TEST_PREOPEN, ret, fname); - - /* - * If the environment was configured with threads, the DB handle - * must also be free-threaded, so we force the DB_THREAD flag on. 
- * (See SR #2033 for why this is a requirement--recovery needs - * to be able to grab a dbp using __db_fileid_to_dbp, and it has - * no way of knowing which dbp goes with which thread, so whichever - * one it finds has to be usable in any of them.) - */ - if (F_ISSET(env, ENV_THREAD)) - LF_SET(DB_THREAD); - - /* Convert any DB->open flags. */ - if (LF_ISSET(DB_RDONLY)) - F_SET(dbp, DB_AM_RDONLY); - if (LF_ISSET(DB_READ_UNCOMMITTED)) - F_SET(dbp, DB_AM_READ_UNCOMMITTED); - - if (IS_REAL_TXN(txn)) - F_SET(dbp, DB_AM_TXN); - - /* Fill in the type. */ - dbp->type = type; - - /* - * If both fname and subname are NULL, it's always a create, so make - * sure that we have both DB_CREATE and a type specified. It would - * be nice if this checking were done in __db_open where most of the - * interface checking is done, but this interface (__db_dbopen) is - * used by the recovery and limbo system, so we need to safeguard - * this interface as well. - */ - if (fname == NULL) { - if (dbp->p_internal != NULL) { - __db_errx(env, - "Partitioned databases may not be in memory."); - return (ENOENT); - } - if (dname == NULL) { - if (!LF_ISSET(DB_CREATE)) { - __db_errx(env, - "DB_CREATE must be specified to create databases."); - return (ENOENT); - } - - F_SET(dbp, DB_AM_INMEM); - F_SET(dbp, DB_AM_CREATED); - - if (dbp->type == DB_UNKNOWN) { - __db_errx(env, - "DBTYPE of unknown without existing file"); - return (EINVAL); - } - - if (dbp->pgsize == 0) - dbp->pgsize = DB_DEF_IOSIZE; - - /* - * If the file is a temporary file and we're - * doing locking, then we have to create a - * unique file ID. We can't use our normal - * dev/inode pair (or whatever this OS uses - * in place of dev/inode pairs) because no - * backing file will be created until the - * mpool cache is filled forcing the buffers - * to disk. Grab a random locker ID to use - * as a file ID. 
The created ID must never - * match a potential real file ID -- we know - * it won't because real file IDs contain a - * time stamp after the dev/inode pair, and - * we're simply storing a 4-byte value. - - * !!! - * Store the locker in the file id structure - * -- we can get it from there as necessary, - * and it saves having two copies. - */ - if (LOCKING_ON(env) && (ret = __lock_id(env, - (u_int32_t *)dbp->fileid, NULL)) != 0) - return (ret); - } else - MAKE_INMEM(dbp); - - /* - * Normally we would do handle locking here, however, with - * in-memory files, we cannot do any database manipulation - * until the mpool is open, so it happens later. - */ - } else if (dname == NULL && meta_pgno == PGNO_BASE_MD) { - /* Open/create the underlying file. Acquire locks. */ - if ((ret = __fop_file_setup(dbp, ip, - txn, fname, mode, flags, &id)) != 0) - return (ret); - } else { - if (dbp->p_internal != NULL) { - __db_errx(env, - "Partitioned databases may not be included with multiple databases."); - return (ENOENT); - } - if ((ret = __fop_subdb_setup(dbp, ip, - txn, fname, dname, mode, flags)) != 0) - return (ret); - meta_pgno = dbp->meta_pgno; - } - - /* Set up the underlying environment. */ - if ((ret = __env_setup(dbp, txn, fname, dname, id, flags)) != 0) - return (ret); - - /* For in-memory databases, we now need to open/create the database. 
*/ - if (F_ISSET(dbp, DB_AM_INMEM)) { - if (dname == NULL) - ret = __db_new_file(dbp, ip, txn, NULL, NULL); - else { - id = TXN_INVALID; - if ((ret = __fop_file_setup(dbp, ip, - txn, dname, mode, flags, &id)) == 0 && - DBENV_LOGGING(env) && !F_ISSET(dbp, DB_AM_RECOVER) -#if !defined(DEBUG_ROP) && !defined(DEBUG_WOP) && !defined(DIAGNOSTIC) - && txn != NULL -#endif -#if !defined(DEBUG_ROP) - && !F_ISSET(dbp, DB_AM_RDONLY) -#endif - ) - ret = __dbreg_log_id(dbp, - txn, dbp->log_filename->id, 1); - } - if (ret != 0) - goto err; - } - - switch (dbp->type) { - case DB_BTREE: - ret = __bam_open(dbp, ip, txn, fname, meta_pgno, flags); - break; - case DB_HASH: - ret = __ham_open(dbp, ip, txn, fname, meta_pgno, flags); - break; - case DB_RECNO: - ret = __ram_open(dbp, ip, txn, fname, meta_pgno, flags); - break; - case DB_QUEUE: - ret = __qam_open( - dbp, ip, txn, fname, meta_pgno, mode, flags); - break; - case DB_UNKNOWN: - return ( - __db_unknown_type(env, "__db_dbopen", dbp->type)); - } - if (ret != 0) - goto err; - -#ifdef HAVE_PARTITION - if (dbp->p_internal != NULL && (ret = - __partition_open(dbp, ip, txn, fname, type, flags, mode, 1)) != 0) - goto err; -#endif - DB_TEST_RECOVERY(dbp, DB_TEST_POSTOPEN, ret, fname); - - /* - * Temporary files don't need handle locks, so we only have to check - * for a handle lock downgrade or lockevent in the case of named - * files. - */ - if (!F_ISSET(dbp, DB_AM_RECOVER) && (fname != NULL || dname != NULL) && - LOCK_ISSET(dbp->handle_lock)) { - if (IS_REAL_TXN(txn)) - ret = __txn_lockevent(env, - txn, dbp, &dbp->handle_lock, dbp->locker); - else if (LOCKING_ON(env)) - /* Trade write handle lock for read handle lock. 
*/ - ret = __lock_downgrade(env, - &dbp->handle_lock, DB_LOCK_READ, 0); - } -DB_TEST_RECOVERY_LABEL -err: - return (ret); -} - -/* - * __db_get_open_flags -- - * Accessor for flags passed into DB->open call - * - * PUBLIC: int __db_get_open_flags __P((DB *, u_int32_t *)); - */ -int -__db_get_open_flags(dbp, flagsp) - DB *dbp; - u_int32_t *flagsp; -{ - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->get_open_flags"); - - *flagsp = dbp->open_flags; - return (0); -} - -/* - * __db_new_file -- - * Create a new database file. - * - * PUBLIC: int __db_new_file __P((DB *, - * PUBLIC: DB_THREAD_INFO *, DB_TXN *, DB_FH *, const char *)); - */ -int -__db_new_file(dbp, ip, txn, fhp, name) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DB_FH *fhp; - const char *name; -{ - int ret; - - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - ret = __bam_new_file(dbp, ip, txn, fhp, name); - break; - case DB_HASH: - ret = __ham_new_file(dbp, ip, txn, fhp, name); - break; - case DB_QUEUE: - ret = __qam_new_file(dbp, ip, txn, fhp, name); - break; - case DB_UNKNOWN: - default: - __db_errx(dbp->env, - "%s: Invalid type %d specified", name, dbp->type); - ret = EINVAL; - break; - } - - DB_TEST_RECOVERY(dbp, DB_TEST_POSTLOGMETA, ret, name); - /* Sync the file in preparation for moving it into place. */ - if (ret == 0 && fhp != NULL) - ret = __os_fsync(dbp->env, fhp); - - DB_TEST_RECOVERY(dbp, DB_TEST_POSTSYNC, ret, name); - -DB_TEST_RECOVERY_LABEL - return (ret); -} - -/* - * __db_init_subdb -- - * Initialize the dbp for a subdb. - * - * PUBLIC: int __db_init_subdb __P((DB *, - * PUBLIC: DB *, const char *, DB_THREAD_INFO *, DB_TXN *)); - */ -int -__db_init_subdb(mdbp, dbp, name, ip, txn) - DB *mdbp, *dbp; - const char *name; - DB_THREAD_INFO *ip; - DB_TXN *txn; -{ - DBMETA *meta; - DB_MPOOLFILE *mpf; - int ret, t_ret; - - ret = 0; - if (!F_ISSET(dbp, DB_AM_CREATED)) { - /* Subdb exists; read meta-data page and initialize. 
*/ - mpf = mdbp->mpf; - if ((ret = __memp_fget(mpf, &dbp->meta_pgno, - ip, txn, 0, &meta)) != 0) - goto err; - ret = __db_meta_setup(mdbp->env, dbp, name, meta, 0, 0); - if ((t_ret = __memp_fput(mpf, - ip, meta, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - /* - * If __db_meta_setup found that the meta-page hadn't - * been written out during recovery, we can just return. - */ - if (ret == ENOENT) - ret = 0; - goto err; - } - - /* Handle the create case here. */ - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - ret = __bam_new_subdb(mdbp, dbp, ip, txn); - break; - case DB_HASH: - ret = __ham_new_subdb(mdbp, dbp, ip, txn); - break; - case DB_QUEUE: - ret = EINVAL; - break; - case DB_UNKNOWN: - default: - __db_errx(dbp->env, - "Invalid subdatabase type %d specified", dbp->type); - return (EINVAL); - } - -err: return (ret); -} - -/* - * __db_chk_meta -- - * Take a buffer containing a meta-data page and check it for a valid LSN, - * checksum (and verify the checksum if necessary) and possibly decrypt it. - * - * Return 0 on success, >0 (errno) on error, -1 on checksum mismatch. - * - * PUBLIC: int __db_chk_meta __P((ENV *, DB *, DBMETA *, u_int32_t)); - */ -int -__db_chk_meta(env, dbp, meta, flags) - ENV *env; - DB *dbp; - DBMETA *meta; - u_int32_t flags; -{ - DB_LSN swap_lsn; - int is_hmac, ret, swapped; - u_int32_t magic, orig_chk; - u_int8_t *chksum; - - ret = 0; - swapped = 0; - - if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM)) { - if (dbp != NULL) - F_SET(dbp, DB_AM_CHKSUM); - - is_hmac = meta->encrypt_alg == 0 ? 0 : 1; - chksum = ((BTMETA *)meta)->chksum; - - /* - * If we need to swap, the checksum function overwrites the - * original checksum with 0, so we need to save a copy of the - * original for swapping later. - */ - orig_chk = *(u_int32_t *)chksum; - - /* - * We cannot add this to __db_metaswap because that gets done - * later after we've verified the checksum or decrypted. 
- */ - if (LF_ISSET(DB_CHK_META)) { - swapped = 0; -chk_retry: if ((ret = - __db_check_chksum(env, NULL, env->crypto_handle, - chksum, meta, DBMETASIZE, is_hmac)) != 0) { - if (is_hmac || swapped) - return (ret); - - M_32_SWAP(orig_chk); - swapped = 1; - *(u_int32_t *)chksum = orig_chk; - goto chk_retry; - } - } - } else if (dbp != NULL) - F_CLR(dbp, DB_AM_CHKSUM); - -#ifdef HAVE_CRYPTO - ret = __crypto_decrypt_meta(env, - dbp, (u_int8_t *)meta, LF_ISSET(DB_CHK_META)); -#endif - - /* Now that we're decrypted, we can check LSN. */ - if (LOGGING_ON(env) && !LF_ISSET(DB_CHK_NOLSN)) { - /* - * This gets called both before and after swapping, so we - * need to check ourselves. If we already swapped it above, - * we'll know that here. - */ - - swap_lsn = meta->lsn; - magic = meta->magic; -lsn_retry: - if (swapped) { - M_32_SWAP(swap_lsn.file); - M_32_SWAP(swap_lsn.offset); - M_32_SWAP(magic); - } - switch (magic) { - case DB_BTREEMAGIC: - case DB_HASHMAGIC: - case DB_QAMMAGIC: - case DB_RENAMEMAGIC: - break; - default: - if (swapped) - return (EINVAL); - swapped = 1; - goto lsn_retry; - } - if (!IS_REP_CLIENT(env) && - !IS_NOT_LOGGED_LSN(swap_lsn) && !IS_ZERO_LSN(swap_lsn)) - /* Need to do check. */ - ret = __log_check_page_lsn(env, dbp, &swap_lsn); - } - return (ret); -} - -/* - * __db_meta_setup -- - * - * Take a buffer containing a meta-data page and figure out if it's - * valid, and if so, initialize the dbp from the meta-data page. - * - * PUBLIC: int __db_meta_setup __P((ENV *, - * PUBLIC: DB *, const char *, DBMETA *, u_int32_t, u_int32_t)); - */ -int -__db_meta_setup(env, dbp, name, meta, oflags, flags) - ENV *env; - DB *dbp; - const char *name; - DBMETA *meta; - u_int32_t oflags; - u_int32_t flags; -{ - u_int32_t magic; - int ret; - - ret = 0; - - /* - * Figure out what access method we're dealing with, and then - * call access method specific code to check error conditions - * based on conflicts between the found file and application - * arguments. 
A found file overrides some user information -- - * we don't consider it an error, for example, if the user set - * an expected byte order and the found file doesn't match it. - */ - F_CLR(dbp, DB_AM_SWAP | DB_AM_IN_RENAME); - magic = meta->magic; - -swap_retry: - switch (magic) { - case DB_BTREEMAGIC: - case DB_HASHMAGIC: - case DB_QAMMAGIC: - case DB_RENAMEMAGIC: - break; - case 0: - /* - * The only time this should be 0 is if we're in the - * midst of opening a subdb during recovery and that - * subdatabase had its meta-data page allocated, but - * not yet initialized. - */ - if (F_ISSET(dbp, DB_AM_SUBDB) && ((IS_RECOVERING(env) && - F_ISSET(env->lg_handle, DBLOG_FORCE_OPEN)) || - meta->pgno != PGNO_INVALID)) - return (ENOENT); - - goto bad_format; - default: - if (F_ISSET(dbp, DB_AM_SWAP)) - goto bad_format; - - M_32_SWAP(magic); - F_SET(dbp, DB_AM_SWAP); - goto swap_retry; - } - - /* - * We can only check the meta page if we are sure we have a meta page. - * If it is random data, then this check can fail. So only now can we - * checksum and decrypt. Don't distinguish between configuration and - * checksum match errors here, because we haven't opened the database - * and even a checksum error isn't a reason to panic the environment. 
- */ - if ((ret = __db_chk_meta(env, dbp, meta, flags)) != 0) { - if (ret == -1) - __db_errx(env, - "%s: metadata page checksum error", name); - goto bad_format; - } - - switch (magic) { - case DB_BTREEMAGIC: - if (dbp->type != DB_UNKNOWN && - dbp->type != DB_RECNO && dbp->type != DB_BTREE) - goto bad_format; - - flags = meta->flags; - if (F_ISSET(dbp, DB_AM_SWAP)) - M_32_SWAP(flags); - if (LF_ISSET(BTM_RECNO)) - dbp->type = DB_RECNO; - else - dbp->type = DB_BTREE; - if ((oflags & DB_TRUNCATE) == 0 && (ret = - __bam_metachk(dbp, name, (BTMETA *)meta)) != 0) - return (ret); - break; - case DB_HASHMAGIC: - if (dbp->type != DB_UNKNOWN && dbp->type != DB_HASH) - goto bad_format; - - dbp->type = DB_HASH; - if ((oflags & DB_TRUNCATE) == 0 && (ret = - __ham_metachk(dbp, name, (HMETA *)meta)) != 0) - return (ret); - break; - case DB_QAMMAGIC: - if (dbp->type != DB_UNKNOWN && dbp->type != DB_QUEUE) - goto bad_format; - dbp->type = DB_QUEUE; - if ((oflags & DB_TRUNCATE) == 0 && (ret = - __qam_metachk(dbp, name, (QMETA *)meta)) != 0) - return (ret); - break; - case DB_RENAMEMAGIC: - F_SET(dbp, DB_AM_IN_RENAME); - - /* Copy the file's ID. */ - memcpy(dbp->fileid, ((DBMETA *)meta)->uid, DB_FILE_ID_LEN); - - break; - default: - goto bad_format; - } - - if (FLD_ISSET(meta->metaflags, - DBMETA_PART_RANGE | DBMETA_PART_CALLBACK)) - if ((ret = - __partition_init(dbp, meta->metaflags)) != 0) - return (ret); - return (0); - -bad_format: - if (F_ISSET(dbp, DB_AM_RECOVER)) - ret = ENOENT; - else - __db_errx(env, - "__db_meta_setup: %s: unexpected file type or format", - name); - return (ret == 0 ? EINVAL : ret); -} diff --git a/db/db_overflow.c b/db/db_overflow.c deleted file mode 100644 index a718182..0000000 --- a/db/db_overflow.c +++ /dev/null @@ -1,706 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. 
- */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_am.h" -#include "dbinc/mp.h" - -/* - * Big key/data code. - * - * Big key and data entries are stored on linked lists of pages. 
The initial - * reference is a structure with the total length of the item and the page - * number where it begins. Each entry in the linked list contains a pointer - * to the next page of data, and so on. - */ - -/* - * __db_goff -- - * Get an offpage item. - * - * Copies the overflow item whose chain starts at page pgno and whose total - * length is tlen into dbt. With DB_DBT_PARTIAL set, only the dlen bytes - * starting at offset doff (clipped to tlen) are requested. dbt->size is set - * to the number of bytes requested. - * - * Destination memory is chosen from the DBT flags: DB_DBT_USERCOPY hands - * each piece to env->dbt_usercopy; DB_DBT_USERMEM uses the caller's buffer - * and returns DB_BUFFER_SMALL (with dbt->size set to the bytes needed) when - * ulen is too small; DB_DBT_MALLOC and DB_DBT_REALLOC allocate with - * __os_umalloc and __os_urealloc; otherwise the buffer at *bpp is used, and - * grown with __os_realloc when *bpsz is zero or smaller than needed. If - * none of these apply DB_BUFFER_SMALL is returned. - * - * Returns 0 on success, DB_BUFFER_SMALL as above, or the error returned by - * the allocator, __memp_fget or the user-copy callback. - * - * PUBLIC: int __db_goff __P((DBC *, - * PUBLIC: DBT *, u_int32_t, db_pgno_t, void **, u_int32_t *)); - */ -int -__db_goff(dbc, dbt, tlen, pgno, bpp, bpsz) - DBC *dbc; - DBT *dbt; - u_int32_t tlen; - db_pgno_t pgno; - void **bpp; - u_int32_t *bpsz; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - DB_TXN *txn; - DBC_INTERNAL *cp; - ENV *env; - PAGE *h; - DB_THREAD_INFO *ip; - db_indx_t bytes; - u_int32_t curoff, needed, start; - u_int8_t *p, *src; - int ret; - - dbp = dbc->dbp; - cp = dbc->internal; - env = dbp->env; - ip = dbc->thread_info; - mpf = dbp->mpf; - txn = dbc->txn; - - /* - * Check if the buffer is big enough; if it is not and we are - * allowed to malloc space, then we'll malloc it. If we are - * not (DB_DBT_USERMEM), then we'll set the dbt and return - * appropriately. - */ - if (F_ISSET(dbt, DB_DBT_PARTIAL)) { - start = dbt->doff; - if (start > tlen) - needed = 0; - else if (dbt->dlen > tlen - start) - needed = tlen - start; - else - needed = dbt->dlen; - } else { - start = 0; - needed = tlen; - } - - /* - * If the caller has not requested any data, return success. This - * "early-out" also avoids setting up the streaming optimization when - * no page would be retrieved. If it were removed, the streaming code - * should only initialize when needed is not 0. - */ - if (needed == 0) { - dbt->size = 0; - return (0); - } - - if (F_ISSET(dbt, DB_DBT_USERCOPY)) - goto skip_alloc; - - /* Allocate any necessary memory. 
*/ - if (F_ISSET(dbt, DB_DBT_USERMEM)) { - if (needed > dbt->ulen) { - dbt->size = needed; - return (DB_BUFFER_SMALL); - } - } else if (F_ISSET(dbt, DB_DBT_MALLOC)) { - if ((ret = __os_umalloc(env, needed, &dbt->data)) != 0) - return (ret); - } else if (F_ISSET(dbt, DB_DBT_REALLOC)) { - if ((ret = __os_urealloc(env, needed, &dbt->data)) != 0) - return (ret); - } else if (bpsz != NULL && (*bpsz == 0 || *bpsz < needed)) { - if ((ret = __os_realloc(env, needed, bpp)) != 0) - return (ret); - *bpsz = needed; - dbt->data = *bpp; - } else if (bpp != NULL) - dbt->data = *bpp; - else { - DB_ASSERT(env, - F_ISSET(dbt, - DB_DBT_USERMEM | DB_DBT_MALLOC | DB_DBT_REALLOC) || - bpsz != NULL || bpp != NULL); - return (DB_BUFFER_SMALL); - } - -skip_alloc: - /* - * Set up a start page in the overflow chain if streaming. The cursor - * remembers the chain head, the last page read and that page's starting - * offset; a request on the same chain that starts at or after that - * offset, and within one page of data of it, resumes from the remembered - * page. Otherwise the walk restarts from the head of the chain and the - * remembered position is reset. - */ - if (cp->stream_start_pgno != PGNO_INVALID && - pgno == cp->stream_start_pgno && start >= cp->stream_off && - start < cp->stream_off + P_MAXSPACE(dbp, dbp->pgsize)) { - pgno = cp->stream_curr_pgno; - curoff = cp->stream_off; - } else { - cp->stream_start_pgno = cp->stream_curr_pgno = pgno; - cp->stream_off = curoff = 0; - } - - /* - * Step through the linked list of pages, copying the data on each - * one into the buffer. Never copy more than the total data length. - */ - dbt->size = needed; - for (p = dbt->data; pgno != PGNO_INVALID && needed > 0;) { - if ((ret = __memp_fget(mpf, - &pgno, ip, txn, 0, &h)) != 0) - return (ret); - DB_ASSERT(env, TYPE(h) == P_OVERFLOW); - - /* Check if we need any bytes from this page. */ - if (curoff + OV_LEN(h) >= start) { - bytes = OV_LEN(h); - src = (u_int8_t *)h + P_OVERHEAD(dbp); - if (start > curoff) { - src += start - curoff; - bytes -= start - curoff; - } - if (bytes > needed) - bytes = needed; - if (F_ISSET(dbt, DB_DBT_USERCOPY)) { - /* - * The offset into the DBT is the total size - * less the amount of data still needed. Care - * needs to be taken if doing a partial copy - * beginning at an offset other than 0. 
- */ - if ((ret = env->dbt_usercopy( - dbt, dbt->size - needed, - src, bytes, DB_USERCOPY_SETDATA)) != 0) { - (void)__memp_fput(mpf, - ip, h, dbp->priority); - return (ret); - } - } else - memcpy(p, src, bytes); - p += bytes; - needed -= bytes; - } - /* Remember this page and the item offset at which it starts. */ - cp->stream_off = curoff; - curoff += OV_LEN(h); - cp->stream_curr_pgno = pgno; - pgno = h->next_pgno; - (void)__memp_fput(mpf, ip, h, dbp->priority); - } - - return (0); -} - -/* - * __db_poff -- - * Put an offpage item. - * - * Copies dbt->data onto a chain of newly allocated P_OVERFLOW pages and - * returns the page number of the head of the chain through pgnop. With - * DB_DBT_STREAMING set, data is first appended to the free space of the - * page cached in the cursor (stream_curr_pgno) and *pgnop is set to the - * cached chain head. Each page change is logged with __db_big_log when - * the cursor is logging. On success the cursor's stream_curr_pgno and - * stream_off are updated to describe the last page written. - * - * Returns 0 on success or the error from __memp_fget, __db_new, - * __db_big_log or __memp_fput. - * - * PUBLIC: int __db_poff __P((DBC *, const DBT *, db_pgno_t *)); - */ -int -__db_poff(dbc, dbt, pgnop) - DBC *dbc; - const DBT *dbt; - db_pgno_t *pgnop; -{ - DB *dbp; - DBT tmp_dbt; - DB_LSN null_lsn; - DB_MPOOLFILE *mpf; - PAGE *pagep, *lastp; - db_indx_t pagespace; - db_pgno_t pgno; - u_int32_t space, sz, tlen; - u_int8_t *p; - int ret, t_ret; - - /* - * Allocate pages and copy the key/data item into them. Calculate the - * number of bytes we get for pages we fill completely with a single - * item. - */ - dbp = dbc->dbp; - lastp = NULL; - mpf = dbp->mpf; - pagespace = P_MAXSPACE(dbp, dbp->pgsize); - p = dbt->data; - sz = dbt->size; - - /* - * Check whether we are streaming at the end of the overflow item. - * If so, the last pgno and offset will be cached in the cursor. - */ - if (F_ISSET(dbt, DB_DBT_STREAMING)) { - tlen = dbt->size - dbt->dlen; - pgno = dbc->internal->stream_curr_pgno; - if ((ret = __memp_fget(mpf, &pgno, dbc->thread_info, - dbc->txn, DB_MPOOL_DIRTY, &lastp)) != 0) - return (ret); - - /* - * Calculate how much we can write on the last page of the - * overflow item. - */ - DB_ASSERT(dbp->env, - OV_LEN(lastp) == (tlen - dbc->internal->stream_off)); - space = pagespace - OV_LEN(lastp); - - /* Only copy as much data as we have. 
*/ - if (space > dbt->dlen) - space = dbt->dlen; - - if (DBC_LOGGING(dbc)) { - tmp_dbt.data = dbt->data; - tmp_dbt.size = space; - ZERO_LSN(null_lsn); - if ((ret = __db_big_log(dbp, dbc->txn, - &LSN(lastp), 0, DB_APPEND_BIG, pgno, - PGNO_INVALID, PGNO_INVALID, &tmp_dbt, - &LSN(lastp), &null_lsn, &null_lsn)) != 0) - goto err; - } else - LSN_NOT_LOGGED(LSN(lastp)); - - memcpy((u_int8_t *)lastp + P_OVERHEAD(dbp) + OV_LEN(lastp), - dbt->data, space); - OV_LEN(lastp) += space; - sz -= space + dbt->doff; - p += space; - /* When appending, report the chain head cached in the cursor. */ - *pgnop = dbc->internal->stream_start_pgno; - } - - ret = 0; - for (; sz > 0; p += pagespace, sz -= pagespace) { - /* - * Reduce pagespace so we terminate the loop correctly and - * don't copy too much data. - */ - if (sz < pagespace) - pagespace = sz; - - /* - * Allocate and initialize a new page and copy all or part of - * the item onto the page. If sz is less than pagespace, we - * have a partial record. - */ - if ((ret = __db_new(dbc, P_OVERFLOW, NULL, &pagep)) != 0) - break; - if (DBC_LOGGING(dbc)) { - tmp_dbt.data = p; - tmp_dbt.size = pagespace; - ZERO_LSN(null_lsn); - if ((ret = __db_big_log(dbp, dbc->txn, - &LSN(pagep), 0, DB_ADD_BIG, PGNO(pagep), - lastp ? PGNO(lastp) : PGNO_INVALID, - PGNO_INVALID, &tmp_dbt, &LSN(pagep), - lastp == NULL ? &null_lsn : &LSN(lastp), - &null_lsn)) != 0) { - (void)__memp_fput(mpf, dbc->thread_info, - pagep, dbc->priority); - goto err; - } - } else - LSN_NOT_LOGGED(LSN(pagep)); - - /* Move LSN onto page. */ - if (lastp != NULL) - LSN(lastp) = LSN(pagep); - - OV_LEN(pagep) = pagespace; - OV_REF(pagep) = 1; - memcpy((u_int8_t *)pagep + P_OVERHEAD(dbp), p, pagespace); - - /* - * If this is the first entry, update the user's info and - * initialize the cursor to allow for streaming of subsequent - * updates. Otherwise, update the entry on the last page - * filled in and release that page. 
- */ - if (lastp == NULL) { - *pgnop = PGNO(pagep); - dbc->internal->stream_start_pgno = - dbc->internal->stream_curr_pgno = *pgnop; - dbc->internal->stream_off = 0; - } else { - lastp->next_pgno = PGNO(pagep); - pagep->prev_pgno = PGNO(lastp); - if ((ret = __memp_fput(mpf, - dbc->thread_info, lastp, dbc->priority)) != 0) { - /* - * NOTE(review): pagep obtained from __db_new does - * not appear to be released on this path, since - * the err label only puts lastp -- confirm. - */ - lastp = NULL; - goto err; - } - } - lastp = pagep; - } - /* - * Common exit: on success record the last page written and - * dbt->size - OV_LEN(lastp) as its offset, then release that page. - */ -err: if (lastp != NULL) { - if (ret == 0) { - dbc->internal->stream_curr_pgno = PGNO(lastp); - dbc->internal->stream_off = dbt->size - OV_LEN(lastp); - } - - if ((t_ret = __memp_fput(mpf, dbc->thread_info, lastp, - dbc->priority)) != 0 && ret == 0) - ret = t_ret; - } - return (ret); -} - -/* - * __db_ovref -- - * Decrement the reference count on an overflow page. - * - * Fetches page pgno dirty, writes an __db_ovref_log record (adjustment -1) - * when the cursor is logging, decrements OV_REF and releases the page. - * Returns 0 or the error from __memp_fget, __db_ovref_log or __memp_fput. - * - * PUBLIC: int __db_ovref __P((DBC *, db_pgno_t)); - */ -int -__db_ovref(dbc, pgno) - DBC *dbc; - db_pgno_t pgno; -{ - DB *dbp; - DB_MPOOLFILE *mpf; - PAGE *h; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - - if ((ret = __memp_fget(mpf, &pgno, - dbc->thread_info, dbc->txn, DB_MPOOL_DIRTY, &h)) != 0) - return (ret); - - if (DBC_LOGGING(dbc)) { - if ((ret = __db_ovref_log(dbp, - dbc->txn, &LSN(h), 0, h->pgno, -1, &LSN(h))) != 0) { - (void)__memp_fput(mpf, - dbc->thread_info, h, dbc->priority); - return (ret); - } - } else - LSN_NOT_LOGGED(LSN(h)); - - /* - * In BDB releases before 4.5, the overflow reference counts were - * incremented when an overflow item was split onto an internal - * page. There was a lock race in that code, and rather than fix - * the race, we changed BDB to copy overflow items when splitting - * them onto internal pages. The code to decrement reference - * counts remains so databases already in the field continue to - * work. - */ - --OV_REF(h); - - return (__memp_fput(mpf, dbc->thread_info, h, dbc->priority)); -} - -/* - * __db_doff -- - * Delete an offpage chain of overflow pages. 
- * - * PUBLIC: int __db_doff __P((DBC *, db_pgno_t)); - */ -int -__db_doff(dbc, pgno) - DBC *dbc; - db_pgno_t pgno; -{ - DB *dbp; - DBT tmp_dbt; - DB_LSN null_lsn; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int ret; - - dbp = dbc->dbp; - mpf = dbp->mpf; - - do { - if ((ret = __memp_fget(mpf, &pgno, - dbc->thread_info, dbc->txn, 0, &pagep)) != 0) - return (ret); - - DB_ASSERT(dbp->env, TYPE(pagep) == P_OVERFLOW); - /* - * If it's referenced by more than one key/data item, - * decrement the reference count and return. - */ - if (OV_REF(pagep) > 1) { - (void)__memp_fput(mpf, - dbc->thread_info, pagep, dbc->priority); - return (__db_ovref(dbc, pgno)); - } - - if ((ret = __memp_dirty(mpf, &pagep, - dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) { - if (pagep != NULL) - (void)__memp_fput(mpf, - dbc->thread_info, pagep, dbc->priority); - return (ret); - } - - if (DBC_LOGGING(dbc)) { - tmp_dbt.data = (u_int8_t *)pagep + P_OVERHEAD(dbp); - tmp_dbt.size = OV_LEN(pagep); - ZERO_LSN(null_lsn); - if ((ret = __db_big_log(dbp, dbc->txn, - &LSN(pagep), 0, DB_REM_BIG, - PGNO(pagep), PREV_PGNO(pagep), - NEXT_PGNO(pagep), &tmp_dbt, - &LSN(pagep), &null_lsn, &null_lsn)) != 0) { - (void)__memp_fput(mpf, - dbc->thread_info, pagep, dbc->priority); - return (ret); - } - } else - LSN_NOT_LOGGED(LSN(pagep)); - pgno = pagep->next_pgno; - OV_LEN(pagep) = 0; - if ((ret = __db_free(dbc, pagep)) != 0) - return (ret); - } while (pgno != PGNO_INVALID); - - return (0); -} - -/* - * __db_moff -- - * Match on overflow pages. - * - * Given a starting page number and a key, return <0, 0, >0 to indicate if the - * key on the page is less than, equal to or greater than the key specified. - * We optimize this by doing chunk at a time comparison unless the user has - * specified a comparison function. In this case, we need to materialize - * the entire object and call their comparison routine. - * - * __db_moff and __db_coff are generic functions useful in searching and - * ordering off page items. 
__db_moff matches an overflow DBT with an offpage - * item. __db_coff compares two offpage items for lexicographic sort order. - * - * PUBLIC: int __db_moff __P((DBC *, const DBT *, db_pgno_t, u_int32_t, - * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *)); - */ -int -__db_moff(dbc, dbt, pgno, tlen, cmpfunc, cmpp) - DBC *dbc; - const DBT *dbt; - db_pgno_t pgno; - u_int32_t tlen; - int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp; -{ - DB *dbp; - DBT local_dbt; - DB_MPOOLFILE *mpf; - DB_THREAD_INFO *ip; - PAGE *pagep; - void *buf; - u_int32_t bufsize, cmp_bytes, key_left; - u_int8_t *p1, *p2; - int ret; - - dbp = dbc->dbp; - ip = dbc->thread_info; - mpf = dbp->mpf; - - /* - * If there is a user-specified comparison function, build a - * contiguous copy of the key, and call it. - */ - if (cmpfunc != NULL) { - memset(&local_dbt, 0, sizeof(local_dbt)); - buf = NULL; - bufsize = 0; - - if ((ret = __db_goff(dbc, - &local_dbt, tlen, pgno, &buf, &bufsize)) != 0) - return (ret); - /* Pass the key as the first argument */ - *cmpp = cmpfunc(dbp, dbt, &local_dbt); - __os_free(dbp->env, buf); - return (0); - } - - /* While there are both keys to compare. */ - for (*cmpp = 0, p1 = dbt->data, - key_left = dbt->size; key_left > 0 && pgno != PGNO_INVALID;) { - if ((ret = - __memp_fget(mpf, &pgno, ip, dbc->txn, 0, &pagep)) != 0) - return (ret); - - cmp_bytes = OV_LEN(pagep) < key_left ? OV_LEN(pagep) : key_left; - tlen -= cmp_bytes; - key_left -= cmp_bytes; - for (p2 = (u_int8_t *)pagep + P_OVERHEAD(dbp); - cmp_bytes-- > 0; ++p1, ++p2) - if (*p1 != *p2) { - *cmpp = (long)*p1 - (long)*p2; - break; - } - pgno = NEXT_PGNO(pagep); - if ((ret = __memp_fput(mpf, ip, pagep, dbp->priority)) != 0) - return (ret); - if (*cmpp != 0) - return (0); - } - if (key_left > 0) /* DBT is longer than the page key. */ - *cmpp = 1; - else if (tlen > 0) /* DBT is shorter than the page key. 
*/ - *cmpp = -1; - else - *cmpp = 0; - - return (0); -} - -/* - * __db_coff -- - * Match two offpage dbts. - * - * The DBTs must both refer to offpage items. - * The match happens a chunk (page) at a time unless a user defined comparison - * function exists. It is not possible to optimize this comparison away when - * a lexicographic sort order is required on mismatch. - * - * NOTE: For now this function only works for H_OFFPAGE type items. It would - * be simple to extend it for use with B_OVERFLOW type items. It would only - * require extracting the total length, and page number, dependent on the - * DBT type. - * - * PUBLIC: int __db_coff __P((DBC *, const DBT *, const DBT *, - * PUBLIC: int (*)(DB *, const DBT *, const DBT *), int *)); - */ -int -__db_coff(dbc, dbt, match, cmpfunc, cmpp) - DBC *dbc; - const DBT *dbt, *match; - int (*cmpfunc) __P((DB *, const DBT *, const DBT *)), *cmpp; -{ - DB *dbp; - DB_THREAD_INFO *ip; - DB_MPOOLFILE *mpf; - DB_TXN *txn; - DBT local_key, local_match; - PAGE *dbt_pagep, *match_pagep; - db_pgno_t dbt_pgno, match_pgno; - u_int32_t cmp_bytes, dbt_bufsz, dbt_len, match_bufsz; - u_int32_t match_len, max_data, page_sz; - u_int8_t *p1, *p2; - int ret; - void *dbt_buf, *match_buf; - - dbp = dbc->dbp; - ip = dbc->thread_info; - txn = dbc->txn; - mpf = dbp->mpf; - page_sz = dbp->pgsize; - *cmpp = 0; - dbt_buf = match_buf = NULL; - - DB_ASSERT(dbp->env, HPAGE_PTYPE(dbt->data) == H_OFFPAGE); - DB_ASSERT(dbp->env, HPAGE_PTYPE(match->data) == H_OFFPAGE); - - /* Extract potentially unaligned length and pgno fields from DBTs */ - memcpy(&dbt_len, HOFFPAGE_TLEN(dbt->data), sizeof(u_int32_t)); - memcpy(&dbt_pgno, HOFFPAGE_PGNO(dbt->data), sizeof(db_pgno_t)); - memcpy(&match_len, HOFFPAGE_TLEN(match->data), sizeof(u_int32_t)); - memcpy(&match_pgno, HOFFPAGE_PGNO(match->data), sizeof(db_pgno_t)); - max_data = (dbt_len < match_len ? dbt_len : match_len); - - /* - * If there is a custom comparator, fully resolve both DBTs. 
- * Then call the users comparator. - */ - if (cmpfunc != NULL) { - memset(&local_key, 0, sizeof(local_key)); - memset(&local_match, 0, sizeof(local_match)); - dbt_buf = match_buf = NULL; - dbt_bufsz = match_bufsz = 0; - - if ((ret = __db_goff(dbc, &local_key, dbt_len, - dbt_pgno, &dbt_buf, &dbt_bufsz)) != 0) - goto err1; - if ((ret = __db_goff(dbc, &local_match, match_len, - match_pgno, &match_buf, &match_bufsz)) != 0) - goto err1; - /* The key needs to be the first argument for sort order */ - *cmpp = cmpfunc(dbp, &local_key, &local_match); - -err1: if (dbt_buf != NULL) - __os_free(dbp->env, dbt_buf); - if (match_buf != NULL) - __os_free(dbp->env, match_buf); - return (ret); - } - - /* Match the offpage DBTs a page at a time. */ - while (dbt_pgno != PGNO_INVALID && match_pgno != PGNO_INVALID) { - if ((ret = - __memp_fget(mpf, &dbt_pgno, ip, txn, 0, &dbt_pagep)) != 0) - return (ret); - if ((ret = - __memp_fget(mpf, &match_pgno, - ip, txn, 0, &match_pagep)) != 0) { - (void)__memp_fput( - mpf, ip, dbt_pagep, DB_PRIORITY_UNCHANGED); - return (ret); - } - cmp_bytes = page_sz < max_data ? page_sz : max_data; - for (p1 = (u_int8_t *)dbt_pagep + P_OVERHEAD(dbp), - p2 = (u_int8_t *)match_pagep + P_OVERHEAD(dbp); - cmp_bytes-- > 0; ++p1, ++p2) - if (*p1 != *p2) { - *cmpp = (long)*p1 - (long)*p2; - break; - } - - dbt_pgno = NEXT_PGNO(dbt_pagep); - match_pgno = NEXT_PGNO(match_pagep); - max_data -= page_sz; - if ((ret = __memp_fput(mpf, - ip, dbt_pagep, DB_PRIORITY_UNCHANGED)) != 0) { - (void)__memp_fput(mpf, - ip, match_pagep, DB_PRIORITY_UNCHANGED); - return (ret); - } - if ((ret = __memp_fput(mpf, - ip, match_pagep, DB_PRIORITY_UNCHANGED)) != 0) - return (ret); - if (*cmpp != 0) - return (0); - } - - /* If a lexicographic mismatch was found, then the result has already - * been returned. If the DBTs matched, consider the lengths of the - * items, and return appropriately. - */ - if (dbt_len > match_len) /* DBT is longer than the match key. 
*/ - *cmpp = 1; - else if (match_len > dbt_len) /* DBT is shorter than the match key. */ - *cmpp = -1; - else - *cmpp = 0; - - return (0); - -} diff --git a/db/db_ovfl_vrfy.c b/db/db_ovfl_vrfy.c deleted file mode 100644 index fdd0201..0000000 --- a/db/db_ovfl_vrfy.c +++ /dev/null @@ -1,409 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995, 1996 - * Keith Bostic. All rights reserved. - */ -/* - * Copyright (c) 1990, 1993, 1994, 1995 - * The Regents of the University of California. All rights reserved. - * - * This code is derived from software contributed to Berkeley by - * Mike Olson. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_am.h" -#include "dbinc/db_verify.h" -#include "dbinc/mp.h" - -/* - * __db_vrfy_overflow -- - * Verify overflow page. - * - * PUBLIC: int __db_vrfy_overflow __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, - * PUBLIC: u_int32_t)); - */ -int -__db_vrfy_overflow(dbp, vdp, h, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - PAGE *h; - db_pgno_t pgno; - u_int32_t flags; -{ - VRFY_PAGEINFO *pip; - int isbad, ret, t_ret; - - isbad = 0; - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - if ((ret = __db_vrfy_datapage(dbp, vdp, h, pgno, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - pip->refcount = OV_REF(h); - if (pip->refcount < 1) { - EPRINT((dbp->env, - "Page %lu: overflow page has zero reference count", - (u_long)pgno)); - isbad = 1; - } - - /* Just store for now. */ - pip->olen = HOFFSET(h); - -err: if ((t_ret = __db_vrfy_putpageinfo(dbp->env, vdp, pip)) != 0) - ret = t_ret; - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __db_vrfy_ovfl_structure -- - * Walk a list of overflow pages, avoiding cycles and marking - * pages seen. 
- * - * PUBLIC: int __db_vrfy_ovfl_structure - * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, u_int32_t)); - */ -int -__db_vrfy_ovfl_structure(dbp, vdp, pgno, tlen, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - u_int32_t tlen; - u_int32_t flags; -{ - DB *pgset; - ENV *env; - VRFY_PAGEINFO *pip; - db_pgno_t next, prev; - int isbad, ret, seen_cnt, t_ret; - u_int32_t refcount; - - env = dbp->env; - pgset = vdp->pgset; - DB_ASSERT(env, pgset != NULL); - isbad = 0; - - /* This shouldn't happen, but just to be sure. */ - if (!IS_VALID_PGNO(pgno)) - return (DB_VERIFY_BAD); - - /* - * Check the first prev_pgno; it ought to be PGNO_INVALID, - * since there's no prev page. - */ - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - /* The refcount is stored on the first overflow page. */ - refcount = pip->refcount; - - if (pip->type != P_OVERFLOW) { - EPRINT((env, - "Page %lu: overflow page of invalid type %lu", - (u_long)pgno, (u_long)pip->type)); - ret = DB_VERIFY_BAD; - goto err; /* Unsafe to continue. */ - } - - prev = pip->prev_pgno; - if (prev != PGNO_INVALID) { - EPRINT((env, - "Page %lu: first page in overflow chain has a prev_pgno %lu", - (u_long)pgno, (u_long)prev)); - isbad = 1; - } - - for (;;) { - /* - * We may have seen this page elsewhere, if the overflow entry - * has been promoted to an internal page; we just want to - * make sure that each overflow page is seen exactly as many - * times as its refcount dictates. - * - * Note that this code also serves to keep us from looping - * infinitely if there's a cycle in an overflow chain. 
- */ - if ((ret = __db_vrfy_pgset_get(pgset, - vdp->thread_info, pgno, &seen_cnt)) != 0) - goto err; - if ((u_int32_t)seen_cnt > refcount) { - EPRINT((env, - "Page %lu: encountered too many times in overflow traversal", - (u_long)pgno)); - ret = DB_VERIFY_BAD; - goto err; - } - if ((ret = - __db_vrfy_pgset_inc(pgset, vdp->thread_info, pgno)) != 0) - goto err; - - /* - * Each overflow page can be referenced multiple times, - * because it's possible for overflow Btree keys to get - * promoted to internal pages. We want to make sure that - * each page is referenced from a Btree leaf (or Hash data - * page, which we consider a "leaf" here) exactly once; if - * the parent was a leaf, set a flag to indicate that we've - * seen this page in a leaf context. - * - * If the parent is not a leaf--in which case it's a Btree - * internal page--we don't need to bother doing any further - * verification, as we'll do it when we hit the leaf (or - * complain that we never saw the leaf). Only the first - * page in an overflow chain should ever have a refcount - * greater than 1, and the combination of the LEAFSEEN check - * and the fact that we bail after the first page for - * non-leaves should ensure this. - * - * Note that each "child" of a page, such as an overflow page, - * is stored and verified in a structure check exactly once, - * so this code does not need to contend with the fact that - * overflow chains used as Btree duplicate keys may be - * referenced multiply from a single Btree leaf page. 
- */ - if (LF_ISSET(DB_ST_OVFL_LEAF)) { - if (F_ISSET(pip, VRFY_OVFL_LEAFSEEN)) { - EPRINT((env, - "Page %lu: overflow page linked twice from leaf or data page", - (u_long)pgno)); - ret = DB_VERIFY_BAD; - goto err; - } - F_SET(pip, VRFY_OVFL_LEAFSEEN); - } - - /* - * We want to verify each overflow chain only once, and - * although no chain should be linked more than once from a - * leaf page, we can't guarantee that it'll be linked that - * once if it's linked from an internal page and the key - * is gone. - * - * seen_cnt is the number of times we'd encountered this page - * before calling this function. - */ - if (seen_cnt == 0) { - /* - * Keep a running tab on how much of the item we've - * seen. - */ - tlen -= pip->olen; - - /* Send the application feedback about our progress. */ - if (!LF_ISSET(DB_SALVAGE)) - __db_vrfy_struct_feedback(dbp, vdp); - } else - goto done; - - next = pip->next_pgno; - - /* Are we there yet? */ - if (next == PGNO_INVALID) - break; - - /* - * We've already checked this when we saved it, but just - * to be sure... - */ - if (!IS_VALID_PGNO(next)) { - EPRINT((env, - "Page %lu: bad next_pgno %lu on overflow page", - (u_long)pgno, (u_long)next)); - ret = DB_VERIFY_BAD; - goto err; - } - - if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 || - (ret = __db_vrfy_getpageinfo(vdp, next, &pip)) != 0) - return (ret); - if (pip->prev_pgno != pgno) { - EPRINT((env, - "Page %lu: bad prev_pgno %lu on overflow page (should be %lu)", - (u_long)next, (u_long)pip->prev_pgno, - (u_long)pgno)); - isbad = 1; - /* - * It's safe to continue because we have separate - * cycle detection. - */ - } - - pgno = next; - } - - if (tlen > 0) { - isbad = 1; - EPRINT((env, - "Page %lu: overflow item incomplete", (u_long)pgno)); - } - -done: -err: if ((t_ret = - __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - return ((ret == 0 && isbad == 1) ? 
DB_VERIFY_BAD : ret); -} - -/* - * __db_safe_goff -- - * Get an overflow item, very carefully, from an untrusted database, - * in the context of the salvager. - * - * PUBLIC: int __db_safe_goff __P((DB *, VRFY_DBINFO *, - * PUBLIC: db_pgno_t, DBT *, void *, u_int32_t *, u_int32_t)); - */ -int -__db_safe_goff(dbp, vdp, pgno, dbt, buf, bufsz, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - DBT *dbt; - void *buf; - u_int32_t *bufsz; - u_int32_t flags; -{ - DB_MPOOLFILE *mpf; - PAGE *h; - int ret, t_ret; - u_int32_t bytesgot, bytes; - u_int8_t *src, *dest; - - mpf = dbp->mpf; - h = NULL; - ret = t_ret = 0; - bytesgot = bytes = 0; - - DB_ASSERT(dbp->env, bufsz != NULL); - - /* - * Back up to the start of the overflow chain (if necessary) via the - * prev pointer of the overflow page. This guarantees we transverse the - * longest possible chains of overflow pages and won't be called again - * with a pgno earlier in the chain, stepping on ourselves. - */ - for (;;) { - if ((ret = __memp_fget( - mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) - return (ret); - - if (PREV_PGNO(h) == PGNO_INVALID || - !IS_VALID_PGNO(PREV_PGNO(h))) - break; - - pgno = PREV_PGNO(h); - - if ((ret = __memp_fput(mpf, - vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) - return (ret); - } - if ((ret = __memp_fput( - mpf, vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) - return (ret); - - h = NULL; - - while ((pgno != PGNO_INVALID) && (IS_VALID_PGNO(pgno))) { - /* - * Mark that we're looking at this page; if we've seen it - * already, quit. - */ - if ((ret = __db_salvage_markdone(vdp, pgno)) != 0) - break; - - if ((ret = __memp_fget(mpf, &pgno, - vdp->thread_info, NULL, 0, &h)) != 0) - break; - - /* - * Make sure it's really an overflow page, unless we're - * being aggressive, in which case we pretend it is. 
- */ - if (!LF_ISSET(DB_AGGRESSIVE) && TYPE(h) != P_OVERFLOW) { - ret = DB_VERIFY_BAD; - break; - } - - src = (u_int8_t *)h + P_OVERHEAD(dbp); - bytes = OV_LEN(h); - - if (bytes + P_OVERHEAD(dbp) > dbp->pgsize) - bytes = dbp->pgsize - P_OVERHEAD(dbp); - - /* - * Realloc if buf is too small - */ - if (bytesgot + bytes > *bufsz) { - if ((ret = - __os_realloc(dbp->env, bytesgot + bytes, buf)) != 0) - break; - *bufsz = bytesgot + bytes; - } - - dest = *(u_int8_t **)buf + bytesgot; - bytesgot += bytes; - - memcpy(dest, src, bytes); - - pgno = NEXT_PGNO(h); - - if ((ret = __memp_fput(mpf, - vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0) - break; - h = NULL; - } - - /* - * If we're being aggressive, salvage a partial datum if there - * was an error somewhere along the way. - */ - if (ret == 0 || LF_ISSET(DB_AGGRESSIVE)) { - dbt->size = bytesgot; - dbt->data = *(void **)buf; - } - - /* If we broke out on error, don't leave pages pinned. */ - if (h != NULL && (t_ret = __memp_fput(mpf, - vdp->thread_info, h, DB_PRIORITY_UNCHANGED)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} diff --git a/db/db_pr.c b/db/db_pr.c deleted file mode 100644 index 69fb7c7..0000000 --- a/db/db_pr.c +++ /dev/null @@ -1,1659 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/mp.h" -#include "dbinc/partition.h" -#include "dbinc/qam.h" -#include "dbinc/db_verify.h" - -/* - * __db_loadme -- - * A nice place to put a breakpoint. 
- * - * PUBLIC: void __db_loadme __P((void)); - */ -void -__db_loadme() -{ - pid_t pid; - - __os_id(NULL, &pid, NULL); -} - -#ifdef HAVE_STATISTICS -static int __db_bmeta __P((DB *, BTMETA *, u_int32_t)); -static int __db_hmeta __P((DB *, HMETA *, u_int32_t)); -static void __db_meta __P((DB *, DBMETA *, FN const *, u_int32_t)); -static const char *__db_pagetype_to_string __P((u_int32_t)); -static void __db_prdb __P((DB *, u_int32_t)); -static void __db_proff __P((ENV *, DB_MSGBUF *, void *)); -static int __db_prtree __P((DB *, DB_TXN *, u_int32_t)); -static int __db_qmeta __P((DB *, QMETA *, u_int32_t)); - -/* - * __db_dumptree -- - * Dump the tree to a file. - * - * PUBLIC: int __db_dumptree __P((DB *, DB_TXN *, char *, char *)); - */ -int -__db_dumptree(dbp, txn, op, name) - DB *dbp; - DB_TXN *txn; - char *op, *name; -{ - ENV *env; - FILE *fp, *orig_fp; - u_int32_t flags; - int ret; - - env = dbp->env; - - for (flags = 0; *op != '\0'; ++op) - switch (*op) { - case 'a': - LF_SET(DB_PR_PAGE); - break; - case 'h': - break; - case 'r': - LF_SET(DB_PR_RECOVERYTEST); - break; - default: - return (EINVAL); - } - - if (name != NULL) { - if ((fp = fopen(name, "w")) == NULL) - return (__os_get_errno()); - - orig_fp = dbp->dbenv->db_msgfile; - dbp->dbenv->db_msgfile = fp; - } else - fp = orig_fp = NULL; - - __db_prdb(dbp, flags); - - __db_msg(env, "%s", DB_GLOBAL(db_line)); - - ret = __db_prtree(dbp, txn, flags); - - if (fp != NULL) { - (void)fclose(fp); - env->dbenv->db_msgfile = orig_fp; - } - - return (ret); -} - -static const FN __db_flags_fn[] = { - { DB_AM_CHKSUM, "checksumming" }, - { DB_AM_COMPENSATE, "created by compensating transaction" }, - { DB_AM_CREATED, "database created" }, - { DB_AM_CREATED_MSTR, "encompassing file created" }, - { DB_AM_DBM_ERROR, "dbm/ndbm error" }, - { DB_AM_DELIMITER, "variable length" }, - { DB_AM_DISCARD, "discard cached pages" }, - { DB_AM_DUP, "duplicates" }, - { DB_AM_DUPSORT, "sorted duplicates" }, - { DB_AM_ENCRYPT, "encrypted" }, 
- { DB_AM_FIXEDLEN, "fixed-length records" }, - { DB_AM_INMEM, "in-memory" }, - { DB_AM_IN_RENAME, "file is being renamed" }, - { DB_AM_NOT_DURABLE, "changes not logged" }, - { DB_AM_OPEN_CALLED, "open called" }, - { DB_AM_PAD, "pad value" }, - { DB_AM_PGDEF, "default page size" }, - { DB_AM_RDONLY, "read-only" }, - { DB_AM_READ_UNCOMMITTED, "read-uncommitted" }, - { DB_AM_RECNUM, "Btree record numbers" }, - { DB_AM_RECOVER, "opened for recovery" }, - { DB_AM_RENUMBER, "renumber" }, - { DB_AM_REVSPLITOFF, "no reverse splits" }, - { DB_AM_SECONDARY, "secondary" }, - { DB_AM_SNAPSHOT, "load on open" }, - { DB_AM_SUBDB, "subdatabases" }, - { DB_AM_SWAP, "needswap" }, - { DB_AM_TXN, "transactional" }, - { DB_AM_VERIFYING, "verifier" }, - { 0, NULL } -}; - -/* - * __db_get_flags_fn -- - * Return the __db_flags_fn array. - * - * PUBLIC: const FN * __db_get_flags_fn __P((void)); - */ -const FN * -__db_get_flags_fn() -{ - return (__db_flags_fn); -} - -/* - * __db_prdb -- - * Print out the DB structure information. 
- */ -static void -__db_prdb(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - BTREE *bt; - DB_MSGBUF mb; - ENV *env; - HASH *h; - QUEUE *q; - - env = dbp->env; - - DB_MSGBUF_INIT(&mb); - __db_msg(env, "In-memory DB structure:"); - __db_msgadd(env, &mb, "%s: %#lx", - __db_dbtype_to_string(dbp->type), (u_long)dbp->flags); - __db_prflags(env, &mb, dbp->flags, __db_flags_fn, " (", ")"); - DB_MSGBUF_FLUSH(env, &mb); - - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - bt = dbp->bt_internal; - __db_msg(env, "bt_meta: %lu bt_root: %lu", - (u_long)bt->bt_meta, (u_long)bt->bt_root); - __db_msg(env, "bt_minkey: %lu", (u_long)bt->bt_minkey); - if (!LF_ISSET(DB_PR_RECOVERYTEST)) - __db_msg(env, "bt_compare: %#lx bt_prefix: %#lx", - P_TO_ULONG(bt->bt_compare), - P_TO_ULONG(bt->bt_prefix)); -#ifdef HAVE_COMPRESSION - if (!LF_ISSET(DB_PR_RECOVERYTEST)) - __db_msg(env, "bt_compress: %#lx bt_decompress: %#lx", - P_TO_ULONG(bt->bt_compress), - P_TO_ULONG(bt->bt_decompress)); -#endif - __db_msg(env, "bt_lpgno: %lu", (u_long)bt->bt_lpgno); - if (dbp->type == DB_RECNO) { - __db_msg(env, - "re_pad: %#lx re_delim: %#lx re_len: %lu re_source: %s", - (u_long)bt->re_pad, (u_long)bt->re_delim, - (u_long)bt->re_len, - bt->re_source == NULL ? 
"" : bt->re_source); - __db_msg(env, - "re_modified: %d re_eof: %d re_last: %lu", - bt->re_modified, bt->re_eof, (u_long)bt->re_last); - } - break; - case DB_HASH: - h = dbp->h_internal; - __db_msg(env, "meta_pgno: %lu", (u_long)h->meta_pgno); - __db_msg(env, "h_ffactor: %lu", (u_long)h->h_ffactor); - __db_msg(env, "h_nelem: %lu", (u_long)h->h_nelem); - if (!LF_ISSET(DB_PR_RECOVERYTEST)) - __db_msg(env, "h_hash: %#lx", P_TO_ULONG(h->h_hash)); - break; - case DB_QUEUE: - q = dbp->q_internal; - __db_msg(env, "q_meta: %lu", (u_long)q->q_meta); - __db_msg(env, "q_root: %lu", (u_long)q->q_root); - __db_msg(env, "re_pad: %#lx re_len: %lu", - (u_long)q->re_pad, (u_long)q->re_len); - __db_msg(env, "rec_page: %lu", (u_long)q->rec_page); - __db_msg(env, "page_ext: %lu", (u_long)q->page_ext); - break; - case DB_UNKNOWN: - default: - break; - } -} - -/* - * __db_prtree -- - * Print out the entire tree. - */ -static int -__db_prtree(dbp, txn, flags) - DB *dbp; - DB_TXN *txn; - u_int32_t flags; -{ - DB_MPOOLFILE *mpf; - PAGE *h; - db_pgno_t i, last; - int ret; - - mpf = dbp->mpf; - - if (dbp->type == DB_QUEUE) - return (__db_prqueue(dbp, flags)); - - /* - * Find out the page number of the last page in the database, then - * dump each page. - */ - if ((ret = __memp_get_last_pgno(mpf, &last)) != 0) - return (ret); - for (i = 0; i <= last; ++i) { - if ((ret = __memp_fget(mpf, &i, NULL, txn, 0, &h)) != 0) - return (ret); - (void)__db_prpage(dbp, h, flags); - if ((ret = __memp_fput(mpf, NULL, h, dbp->priority)) != 0) - return (ret); - } - - return (0); -} - -/* - * __db_meta -- - * Print out common metadata information. 
- */ -static void -__db_meta(dbp, dbmeta, fn, flags) - DB *dbp; - DBMETA *dbmeta; - FN const *fn; - u_int32_t flags; -{ - DB_MPOOLFILE *mpf; - DB_MSGBUF mb; - ENV *env; - PAGE *h; - db_pgno_t pgno; - u_int8_t *p; - int cnt, ret; - const char *sep; - - env = dbp->env; - mpf = dbp->mpf; - DB_MSGBUF_INIT(&mb); - - __db_msg(env, "\tmagic: %#lx", (u_long)dbmeta->magic); - __db_msg(env, "\tversion: %lu", (u_long)dbmeta->version); - __db_msg(env, "\tpagesize: %lu", (u_long)dbmeta->pagesize); - __db_msg(env, "\ttype: %lu", (u_long)dbmeta->type); - __db_msg(env, "\tmetaflags %#lx", (u_long)dbmeta->metaflags); - __db_msg(env, "\tkeys: %lu\trecords: %lu", - (u_long)dbmeta->key_count, (u_long)dbmeta->record_count); - if (dbmeta->nparts) - __db_msg(env, "\tnparts: %lu", (u_long)dbmeta->nparts); - - /* - * If we're doing recovery testing, don't display the free list, - * it may have changed and that makes the dump diff not work. - */ - if (!LF_ISSET(DB_PR_RECOVERYTEST)) { - __db_msgadd( - env, &mb, "\tfree list: %lu", (u_long)dbmeta->free); - for (pgno = dbmeta->free, - cnt = 0, sep = ", "; pgno != PGNO_INVALID;) { - if ((ret = __memp_fget(mpf, - &pgno, NULL, NULL, 0, &h)) != 0) { - DB_MSGBUF_FLUSH(env, &mb); - __db_msg(env, - "Unable to retrieve free-list page: %lu: %s", - (u_long)pgno, db_strerror(ret)); - break; - } - pgno = h->next_pgno; - (void)__memp_fput(mpf, NULL, h, dbp->priority); - __db_msgadd(env, &mb, "%s%lu", sep, (u_long)pgno); - if (++cnt % 10 == 0) { - DB_MSGBUF_FLUSH(env, &mb); - cnt = 0; - sep = "\t"; - } else - sep = ", "; - } - DB_MSGBUF_FLUSH(env, &mb); - __db_msg(env, "\tlast_pgno: %lu", (u_long)dbmeta->last_pgno); - } - - if (fn != NULL) { - DB_MSGBUF_FLUSH(env, &mb); - __db_msgadd(env, &mb, "\tflags: %#lx", (u_long)dbmeta->flags); - __db_prflags(env, &mb, dbmeta->flags, fn, " (", ")"); - } - - DB_MSGBUF_FLUSH(env, &mb); - __db_msgadd(env, &mb, "\tuid: "); - for (p = (u_int8_t *)dbmeta->uid, - cnt = 0; cnt < DB_FILE_ID_LEN; ++cnt) { - __db_msgadd(env, 
&mb, "%x", *p++); - if (cnt < DB_FILE_ID_LEN - 1) - __db_msgadd(env, &mb, " "); - } - DB_MSGBUF_FLUSH(env, &mb); -} - -/* - * __db_bmeta -- - * Print out the btree meta-data page. - */ -static int -__db_bmeta(dbp, h, flags) - DB *dbp; - BTMETA *h; - u_int32_t flags; -{ - static const FN fn[] = { - { BTM_DUP, "duplicates" }, - { BTM_RECNO, "recno" }, - { BTM_RECNUM, "btree:recnum" }, - { BTM_FIXEDLEN, "recno:fixed-length" }, - { BTM_RENUMBER, "recno:renumber" }, - { BTM_SUBDB, "multiple-databases" }, - { BTM_DUPSORT, "sorted duplicates" }, - { BTM_COMPRESS, "compressed" }, - { 0, NULL } - }; - ENV *env; - - env = dbp->env; - - __db_meta(dbp, (DBMETA *)h, fn, flags); - - __db_msg(env, "\tminkey: %lu", (u_long)h->minkey); - if (dbp->type == DB_RECNO) - __db_msg(env, "\tre_len: %#lx re_pad: %#lx", - (u_long)h->re_len, (u_long)h->re_pad); - __db_msg(env, "\troot: %lu", (u_long)h->root); - - return (0); -} - -/* - * __db_hmeta -- - * Print out the hash meta-data page. - */ -static int -__db_hmeta(dbp, h, flags) - DB *dbp; - HMETA *h; - u_int32_t flags; -{ - static const FN fn[] = { - { DB_HASH_DUP, "duplicates" }, - { DB_HASH_SUBDB, "multiple-databases" }, - { DB_HASH_DUPSORT, "sorted duplicates" }, - { 0, NULL } - }; - ENV *env; - DB_MSGBUF mb; - int i; - - env = dbp->env; - DB_MSGBUF_INIT(&mb); - - __db_meta(dbp, (DBMETA *)h, fn, flags); - - __db_msg(env, "\tmax_bucket: %lu", (u_long)h->max_bucket); - __db_msg(env, "\thigh_mask: %#lx", (u_long)h->high_mask); - __db_msg(env, "\tlow_mask: %#lx", (u_long)h->low_mask); - __db_msg(env, "\tffactor: %lu", (u_long)h->ffactor); - __db_msg(env, "\tnelem: %lu", (u_long)h->nelem); - __db_msg(env, "\th_charkey: %#lx", (u_long)h->h_charkey); - __db_msgadd(env, &mb, "\tspare points: "); - for (i = 0; i < NCACHED; i++) - __db_msgadd(env, &mb, "%lu ", (u_long)h->spares[i]); - DB_MSGBUF_FLUSH(env, &mb); - - return (0); -} - -/* - * __db_qmeta -- - * Print out the queue meta-data page. 
- */ -static int -__db_qmeta(dbp, h, flags) - DB *dbp; - QMETA *h; - u_int32_t flags; -{ - ENV *env; - - env = dbp->env; - - __db_meta(dbp, (DBMETA *)h, NULL, flags); - - __db_msg(env, "\tfirst_recno: %lu", (u_long)h->first_recno); - __db_msg(env, "\tcur_recno: %lu", (u_long)h->cur_recno); - __db_msg(env, "\tre_len: %#lx re_pad: %lu", - (u_long)h->re_len, (u_long)h->re_pad); - __db_msg(env, "\trec_page: %lu", (u_long)h->rec_page); - __db_msg(env, "\tpage_ext: %lu", (u_long)h->page_ext); - - return (0); -} - -/* - * __db_prnpage - * -- Print out a specific page. - * - * PUBLIC: int __db_prnpage __P((DB *, DB_TXN *, db_pgno_t)); - */ -int -__db_prnpage(dbp, txn, pgno) - DB *dbp; - DB_TXN *txn; - db_pgno_t pgno; -{ - DB_MPOOLFILE *mpf; - PAGE *h; - int ret, t_ret; - - mpf = dbp->mpf; - - if ((ret = __memp_fget(mpf, &pgno, NULL, txn, 0, &h)) != 0) - return (ret); - - ret = __db_prpage(dbp, h, DB_PR_PAGE); - - if ((t_ret = __memp_fput(mpf, NULL, h, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_prpage - * -- Print out a page. - * - * PUBLIC: int __db_prpage __P((DB *, PAGE *, u_int32_t)); - */ -int -__db_prpage(dbp, h, flags) - DB *dbp; - PAGE *h; - u_int32_t flags; -{ - BINTERNAL *bi; - BKEYDATA *bk; - DB_MSGBUF mb; - ENV *env; - HOFFPAGE a_hkd; - QAMDATA *qp, *qep; - RINTERNAL *ri; - db_indx_t dlen, len, i, *inp; - db_pgno_t pgno; - db_recno_t recno; - u_int32_t pagesize, qlen; - u_int8_t *ep, *hk, *p; - int deleted, ret; - const char *s; - void *sp; - - env = dbp->env; - DB_MSGBUF_INIT(&mb); - - /* - * If we're doing recovery testing and this page is P_INVALID, - * assume it's a page that's on the free list, and don't display it. - */ - if (LF_ISSET(DB_PR_RECOVERYTEST) && TYPE(h) == P_INVALID) - return (0); - - if ((s = __db_pagetype_to_string(TYPE(h))) == NULL) { - __db_msg(env, "ILLEGAL PAGE TYPE: page: %lu type: %lu", - (u_long)h->pgno, (u_long)TYPE(h)); - return (EINVAL); - } - - /* - * !!! - * Find out the page size. 
We don't want to do it the "right" way, - * by reading the value from the meta-data page, that's going to be - * slow. Reach down into the mpool region. - */ - pagesize = (u_int32_t)dbp->mpf->mfp->stat.st_pagesize; - - /* Page number, page type. */ - __db_msgadd(env, &mb, "page %lu: %s:", (u_long)h->pgno, s); - - /* - * LSNs on a metadata page will be different from the original after an - * abort, in some cases. Don't display them if we're testing recovery. - */ - if (!LF_ISSET(DB_PR_RECOVERYTEST) || - (TYPE(h) != P_BTREEMETA && TYPE(h) != P_HASHMETA && - TYPE(h) != P_QAMMETA && TYPE(h) != P_QAMDATA)) - __db_msgadd(env, &mb, " LSN [%lu][%lu]:", - (u_long)LSN(h).file, (u_long)LSN(h).offset); - - /* - * Page level (only applicable for Btree/Recno, but we always display - * it, for no particular reason. - */ - __db_msgadd(env, &mb, " level %lu", (u_long)h->level); - - /* Record count. */ - if (TYPE(h) == P_IBTREE || - TYPE(h) == P_IRECNO || (TYPE(h) == P_LRECNO && - h->pgno == ((BTREE *)dbp->bt_internal)->bt_root)) - __db_msgadd(env, &mb, " records: %lu", (u_long)RE_NREC(h)); - DB_MSGBUF_FLUSH(env, &mb); - - switch (TYPE(h)) { - case P_BTREEMETA: - return (__db_bmeta(dbp, (BTMETA *)h, flags)); - case P_HASHMETA: - return (__db_hmeta(dbp, (HMETA *)h, flags)); - case P_QAMMETA: - return (__db_qmeta(dbp, (QMETA *)h, flags)); - case P_QAMDATA: /* Should be meta->start. */ - if (!LF_ISSET(DB_PR_PAGE)) - return (0); - - qlen = ((QUEUE *)dbp->q_internal)->re_len; - recno = (h->pgno - 1) * QAM_RECNO_PER_PAGE(dbp) + 1; - i = 0; - qep = (QAMDATA *)((u_int8_t *)h + pagesize - qlen); - for (qp = QAM_GET_RECORD(dbp, h, i); qp < qep; - recno++, i++, qp = QAM_GET_RECORD(dbp, h, i)) { - if (!F_ISSET(qp, QAM_SET)) - continue; - - __db_msgadd(env, &mb, "%s", - F_ISSET(qp, QAM_VALID) ? 
"\t" : " D"); - __db_msgadd(env, &mb, "[%03lu] %4lu ", (u_long)recno, - (u_long)((u_int8_t *)qp - (u_int8_t *)h)); - __db_prbytes(env, &mb, qp->data, qlen); - } - return (0); - default: - break; - } - - s = "\t"; - if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) { - __db_msgadd(env, &mb, "%sprev: %4lu next: %4lu", - s, (u_long)PREV_PGNO(h), (u_long)NEXT_PGNO(h)); - s = " "; - } - if (TYPE(h) == P_OVERFLOW) { - __db_msgadd(env, &mb, - "%sref cnt: %4lu ", s, (u_long)OV_REF(h)); - __db_prbytes(env, - &mb, (u_int8_t *)h + P_OVERHEAD(dbp), OV_LEN(h)); - return (0); - } - __db_msgadd(env, &mb, "%sentries: %4lu", s, (u_long)NUM_ENT(h)); - __db_msgadd(env, &mb, " offset: %4lu", (u_long)HOFFSET(h)); - DB_MSGBUF_FLUSH(env, &mb); - - if (TYPE(h) == P_INVALID || !LF_ISSET(DB_PR_PAGE)) - return (0); - - ret = 0; - inp = P_INP(dbp, h); - for (i = 0; i < NUM_ENT(h); i++) { - if ((uintptr_t)(P_ENTRY(dbp, h, i) - (u_int8_t *)h) < - (uintptr_t)(P_OVERHEAD(dbp)) || - (size_t)(P_ENTRY(dbp, h, i) - (u_int8_t *)h) >= pagesize) { - __db_msg(env, - "ILLEGAL PAGE OFFSET: indx: %lu of %lu", - (u_long)i, (u_long)inp[i]); - ret = EINVAL; - continue; - } - deleted = 0; - switch (TYPE(h)) { - case P_HASH_UNSORTED: - case P_HASH: - case P_IBTREE: - case P_IRECNO: - sp = P_ENTRY(dbp, h, i); - break; - case P_LBTREE: - sp = P_ENTRY(dbp, h, i); - deleted = i % 2 == 0 && - B_DISSET(GET_BKEYDATA(dbp, h, i + O_INDX)->type); - break; - case P_LDUP: - case P_LRECNO: - sp = P_ENTRY(dbp, h, i); - deleted = B_DISSET(GET_BKEYDATA(dbp, h, i)->type); - break; - default: - goto type_err; - } - __db_msgadd(env, &mb, "%s", deleted ? 
" D" : "\t"); - __db_msgadd( - env, &mb, "[%03lu] %4lu ", (u_long)i, (u_long)inp[i]); - switch (TYPE(h)) { - case P_HASH_UNSORTED: - case P_HASH: - hk = sp; - switch (HPAGE_PTYPE(hk)) { - case H_OFFDUP: - memcpy(&pgno, - HOFFDUP_PGNO(hk), sizeof(db_pgno_t)); - __db_msgadd(env, &mb, - "%4lu [offpage dups]", (u_long)pgno); - DB_MSGBUF_FLUSH(env, &mb); - break; - case H_DUPLICATE: - /* - * If this is the first item on a page, then - * we cannot figure out how long it is, so - * we only print the first one in the duplicate - * set. - */ - if (i != 0) - len = LEN_HKEYDATA(dbp, h, 0, i); - else - len = 1; - - __db_msgadd(env, &mb, "Duplicates:"); - DB_MSGBUF_FLUSH(env, &mb); - for (p = HKEYDATA_DATA(hk), - ep = p + len; p < ep;) { - memcpy(&dlen, p, sizeof(db_indx_t)); - p += sizeof(db_indx_t); - __db_msgadd(env, &mb, "\t\t"); - __db_prbytes(env, &mb, p, dlen); - p += sizeof(db_indx_t) + dlen; - } - break; - case H_KEYDATA: - __db_prbytes(env, &mb, HKEYDATA_DATA(hk), - LEN_HKEYDATA(dbp, h, i == 0 ? 
- pagesize : 0, i)); - break; - case H_OFFPAGE: - memcpy(&a_hkd, hk, HOFFPAGE_SIZE); - __db_msgadd(env, &mb, - "overflow: total len: %4lu page: %4lu", - (u_long)a_hkd.tlen, (u_long)a_hkd.pgno); - DB_MSGBUF_FLUSH(env, &mb); - break; - default: - DB_MSGBUF_FLUSH(env, &mb); - __db_msg(env, "ILLEGAL HASH PAGE TYPE: %lu", - (u_long)HPAGE_PTYPE(hk)); - ret = EINVAL; - break; - } - break; - case P_IBTREE: - bi = sp; - - if (F_ISSET(dbp, DB_AM_RECNUM)) - __db_msgadd(env, &mb, - "count: %4lu ", (u_long)bi->nrecs); - __db_msgadd(env, &mb, - "pgno: %4lu type: %lu ", - (u_long)bi->pgno, (u_long)bi->type); - switch (B_TYPE(bi->type)) { - case B_KEYDATA: - __db_prbytes(env, &mb, bi->data, bi->len); - break; - case B_DUPLICATE: - case B_OVERFLOW: - __db_proff(env, &mb, bi->data); - break; - default: - DB_MSGBUF_FLUSH(env, &mb); - __db_msg(env, "ILLEGAL BINTERNAL TYPE: %lu", - (u_long)B_TYPE(bi->type)); - ret = EINVAL; - break; - } - break; - case P_IRECNO: - ri = sp; - __db_msgadd(env, &mb, "entries %4lu pgno %4lu", - (u_long)ri->nrecs, (u_long)ri->pgno); - DB_MSGBUF_FLUSH(env, &mb); - break; - case P_LBTREE: - case P_LDUP: - case P_LRECNO: - bk = sp; - switch (B_TYPE(bk->type)) { - case B_KEYDATA: - __db_prbytes(env, &mb, bk->data, bk->len); - break; - case B_DUPLICATE: - case B_OVERFLOW: - __db_proff(env, &mb, bk); - break; - default: - DB_MSGBUF_FLUSH(env, &mb); - __db_msg(env, - "ILLEGAL DUPLICATE/LBTREE/LRECNO TYPE: %lu", - (u_long)B_TYPE(bk->type)); - ret = EINVAL; - break; - } - break; - default: -type_err: DB_MSGBUF_FLUSH(env, &mb); - __db_msg(env, - "ILLEGAL PAGE TYPE: %lu", (u_long)TYPE(h)); - ret = EINVAL; - continue; - } - } - return (ret); -} - -/* - * __db_prbytes -- - * Print out a data element. 
- * - * PUBLIC: void __db_prbytes __P((ENV *, DB_MSGBUF *, u_int8_t *, u_int32_t)); - */ -void -__db_prbytes(env, mbp, bytes, len) - ENV *env; - DB_MSGBUF *mbp; - u_int8_t *bytes; - u_int32_t len; -{ - u_int8_t *p; - u_int32_t i; - int msg_truncated; - - __db_msgadd(env, mbp, "len: %3lu", (u_long)len); - if (len != 0) { - __db_msgadd(env, mbp, " data: "); - - /* - * Print the first 20 bytes of the data. If that chunk is - * all printable characters, print it as text, else print it - * in hex. We have this heuristic because we're displaying - * things like lock objects that could be either text or data. - */ - if (len > 20) { - len = 20; - msg_truncated = 1; - } else - msg_truncated = 0; - for (p = bytes, i = len; i > 0; --i, ++p) - if (!isprint((int)*p) && *p != '\t' && *p != '\n') - break; - if (i == 0) - for (p = bytes, i = len; i > 0; --i, ++p) - __db_msgadd(env, mbp, "%c", *p); - else - for (p = bytes, i = len; i > 0; --i, ++p) - __db_msgadd(env, mbp, "%#.2x", (u_int)*p); - if (msg_truncated) - __db_msgadd(env, mbp, "..."); - } - DB_MSGBUF_FLUSH(env, mbp); -} - -/* - * __db_proff -- - * Print out an off-page element. - */ -static void -__db_proff(env, mbp, vp) - ENV *env; - DB_MSGBUF *mbp; - void *vp; -{ - BOVERFLOW *bo; - - bo = vp; - switch (B_TYPE(bo->type)) { - case B_OVERFLOW: - __db_msgadd(env, mbp, "overflow: total len: %4lu page: %4lu", - (u_long)bo->tlen, (u_long)bo->pgno); - break; - case B_DUPLICATE: - __db_msgadd( - env, mbp, "duplicate: page: %4lu", (u_long)bo->pgno); - break; - default: - /* NOTREACHED */ - break; - } - DB_MSGBUF_FLUSH(env, mbp); -} - -/* - * __db_prflags -- - * Print out flags values. 
- * - * PUBLIC: void __db_prflags __P((ENV *, DB_MSGBUF *, - * PUBLIC: u_int32_t, const FN *, const char *, const char *)); - */ -void -__db_prflags(env, mbp, flags, fn, prefix, suffix) - ENV *env; - DB_MSGBUF *mbp; - u_int32_t flags; - FN const *fn; - const char *prefix, *suffix; -{ - DB_MSGBUF mb; - const FN *fnp; - int found, standalone; - const char *sep; - - if (fn == NULL) - return; - - /* - * If it's a standalone message, output the suffix (which will be the - * label), regardless of whether we found anything or not, and flush - * the line. - */ - if (mbp == NULL) { - standalone = 1; - mbp = &mb; - DB_MSGBUF_INIT(mbp); - } else - standalone = 0; - - sep = prefix == NULL ? "" : prefix; - for (found = 0, fnp = fn; fnp->mask != 0; ++fnp) - if (LF_ISSET(fnp->mask)) { - __db_msgadd(env, mbp, "%s%s", sep, fnp->name); - sep = ", "; - found = 1; - } - - if ((standalone || found) && suffix != NULL) - __db_msgadd(env, mbp, "%s", suffix); - if (standalone) - DB_MSGBUF_FLUSH(env, mbp); -} - -/* - * __db_lockmode_to_string -- - * Return the name of the lock mode. - * - * PUBLIC: const char * __db_lockmode_to_string __P((db_lockmode_t)); - */ -const char * -__db_lockmode_to_string(mode) - db_lockmode_t mode; -{ - switch (mode) { - case DB_LOCK_NG: - return ("Not granted"); - case DB_LOCK_READ: - return ("Shared/read"); - case DB_LOCK_WRITE: - return ("Exclusive/write"); - case DB_LOCK_WAIT: - return ("Wait for event"); - case DB_LOCK_IWRITE: - return ("Intent exclusive/write"); - case DB_LOCK_IREAD: - return ("Intent shared/read"); - case DB_LOCK_IWR: - return ("Intent to read/write"); - case DB_LOCK_READ_UNCOMMITTED: - return ("Read uncommitted"); - case DB_LOCK_WWRITE: - return ("Was written"); - default: - break; - } - return ("UNKNOWN LOCK MODE"); -} - -/* - * __db_pagetype_to_string -- - * Return the name of the specified page type. 
- */ -static const char * -__db_pagetype_to_string(type) - u_int32_t type; -{ - char *s; - - s = NULL; - switch (type) { - case P_BTREEMETA: - s = "btree metadata"; - break; - case P_LDUP: - s = "duplicate"; - break; - case P_HASH_UNSORTED: - s = "hash unsorted"; - break; - case P_HASH: - s = "hash"; - break; - case P_HASHMETA: - s = "hash metadata"; - break; - case P_IBTREE: - s = "btree internal"; - break; - case P_INVALID: - s = "invalid"; - break; - case P_IRECNO: - s = "recno internal"; - break; - case P_LBTREE: - s = "btree leaf"; - break; - case P_LRECNO: - s = "recno leaf"; - break; - case P_OVERFLOW: - s = "overflow"; - break; - case P_QAMMETA: - s = "queue metadata"; - break; - case P_QAMDATA: - s = "queue"; - break; - default: - /* Just return a NULL. */ - break; - } - return (s); -} - -#else /* !HAVE_STATISTICS */ - -/* - * __db_dumptree -- - * Dump the tree to a file. - * - * PUBLIC: int __db_dumptree __P((DB *, DB_TXN *, char *, char *)); - */ -int -__db_dumptree(dbp, txn, op, name) - DB *dbp; - DB_TXN *txn; - char *op, *name; -{ - COMPQUIET(txn, NULL); - COMPQUIET(op, NULL); - COMPQUIET(name, NULL); - - return (__db_stat_not_built(dbp->env)); -} - -/* - * __db_get_flags_fn -- - * Return the __db_flags_fn array. - * - * PUBLIC: const FN * __db_get_flags_fn __P((void)); - */ -const FN * -__db_get_flags_fn() -{ - /* - * !!! - * The Tcl API uses this interface, stub it off. - */ - return (NULL); -} -#endif - -/* - * __db_dump_pp -- - * DB->dump pre/post processing. 
- * - * PUBLIC: int __db_dump_pp __P((DB *, const char *, - * PUBLIC: int (*)(void *, const void *), void *, int, int)); - */ -int -__db_dump_pp(dbp, subname, callback, handle, pflag, keyflag) - DB *dbp; - const char *subname; - int (*callback) __P((void *, const void *)); - void *handle; - int pflag, keyflag; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->dump"); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 1)) != 0) { - handle_check = 0; - goto err; - } - - ret = __db_dump(dbp, subname, callback, handle, pflag, keyflag); - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_dump -- - * DB->dump. - * - * PUBLIC: int __db_dump __P((DB *, const char *, - * PUBLIC: int (*)(void *, const void *), void *, int, int)); - */ -int -__db_dump(dbp, subname, callback, handle, pflag, keyflag) - DB *dbp; - const char *subname; - int (*callback) __P((void *, const void *)); - void *handle; - int pflag, keyflag; -{ - DBC *dbcp; - DBT key, data; - DBT keyret, dataret; - ENV *env; - db_recno_t recno; - int is_recno, ret, t_ret; - void *pointer; - - env = dbp->env; - - if ((ret = __db_prheader( - dbp, subname, pflag, keyflag, handle, callback, NULL, 0)) != 0) - return (ret); - - /* - * Get a cursor and step through the database, printing out each - * key/data pair. - */ - if ((ret = __db_cursor(dbp, NULL, NULL, &dbcp, 0)) != 0) - return (ret); - - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - if ((ret = __os_malloc(env, 1024 * 1024, &data.data)) != 0) - goto err; - data.ulen = 1024 * 1024; - data.flags = DB_DBT_USERMEM; - is_recno = (dbp->type == DB_RECNO || dbp->type == DB_QUEUE); - keyflag = is_recno ? 
keyflag : 1; - if (is_recno) { - keyret.data = &recno; - keyret.size = sizeof(recno); - } - -retry: while ((ret = - __dbc_get(dbcp, &key, &data, DB_NEXT | DB_MULTIPLE_KEY)) == 0) { - DB_MULTIPLE_INIT(pointer, &data); - for (;;) { - if (is_recno) - DB_MULTIPLE_RECNO_NEXT(pointer, &data, - recno, dataret.data, dataret.size); - else - DB_MULTIPLE_KEY_NEXT(pointer, - &data, keyret.data, - keyret.size, dataret.data, dataret.size); - - if (dataret.data == NULL) - break; - - if ((keyflag && - (ret = __db_prdbt(&keyret, pflag, " ", - handle, callback, is_recno)) != 0) || - (ret = __db_prdbt(&dataret, pflag, " ", - handle, callback, 0)) != 0) - goto err; - } - } - if (ret == DB_BUFFER_SMALL) { - data.size = (u_int32_t)DB_ALIGN(data.size, 1024); - if ((ret = __os_realloc(env, data.size, &data.data)) != 0) - goto err; - data.ulen = data.size; - goto retry; - } - if (ret == DB_NOTFOUND) - ret = 0; - - if ((t_ret = __db_prfooter(handle, callback)) != 0 && ret == 0) - ret = t_ret; - -err: if ((t_ret = __dbc_close(dbcp)) != 0 && ret == 0) - ret = t_ret; - if (data.data != NULL) - __os_free(env, data.data); - - return (ret); -} - -/* - * __db_prdbt -- - * Print out a DBT data element. - * - * PUBLIC: int __db_prdbt __P((DBT *, int, const char *, void *, - * PUBLIC: int (*)(void *, const void *), int)); - */ -int -__db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno) - DBT *dbtp; - int checkprint; - const char *prefix; - void *handle; - int (*callback) __P((void *, const void *)); - int is_recno; -{ - static const u_char hex[] = "0123456789abcdef"; - db_recno_t recno; - size_t len; - int ret; -#define DBTBUFLEN 100 - u_int8_t *p, *hp; - char buf[DBTBUFLEN], hbuf[DBTBUFLEN]; - - /* - * !!! - * This routine is the routine that dumps out items in the format - * used by db_dump(1) and db_load(1). This means that the format - * cannot change. 
- */ - if (prefix != NULL && (ret = callback(handle, prefix)) != 0) - return (ret); - if (is_recno) { - /* - * We're printing a record number, and this has to be done - * in a platform-independent way. So we use the numeral in - * straight ASCII. - */ - (void)__ua_memcpy(&recno, dbtp->data, sizeof(recno)); - snprintf(buf, DBTBUFLEN, "%lu", (u_long)recno); - - /* If we're printing data as hex, print keys as hex too. */ - if (!checkprint) { - for (len = strlen(buf), p = (u_int8_t *)buf, - hp = (u_int8_t *)hbuf; len-- > 0; ++p) { - *hp++ = hex[(u_int8_t)(*p & 0xf0) >> 4]; - *hp++ = hex[*p & 0x0f]; - } - *hp = '\0'; - ret = callback(handle, hbuf); - } else - ret = callback(handle, buf); - - if (ret != 0) - return (ret); - } else if (checkprint) { - for (len = dbtp->size, p = dbtp->data; len--; ++p) - if (isprint((int)*p)) { - if (*p == '\\' && - (ret = callback(handle, "\\")) != 0) - return (ret); - snprintf(buf, DBTBUFLEN, "%c", *p); - if ((ret = callback(handle, buf)) != 0) - return (ret); - } else { - snprintf(buf, DBTBUFLEN, "\\%c%c", - hex[(u_int8_t)(*p & 0xf0) >> 4], - hex[*p & 0x0f]); - if ((ret = callback(handle, buf)) != 0) - return (ret); - } - } else - for (len = dbtp->size, p = dbtp->data; len--; ++p) { - snprintf(buf, DBTBUFLEN, "%c%c", - hex[(u_int8_t)(*p & 0xf0) >> 4], - hex[*p & 0x0f]); - if ((ret = callback(handle, buf)) != 0) - return (ret); - } - - return (callback(handle, "\n")); -} - -/* - * __db_prheader -- - * Write out header information in the format expected by db_load. 
- * - * PUBLIC: int __db_prheader __P((DB *, const char *, int, int, void *, - * PUBLIC: int (*)(void *, const void *), VRFY_DBINFO *, db_pgno_t)); - */ -int -__db_prheader(dbp, subname, pflag, keyflag, handle, callback, vdp, meta_pgno) - DB *dbp; - const char *subname; - int pflag, keyflag; - void *handle; - int (*callback) __P((void *, const void *)); - VRFY_DBINFO *vdp; - db_pgno_t meta_pgno; -{ - DBT dbt; - DBTYPE dbtype; - ENV *env; - VRFY_PAGEINFO *pip; - u_int32_t flags, tmp_u_int32; - size_t buflen; - char *buf; - int using_vdp, ret, t_ret, tmp_int; - - ret = 0; - buf = NULL; - COMPQUIET(buflen, 0); - - /* - * If dbp is NULL, then pip is guaranteed to be non-NULL; we only ever - * call __db_prheader with a NULL dbp from one case inside __db_prdbt, - * and this is a special subdatabase for "lost" items. In this case - * we have a vdp (from which we'll get a pip). In all other cases, we - * will have a non-NULL dbp (and vdp may or may not be NULL depending - * on whether we're salvaging). - */ - if (dbp == NULL) - env = NULL; - else - env = dbp->env; - DB_ASSERT(env, dbp != NULL || vdp != NULL); - - /* - * If we've been passed a verifier statistics object, use that; we're - * being called in a context where dbp->stat is unsafe. - * - * Also, the verifier may set the pflag on a per-salvage basis. If so, - * respect that. - */ - if (vdp != NULL) { - if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0) - return (ret); - - if (F_ISSET(vdp, SALVAGE_PRINTABLE)) - pflag = 1; - using_vdp = 1; - } else { - pip = NULL; - using_vdp = 0; - } - - /* - * If dbp is NULL, make it a btree. Otherwise, set dbtype to whatever - * appropriate type for the specified meta page, or the type of the dbp. 
- */ - if (dbp == NULL) - dbtype = DB_BTREE; - else if (using_vdp) - switch (pip->type) { - case P_BTREEMETA: - if (F_ISSET(pip, VRFY_IS_RECNO)) - dbtype = DB_RECNO; - else - dbtype = DB_BTREE; - break; - case P_HASHMETA: - dbtype = DB_HASH; - break; - case P_QAMMETA: - dbtype = DB_QUEUE; - break; - default: - /* - * If the meta page is of a bogus type, it's because - * we have a badly corrupt database. (We must be in - * the verifier for pip to be non-NULL.) Pretend we're - * a Btree and salvage what we can. - */ - DB_ASSERT(env, F_ISSET(dbp, DB_AM_VERIFYING)); - dbtype = DB_BTREE; - break; - } - else - dbtype = dbp->type; - - if ((ret = callback(handle, "VERSION=3\n")) != 0) - goto err; - if (pflag) { - if ((ret = callback(handle, "format=print\n")) != 0) - goto err; - } else if ((ret = callback(handle, "format=bytevalue\n")) != 0) - goto err; - - /* - * 64 bytes is long enough, as a minimum bound, for any of the - * fields besides subname. Subname uses __db_prdbt and therefore - * does not need buffer space here. - */ - buflen = 64; - if ((ret = __os_malloc(env, buflen, &buf)) != 0) - goto err; - if (subname != NULL) { - snprintf(buf, buflen, "database="); - if ((ret = callback(handle, buf)) != 0) - goto err; - DB_INIT_DBT(dbt, subname, strlen(subname)); - if ((ret = __db_prdbt(&dbt, 1, NULL, handle, callback, 0)) != 0) - goto err; - } - switch (dbtype) { - case DB_BTREE: - if ((ret = callback(handle, "type=btree\n")) != 0) - goto err; - if (using_vdp) - tmp_int = F_ISSET(pip, VRFY_HAS_RECNUMS) ? 1 : 0; - else { - if ((ret = __db_get_flags(dbp, &flags)) != 0) { - __db_err(env, ret, "DB->get_flags"); - goto err; - } - tmp_int = F_ISSET(dbp, DB_AM_RECNUM) ? 
1 : 0; - } - if (tmp_int && (ret = callback(handle, "recnum=1\n")) != 0) - goto err; - - if (using_vdp) - tmp_u_int32 = pip->bt_minkey; - else - if ((ret = - __bam_get_bt_minkey(dbp, &tmp_u_int32)) != 0) { - __db_err(env, ret, "DB->get_bt_minkey"); - goto err; - } - if (tmp_u_int32 != 0 && tmp_u_int32 != DEFMINKEYPAGE) { - snprintf(buf, buflen, - "bt_minkey=%lu\n", (u_long)tmp_u_int32); - if ((ret = callback(handle, buf)) != 0) - goto err; - } - break; - case DB_HASH: -#ifdef HAVE_HASH - if ((ret = callback(handle, "type=hash\n")) != 0) - goto err; - if (using_vdp) - tmp_u_int32 = pip->h_ffactor; - else - if ((ret = - __ham_get_h_ffactor(dbp, &tmp_u_int32)) != 0) { - __db_err(env, ret, "DB->get_h_ffactor"); - goto err; - } - if (tmp_u_int32 != 0) { - snprintf(buf, buflen, - "h_ffactor=%lu\n", (u_long)tmp_u_int32); - if ((ret = callback(handle, buf)) != 0) - goto err; - } - - if (using_vdp) - tmp_u_int32 = pip->h_nelem; - else - if ((ret = __ham_get_h_nelem(dbp, &tmp_u_int32)) != 0) { - __db_err(env, ret, "DB->get_h_nelem"); - goto err; - } - /* - * Hash databases have an h_nelem field of 0 or 1, neither - * of those values is interesting. 
- */ - if (tmp_u_int32 > 1) { - snprintf(buf, buflen, - "h_nelem=%lu\n", (u_long)tmp_u_int32); - if ((ret = callback(handle, buf)) != 0) - goto err; - } - break; -#else - ret = __db_no_hash_am(env); - goto err; -#endif - case DB_QUEUE: -#ifdef HAVE_QUEUE - if ((ret = callback(handle, "type=queue\n")) != 0) - goto err; - if (using_vdp) - tmp_u_int32 = vdp->re_len; - else - if ((ret = __ram_get_re_len(dbp, &tmp_u_int32)) != 0) { - __db_err(env, ret, "DB->get_re_len"); - goto err; - } - snprintf(buf, buflen, "re_len=%lu\n", (u_long)tmp_u_int32); - if ((ret = callback(handle, buf)) != 0) - goto err; - - if (using_vdp) - tmp_int = (int)vdp->re_pad; - else - if ((ret = __ram_get_re_pad(dbp, &tmp_int)) != 0) { - __db_err(env, ret, "DB->get_re_pad"); - goto err; - } - if (tmp_int != 0 && tmp_int != ' ') { - snprintf(buf, buflen, "re_pad=%#x\n", tmp_int); - if ((ret = callback(handle, buf)) != 0) - goto err; - } - - if (using_vdp) - tmp_u_int32 = vdp->page_ext; - else - if ((ret = - __qam_get_extentsize(dbp, &tmp_u_int32)) != 0) { - __db_err(env, ret, "DB->get_q_extentsize"); - goto err; - } - if (tmp_u_int32 != 0) { - snprintf(buf, buflen, - "extentsize=%lu\n", (u_long)tmp_u_int32); - if ((ret = callback(handle, buf)) != 0) - goto err; - } - break; -#else - ret = __db_no_queue_am(env); - goto err; -#endif - case DB_RECNO: - if ((ret = callback(handle, "type=recno\n")) != 0) - goto err; - if (using_vdp) - tmp_int = F_ISSET(pip, VRFY_IS_RRECNO) ? 1 : 0; - else - tmp_int = F_ISSET(dbp, DB_AM_RENUMBER) ? 1 : 0; - if (tmp_int != 0 && - (ret = callback(handle, "renumber=1\n")) != 0) - goto err; - - if (using_vdp) - tmp_int = F_ISSET(pip, VRFY_IS_FIXEDLEN) ? 1 : 0; - else - tmp_int = F_ISSET(dbp, DB_AM_FIXEDLEN) ? 
1 : 0; - if (tmp_int) { - if (using_vdp) - tmp_u_int32 = pip->re_len; - else - if ((ret = - __ram_get_re_len(dbp, &tmp_u_int32)) != 0) { - __db_err(env, ret, "DB->get_re_len"); - goto err; - } - snprintf(buf, buflen, - "re_len=%lu\n", (u_long)tmp_u_int32); - if ((ret = callback(handle, buf)) != 0) - goto err; - - if (using_vdp) - tmp_int = (int)pip->re_pad; - else - if ((ret = - __ram_get_re_pad(dbp, &tmp_int)) != 0) { - __db_err(env, ret, "DB->get_re_pad"); - goto err; - } - if (tmp_int != 0 && tmp_int != ' ') { - snprintf(buf, - buflen, "re_pad=%#x\n", (u_int)tmp_int); - if ((ret = callback(handle, buf)) != 0) - goto err; - } - } - break; - case DB_UNKNOWN: /* Impossible. */ - ret = __db_unknown_path(env, "__db_prheader"); - goto err; - } - - if (using_vdp) { - if (F_ISSET(pip, VRFY_HAS_CHKSUM)) - if ((ret = callback(handle, "chksum=1\n")) != 0) - goto err; - if (F_ISSET(pip, VRFY_HAS_DUPS)) - if ((ret = callback(handle, "duplicates=1\n")) != 0) - goto err; - if (F_ISSET(pip, VRFY_HAS_DUPSORT)) - if ((ret = callback(handle, "dupsort=1\n")) != 0) - goto err; -#ifdef HAVE_COMPRESSION - if (F_ISSET(pip, VRFY_HAS_COMPRESS)) - if ((ret = callback(handle, "compressed=1\n")) != 0) - goto err; -#endif - /* - * !!! - * We don't know if the page size was the default if we're - * salvaging. It doesn't seem that interesting to have, so - * we ignore it for now. 
- */ - } else { - if (F_ISSET(dbp, DB_AM_CHKSUM)) - if ((ret = callback(handle, "chksum=1\n")) != 0) - goto err; - if (F_ISSET(dbp, DB_AM_DUP)) - if ((ret = callback(handle, "duplicates=1\n")) != 0) - goto err; - if (F_ISSET(dbp, DB_AM_DUPSORT)) - if ((ret = callback(handle, "dupsort=1\n")) != 0) - goto err; -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(dbp)) - if ((ret = callback(handle, "compressed=1\n")) != 0) - goto err; -#endif - if (!F_ISSET(dbp, DB_AM_PGDEF)) { - snprintf(buf, buflen, - "db_pagesize=%lu\n", (u_long)dbp->pgsize); - if ((ret = callback(handle, buf)) != 0) - goto err; - } - } - -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbp) && - F_ISSET((DB_PARTITION *)dbp->p_internal, PART_RANGE)) { - DBT *keys; - u_int32_t i; - - if ((ret = __partition_get_keys(dbp, &tmp_u_int32, &keys)) != 0) - goto err; - if (tmp_u_int32 != 0) { - snprintf(buf, - buflen, "nparts=%lu\n", (u_long)tmp_u_int32); - if ((ret = callback(handle, buf)) != 0) - goto err; - for (i = 0; i < tmp_u_int32 - 1; i++) - if ((ret = __db_prdbt(&keys[i], - pflag, " ", handle, callback, 0)) != 0) - goto err; - } - } -#endif - - if (keyflag && (ret = callback(handle, "keys=1\n")) != 0) - goto err; - - ret = callback(handle, "HEADER=END\n"); - -err: if (using_vdp && - (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - if (buf != NULL) - __os_free(env, buf); - - return (ret); -} - -/* - * __db_prfooter -- - * Print the footer that marks the end of a DB dump. This is trivial, - * but for consistency's sake we don't want to put its literal contents - * in multiple places. - * - * PUBLIC: int __db_prfooter __P((void *, int (*)(void *, const void *))); - */ -int -__db_prfooter(handle, callback) - void *handle; - int (*callback) __P((void *, const void *)); -{ - return (callback(handle, "DATA=END\n")); -} - -/* - * __db_pr_callback -- - * Callback function for using pr_* functions from C. 
- * - * PUBLIC: int __db_pr_callback __P((void *, const void *)); - */ -int -__db_pr_callback(handle, str_arg) - void *handle; - const void *str_arg; -{ - char *str; - FILE *f; - - str = (char *)str_arg; - f = (FILE *)handle; - - if (fprintf(f, "%s", str) != (int)strlen(str)) - return (EIO); - - return (0); -} - -/* - * __db_dbtype_to_string -- - * Return the name of the database type. - * - * PUBLIC: const char * __db_dbtype_to_string __P((DBTYPE)); - */ -const char * -__db_dbtype_to_string(type) - DBTYPE type; -{ - switch (type) { - case DB_BTREE: - return ("btree"); - case DB_HASH: - return ("hash"); - case DB_RECNO: - return ("recno"); - case DB_QUEUE: - return ("queue"); - case DB_UNKNOWN: - default: - break; - } - return ("UNKNOWN TYPE"); -} diff --git a/db/db_rec.c b/db/db_rec.c deleted file mode 100644 index 02fe096..0000000 --- a/db/db_rec.c +++ /dev/null @@ -1,1859 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996, 2010 Oracle and/or its affiliates. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/hash.h" - -static int __db_pg_free_recover_int __P((ENV *, DB_THREAD_INFO *, - __db_pg_freedata_args *, DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int)); -static int __db_pg_free_recover_42_int __P((ENV *, DB_THREAD_INFO *, - __db_pg_freedata_42_args *, - DB *, DB_LSN *, DB_MPOOLFILE *, db_recops, int)); - -/* - * PUBLIC: int __db_addrem_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - * - * This log message is generated whenever we add or remove a duplicate - * to/from a duplicate page. On recover, we just do the opposite. 
- */ -int -__db_addrem_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_addrem_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, modified, ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - pagep = NULL; - REC_PRINT(__db_addrem_print); - REC_INTRO(__db_addrem_read, ip, 1); - - REC_FGET(mpf, ip, argp->pgno, &pagep, done); - modified = 0; - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); - CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_DUP) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_DUP)) { - /* Need to redo an add, or undo a delete. */ - REC_DIRTY(mpf, ip, dbc->priority, &pagep); - if ((ret = __db_pitem(dbc, pagep, argp->indx, argp->nbytes, - argp->hdr.size == 0 ? NULL : &argp->hdr, - argp->dbt.size == 0 ? NULL : &argp->dbt)) != 0) - goto out; - modified = 1; - - } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_DUP) || - (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_DUP)) { - /* Need to undo an add, or redo a delete. 
*/ - REC_DIRTY(mpf, ip, dbc->priority, &pagep); - if ((ret = __db_ditem(dbc, - pagep, argp->indx, argp->nbytes)) != 0) - goto out; - modified = 1; - } - - if (modified) { - if (DB_REDO(op)) - LSN(pagep) = *lsnp; - else - LSN(pagep) = argp->pagelsn; - } - - if ((ret = __memp_fput(mpf, ip, pagep, dbc->priority)) != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, ip, pagep, dbc->priority); - REC_CLOSE; -} - -/* - * PUBLIC: int __db_big_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_big_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_big_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, modified, ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - pagep = NULL; - REC_PRINT(__db_big_print); - REC_INTRO(__db_big_read, ip, 0); - - REC_FGET(mpf, ip, argp->pgno, &pagep, ppage); - modified = 0; - - /* - * There are three pages we need to check. The one on which we are - * adding data, the previous one whose next_pointer may have - * been updated, and the next one whose prev_pointer may have - * been updated. - */ - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->pagelsn); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->pagelsn); - CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); - if ((cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) || - (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_REM_BIG)) { - /* We are either redo-ing an add, or undoing a delete. 
*/ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, argp->pgno, argp->prev_pgno, - argp->next_pgno, 0, P_OVERFLOW); - OV_LEN(pagep) = argp->dbt.size; - OV_REF(pagep) = 1; - memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp), argp->dbt.data, - argp->dbt.size); - PREV_PGNO(pagep) = argp->prev_pgno; - modified = 1; - } else if ((cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_ADD_BIG) || - (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_REM_BIG)) { - /* - * We are either undo-ing an add or redo-ing a delete. - * The page is about to be reclaimed in either case, so - * there really isn't anything to do here. - */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - modified = 1; - } else if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_APPEND_BIG) { - /* We are redoing an append. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - memcpy((u_int8_t *)pagep + P_OVERHEAD(file_dbp) + - OV_LEN(pagep), argp->dbt.data, argp->dbt.size); - OV_LEN(pagep) += argp->dbt.size; - modified = 1; - } else if (cmp_n == 0 && DB_UNDO(op) && argp->opcode == DB_APPEND_BIG) { - /* We are undoing an append. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - OV_LEN(pagep) -= argp->dbt.size; - memset((u_int8_t *)pagep + P_OVERHEAD(file_dbp) + - OV_LEN(pagep), 0, argp->dbt.size); - modified = 1; - } - if (modified) - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->pagelsn; - - ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); - pagep = NULL; - if (ret != 0) - goto out; - - /* - * We only delete a whole chain of overflow items, and appends only - * apply to a single page. Adding a page is the only case that - * needs to update the chain. - */ - if (argp->opcode != DB_ADD_BIG) - goto done; - - /* Now check the previous page. 
*/ -ppage: if (argp->prev_pgno != PGNO_INVALID) { - REC_FGET(mpf, ip, argp->prev_pgno, &pagep, npage); - modified = 0; - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn); - CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); - - if (cmp_p == 0 && DB_REDO(op) && argp->opcode == DB_ADD_BIG) { - /* Redo add, undo delete. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - NEXT_PGNO(pagep) = argp->pgno; - modified = 1; - } else if (cmp_n == 0 && - DB_UNDO(op) && argp->opcode == DB_ADD_BIG) { - /* Redo delete, undo add. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - NEXT_PGNO(pagep) = argp->next_pgno; - modified = 1; - } - if (modified) - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->prevlsn; - ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); - pagep = NULL; - if (ret != 0) - goto out; - } - pagep = NULL; - - /* Now check the next page. Can only be set on a delete. */ -npage: if (argp->next_pgno != PGNO_INVALID) { - REC_FGET(mpf, ip, argp->next_pgno, &pagep, done); - modified = 0; - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->nextlsn); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->nextlsn); - CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - PREV_PGNO(pagep) = PGNO_INVALID; - modified = 1; - } else if (cmp_n == 0 && DB_UNDO(op)) { - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - PREV_PGNO(pagep) = argp->pgno; - modified = 1; - } - if (modified) - LSN(pagep) = DB_REDO(op) ? *lsnp : argp->nextlsn; - ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); - pagep = NULL; - if (ret != 0) - goto out; - } - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); - REC_CLOSE; -} - -/* - * __db_ovref_recover -- - * Recovery function for __db_ovref(). 
- * - * PUBLIC: int __db_ovref_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_ovref_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_ovref_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp, ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - pagep = NULL; - REC_PRINT(__db_ovref_print); - REC_INTRO(__db_ovref_read, ip, 0); - - REC_FGET(mpf, ip, argp->pgno, &pagep, done); - - cmp = LOG_COMPARE(&LSN(pagep), &argp->lsn); - CHECK_LSN(env, op, cmp, &LSN(pagep), &argp->lsn); - if (cmp == 0 && DB_REDO(op)) { - /* Need to redo update described. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - OV_REF(pagep) += argp->adjust; - pagep->lsn = *lsnp; - } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - OV_REF(pagep) -= argp->adjust; - pagep->lsn = argp->lsn; - } - ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); - pagep = NULL; - if (ret != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); - REC_CLOSE; -} - -/* - * __db_debug_recover -- - * Recovery function for debug. - * - * PUBLIC: int __db_debug_recover __P((ENV *, - * PUBLIC: DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_debug_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_debug_args *argp; - int ret; - - COMPQUIET(op, DB_TXN_ABORT); - COMPQUIET(info, NULL); - - REC_PRINT(__db_debug_print); - REC_NOOP_INTRO(__db_debug_read); - - *lsnp = argp->prev_lsn; - ret = 0; - - REC_NOOP_CLOSE; -} - -/* - * __db_noop_recover -- - * Recovery function for noop. 
- * - * PUBLIC: int __db_noop_recover __P((ENV *, - * PUBLIC: DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_noop_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_noop_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - pagep = NULL; - REC_PRINT(__db_noop_print); - REC_INTRO(__db_noop_read, ip, 0); - - REC_FGET(mpf, ip, argp->pgno, &pagep, done); - - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->prevlsn); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->prevlsn); - CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - LSN(pagep) = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - LSN(pagep) = argp->prevlsn; - } - ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); - pagep = NULL; - -done: *lsnp = argp->prev_lsn; -out: if (pagep != NULL) - (void)__memp_fput(mpf, - ip, pagep, file_dbp->priority); - REC_CLOSE; -} - -/* - * __db_pg_alloc_recover -- - * Recovery function for pg_alloc. - * - * PUBLIC: int __db_pg_alloc_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_alloc_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_pg_alloc_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DBMETA *meta; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, created, level, ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - meta = NULL; - pagep = NULL; - created = 0; - REC_PRINT(__db_pg_alloc_print); - REC_INTRO(__db_pg_alloc_read, ip, 0); - - /* - * Fix up the metadata page. If we're redoing the operation, we have - * to get the metadata page and update its LSN and its free pointer. 
- * If we're undoing the operation and the page was ever created, we put - * it on the freelist. - */ - pgno = PGNO_BASE_MD; - if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) { - /* The metadata page must always exist on redo. */ - if (DB_REDO(op)) { - ret = __db_pgerr(file_dbp, pgno, ret); - goto out; - } else - goto done; - } - cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); - cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); - CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); - CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp); - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo update described. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &meta); - LSN(meta) = *lsnp; - meta->free = argp->next; - if (argp->pgno > meta->last_pgno) - meta->last_pgno = argp->pgno; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo update described. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &meta); - LSN(meta) = argp->meta_lsn; - /* - * If the page has a zero LSN then its newly created and - * will be truncated rather than go on the free list. - */ - if (!IS_ZERO_LSN(argp->page_lsn)) - meta->free = argp->pgno; - meta->last_pgno = argp->last_pgno; - } - -#ifdef HAVE_FTRUNCATE - /* - * check to see if we are keeping a sorted freelist, if so put - * this back in the in memory list. It must be the first element. - */ - if (op == DB_TXN_ABORT && !IS_ZERO_LSN(argp->page_lsn)) { - db_pgno_t *list; - u_int32_t nelem; - - if ((ret = __memp_get_freelist(mpf, &nelem, &list)) != 0) - goto out; - if (list != NULL && (nelem == 0 || *list != argp->pgno)) { - if ((ret = - __memp_extend_freelist(mpf, nelem + 1, &list)) != 0) - goto out; - if (nelem != 0) - memmove(list + 1, list, nelem * sizeof(*list)); - *list = argp->pgno; - } - } -#endif - - /* - * Fix up the allocated page. If the page does not exist - * and we can truncate it then don't create it. 
- * Otherwise if we're redoing the operation, we have - * to get the page (creating it if it doesn't exist), and update its - * LSN. If we're undoing the operation, we have to reset the page's - * LSN and put it on the free list. - */ - if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { - /* - * We have to be able to identify if a page was newly - * created so we can recover it properly. We cannot simply - * look for an empty header, because hash uses a pgin - * function that will set the header. Instead, we explicitly - * try for the page without CREATE and if that fails, then - * create it. - */ - if (DB_UNDO(op)) - goto do_truncate; - if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, - DB_MPOOL_CREATE, &pagep)) != 0) { - if (DB_UNDO(op) && ret == ENOSPC) - goto do_truncate; - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - created = 1; - } - - /* Fix up the allocated page. */ - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn); - - /* - * If an initial allocation is aborted and then reallocated during - * an archival restore the log record will have an LSN for the page - * but the page will be empty. - */ - if (IS_ZERO_LSN(LSN(pagep))) - cmp_p = 0; - - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn); - /* - * Another special case we have to handle is if we ended up with a - * page of all 0's which can happen if we abort between allocating a - * page in mpool and initializing it. In that case, even if we're - * undoing, we need to re-initialize the page. - */ - if (DB_REDO(op) && cmp_p == 0) { - /* Need to redo update described. 
*/ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - switch (argp->ptype) { - case P_LBTREE: - case P_LRECNO: - case P_LDUP: - level = LEAFLEVEL; - break; - default: - level = 0; - break; - } - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype); - - pagep->lsn = *lsnp; - } else if (DB_UNDO(op) && (cmp_n == 0 || created)) { - /* - * This is where we handle the case of a 0'd page (pagep->pgno - * is equal to PGNO_INVALID). - * Undo the allocation, reinitialize the page and - * link its next pointer to the free list. - */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); - - pagep->lsn = argp->page_lsn; - } - -do_truncate: - /* - * If the page was newly created, give it back. - */ - if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) && - IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) { - /* Discard the page. */ - if (pagep != NULL) { - if ((ret = __memp_fput(mpf, ip, - pagep, DB_PRIORITY_VERY_LOW)) != 0) - goto out; - pagep = NULL; - } - /* Give the page back to the OS. 
*/ - if (meta->last_pgno <= argp->pgno && (ret = __memp_ftruncate( - mpf, NULL, ip, argp->pgno, MP_TRUNC_RECOVER)) != 0) - goto out; - } - - if (pagep != NULL) { - ret = __memp_fput(mpf, ip, pagep, file_dbp->priority); - pagep = NULL; - if (ret != 0) - goto out; - } - - ret = __memp_fput(mpf, ip, meta, file_dbp->priority); - meta = NULL; - if (ret != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); - if (meta != NULL) - (void)__memp_fput(mpf, ip, meta, file_dbp->priority); - REC_CLOSE; -} - -/* - * __db_pg_free_recover_int -- - */ -static int -__db_pg_free_recover_int(env, ip, argp, file_dbp, lsnp, mpf, op, data) - ENV *env; - DB_THREAD_INFO *ip; - __db_pg_freedata_args *argp; - DB *file_dbp; - DB_LSN *lsnp; - DB_MPOOLFILE *mpf; - db_recops op; - int data; -{ - DBMETA *meta; - DB_LSN copy_lsn; - PAGE *pagep, *prevp; - int cmp_n, cmp_p, is_meta, ret; - - meta = NULL; - pagep = prevp = NULL; - - /* - * Get the "metapage". This will either be the metapage - * or the previous page in the free list if we are doing - * sorted allocations. If its a previous page then - * we will not be truncating. - */ - is_meta = argp->meta_pgno == PGNO_BASE_MD; - - REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta); - - if (argp->meta_pgno != PGNO_BASE_MD) - prevp = (PAGE *)meta; - - cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); - cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); - CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); - CHECK_ABORT(env, op, cmp_n, &LSN(meta), lsnp); - - /* - * Fix up the metadata page. If we're redoing or undoing the operation - * we get the page and update its LSN, last and free pointer. - */ - if (cmp_p == 0 && DB_REDO(op)) { - REC_DIRTY(mpf, ip, file_dbp->priority, &meta); - /* - * If we are at the end of the file truncate, otherwise - * put on the free list. 
- */ - if (argp->pgno == argp->last_pgno) - meta->last_pgno = argp->pgno - 1; - else if (is_meta) - meta->free = argp->pgno; - else - NEXT_PGNO(prevp) = argp->pgno; - LSN(meta) = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo the deallocation. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &meta); - if (is_meta) { - if (meta->last_pgno < argp->pgno) - meta->last_pgno = argp->pgno; - meta->free = argp->next; - } else - NEXT_PGNO(prevp) = argp->next; - LSN(meta) = argp->meta_lsn; - } - -check_meta: - if (ret != 0 && is_meta) { - /* The metadata page must always exist. */ - ret = __db_pgerr(file_dbp, argp->meta_pgno, ret); - goto out; - } - - /* - * Get the freed page. Don't create the page if we are going to - * free it. If we're redoing the operation we get the page and - * explicitly discard its contents, then update its LSN. If we're - * undoing the operation, we get the page and restore its header. - */ - if (DB_REDO(op) || (is_meta && meta->last_pgno < argp->pgno)) { - if ((ret = __memp_fget(mpf, &argp->pgno, - ip, NULL, 0, &pagep)) != 0) { - if (ret != DB_PAGE_NOTFOUND) - goto out; - if (is_meta && - DB_REDO(op) && meta->last_pgno <= argp->pgno) - goto trunc; - goto done; - } - } else if ((ret = __memp_fget(mpf, &argp->pgno, - ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - - (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); - cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); - - /* - * This page got extended by a later allocation, - * but its allocation was not in the scope of this - * recovery pass. - */ - if (IS_ZERO_LSN(LSN(pagep))) - cmp_p = 0; - - CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); - if (DB_REDO(op) && - (cmp_p == 0 || - (IS_ZERO_LSN(copy_lsn) && - LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) { - /* Need to redo the deallocation. 
*/ - /* - * The page can be truncated if it was truncated at runtime - * and the current metapage reflects the truncation. - */ - if (is_meta && meta->last_pgno <= argp->pgno && - argp->last_pgno <= argp->pgno) { - if ((ret = __memp_fput(mpf, ip, - pagep, DB_PRIORITY_VERY_LOW)) != 0) - goto out; - pagep = NULL; -trunc: if ((ret = __memp_ftruncate(mpf, NULL, ip, - argp->pgno, MP_TRUNC_RECOVER)) != 0) - goto out; - } else if (argp->last_pgno == argp->pgno) { - /* The page was truncated at runtime, zero it out. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - P_INIT(pagep, 0, PGNO_INVALID, - PGNO_INVALID, PGNO_INVALID, 0, P_INVALID); - ZERO_LSN(pagep->lsn); - } else { - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); - pagep->lsn = *lsnp; - - } - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to reallocate the page. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - memcpy(pagep, argp->header.data, argp->header.size); - if (data) - memcpy((u_int8_t*)pagep + HOFFSET(pagep), - argp->data.data, argp->data.size); - } - if (pagep != NULL && - (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) - goto out; - - pagep = NULL; -#ifdef HAVE_FTRUNCATE - /* - * If we are keeping an in memory free list remove this - * element from the list. - */ - if (op == DB_TXN_ABORT && argp->pgno != argp->last_pgno) { - db_pgno_t *lp; - u_int32_t nelem, pos; - - if ((ret = __memp_get_freelist(mpf, &nelem, &lp)) != 0) - goto out; - if (lp != NULL) { - pos = 0; - if (!is_meta) { - __db_freelist_pos(argp->pgno, lp, nelem, &pos); - - /* - * If we aborted after logging but before - * updating the free list don't do anything. 
- */ - if (argp->pgno != lp[pos]) { - DB_ASSERT(env, - argp->meta_pgno == lp[pos]); - goto done; - } - DB_ASSERT(env, - argp->meta_pgno == lp[pos - 1]); - } else if (nelem != 0 && argp->pgno != lp[pos]) - goto done; - - if (pos < nelem) - memmove(&lp[pos], &lp[pos + 1], - ((nelem - pos) - 1) * sizeof(*lp)); - - /* Shrink the list */ - if ((ret = - __memp_extend_freelist(mpf, nelem - 1, &lp)) != 0) - goto out; - } - } -#endif -done: - if (meta != NULL && - (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) - goto out; - meta = NULL; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); - if (meta != NULL) - (void)__memp_fput(mpf, ip, meta, file_dbp->priority); - - return (ret); -} - -/* - * __db_pg_free_recover -- - * Recovery function for pg_free. - * - * PUBLIC: int __db_pg_free_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_free_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_pg_free_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - DB_THREAD_INFO *ip; - int ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - REC_PRINT(__db_pg_free_print); - REC_INTRO(__db_pg_free_read, ip, 0); - - ret = __db_pg_free_recover_int(env, ip, - (__db_pg_freedata_args *)argp, file_dbp, lsnp, mpf, op, 0); - -done: *lsnp = argp->prev_lsn; -out: - REC_CLOSE; -} - -/* - * __db_pg_freedata_recover -- - * Recovery function for pg_freedata. 
- * - * PUBLIC: int __db_pg_freedata_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_freedata_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_pg_freedata_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - DB_THREAD_INFO *ip; - int ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - REC_PRINT(__db_pg_freedata_print); - REC_INTRO(__db_pg_freedata_read, ip, 0); - - ret = __db_pg_free_recover_int(env, - ip, argp, file_dbp, lsnp, mpf, op, 1); - -done: *lsnp = argp->prev_lsn; -out: - REC_CLOSE; -} - -/* - * __db_cksum_recover -- - * Recovery function for checksum failure log record. - * - * PUBLIC: int __db_cksum_recover __P((ENV *, - * PUBLIC: DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_cksum_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_cksum_args *argp; - int ret; - - COMPQUIET(info, NULL); - COMPQUIET(lsnp, NULL); - COMPQUIET(op, DB_TXN_ABORT); - - REC_PRINT(__db_cksum_print); - - if ((ret = __db_cksum_read(env, dbtp->data, &argp)) != 0) - return (ret); - - /* - * We had a checksum failure -- the only option is to run catastrophic - * recovery. - */ - if (F_ISSET(env, ENV_RECOVER_FATAL)) - ret = 0; - else { - __db_errx(env, - "Checksum failure requires catastrophic recovery"); - ret = __env_panic(env, DB_RUNRECOVERY); - } - - __os_free(env, argp); - return (ret); -} - -/* - * __db_pg_init_recover -- - * Recovery function to reinit pages after truncation. 
- * - * PUBLIC: int __db_pg_init_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_init_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_pg_init_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DB_LSN copy_lsn; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, ret, type; - - ip = ((DB_TXNHEAD *)info)->thread_info; - REC_PRINT(__db_pg_init_print); - REC_INTRO(__db_pg_init_read, ip, 0); - - mpf = file_dbp->mpf; - if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { - if (DB_UNDO(op)) { - if (ret == DB_PAGE_NOTFOUND) - goto done; - else { - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - } - - /* - * This page was truncated and may simply not have - * had an item written to it yet. This should only - * happen on hash databases, so confirm that. - */ - DB_ASSERT(env, file_dbp->type == DB_HASH); - if ((ret = __memp_fget(mpf, &argp->pgno, - ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - } - - (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); - CHECK_ABORT(env, op, cmp_n, &LSN(pagep), lsnp); - - if (cmp_p == 0 && DB_REDO(op)) { - if (TYPE(pagep) == P_HASH) - type = P_HASH; - else - type = file_dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE; - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, PGNO(pagep), PGNO_INVALID, - PGNO_INVALID, TYPE(pagep) == P_HASH ? 0 : 1, type); - pagep->lsn = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Put the data back on the page. 
*/ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - memcpy(pagep, argp->header.data, argp->header.size); - if (argp->data.size > 0) - memcpy((u_int8_t*)pagep + HOFFSET(pagep), - argp->data.data, argp->data.size); - } - if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; -out: - REC_CLOSE; -} - -/* - * __db_pg_trunc_recover -- - * Recovery function for pg_trunc. - * - * PUBLIC: int __db_pg_trunc_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_trunc_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ -#ifdef HAVE_FTRUNCATE - __db_pg_trunc_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DBMETA *meta; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pglist_t *pglist, *lp; - db_pgno_t last_pgno, *list; - u_int32_t felem, nelem, pos; - int ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - REC_PRINT(__db_pg_trunc_print); - REC_INTRO(__db_pg_trunc_read, ip, 1); - - pglist = (db_pglist_t *) argp->list.data; - nelem = argp->list.size / sizeof(db_pglist_t); - if (DB_REDO(op)) { - /* - * First call __db_pg_truncate to find the truncation - * point, truncate the file and return the new last_pgno. - */ - last_pgno = argp->last_pgno; - if ((ret = __db_pg_truncate(dbc, NULL, pglist, - NULL, &nelem, argp->next_free, &last_pgno, lsnp, 1)) != 0) - goto out; - - if (argp->last_free != PGNO_INVALID) { - /* - * Update the next pointer of the last page in - * the freelist. If the truncation point is - * beyond next_free then this is still in the freelist - * otherwise the last_free page is at the end. 
- */ - if ((ret = __memp_fget(mpf, - &argp->last_free, ip, NULL, 0, &meta)) == 0) { - if (LOG_COMPARE(&LSN(meta), - &argp->last_lsn) == 0) { - REC_DIRTY(mpf, - ip, dbc->priority, &meta); - if (pglist->pgno > last_pgno) - NEXT_PGNO(meta) = PGNO_INVALID; - else - NEXT_PGNO(meta) = pglist->pgno; - LSN(meta) = *lsnp; - } - if ((ret = __memp_fput(mpf, ip, - meta, file_dbp->priority)) != 0) - goto out; - meta = NULL; - } else if (ret != DB_PAGE_NOTFOUND) - goto out; - } - if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL, - 0, &meta)) != 0) - goto out; - if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) { - REC_DIRTY(mpf, ip, dbc->priority, &meta); - if (argp->last_free == PGNO_INVALID) { - if (nelem == 0) - meta->free = PGNO_INVALID; - else - meta->free = pglist->pgno; - } - meta->last_pgno = last_pgno; - LSN(meta) = *lsnp; - } - } else { - /* Put the free list back in its original order. */ - for (lp = pglist; lp < &pglist[nelem]; lp++) { - if ((ret = __memp_fget(mpf, &lp->pgno, ip, - NULL, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - if (IS_ZERO_LSN(LSN(pagep)) || - LOG_COMPARE(&LSN(pagep), lsnp) == 0) { - REC_DIRTY(mpf, ip, dbc->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, lp->pgno, - PGNO_INVALID, lp->next_pgno, 0, P_INVALID); - LSN(pagep) = lp->lsn; - } - if ((ret = __memp_fput(mpf, - ip, pagep, file_dbp->priority)) != 0) - goto out; - } - /* - * Link the truncated part back into the free list. - * Its either after the last_free page or direclty - * linked to the metadata page. 
- */ - if (argp->last_free != PGNO_INVALID) { - if ((ret = __memp_fget(mpf, &argp->last_free, - ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) { - if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { - NEXT_PGNO(meta) = argp->next_free; - LSN(meta) = argp->last_lsn; - } - if ((ret = __memp_fput(mpf, ip, - meta, file_dbp->priority)) != 0) - goto out; - } else if (ret != DB_PAGE_NOTFOUND) - goto out; - meta = NULL; - } - if ((ret = __memp_fget(mpf, &argp->meta, - ip, NULL, DB_MPOOL_EDIT, &meta)) != 0) - goto out; - if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { - REC_DIRTY(mpf, ip, dbc->priority, &meta); - /* - * If we had to break up the list last_pgno - * may only represent the end of the block. - */ - if (meta->last_pgno < argp->last_pgno) - meta->last_pgno = argp->last_pgno; - if (argp->last_free == PGNO_INVALID) - meta->free = argp->next_free; - LSN(meta) = argp->meta_lsn; - } - } - - if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) - goto out; - - if (op == DB_TXN_ABORT) { - /* - * Put the pages back on the in memory free list. - * If this is part of a multi-record truncate then - * we need to find this batch, it may not be at the end. - * If we aborted while writing one of the log records - * then this set may still be in the list. 
- */ - if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0) - goto out; - if (list != NULL) { - if (felem != 0 && list[felem - 1] > pglist->pgno) { - __db_freelist_pos( - pglist->pgno, list, felem, &pos); - DB_ASSERT(env, pos < felem); - if (pglist->pgno == list[pos]) - goto done; - pos++; - } else if (felem != 0 && - list[felem - 1] == pglist->pgno) - goto done; - else - pos = felem; - if ((ret = __memp_extend_freelist( - mpf, felem + nelem, &list)) != 0) - goto out; - if (pos != felem) - memmove(&list[nelem + pos], &list[pos], - sizeof(*list) * (felem - pos)); - for (lp = pglist; lp < &pglist[nelem]; lp++) - list[pos++] = lp->pgno; - } - } - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -#else - /* - * If HAVE_FTRUNCATE is not defined, we'll never see pg_trunc records - * to recover. - */ - COMPQUIET(env, NULL); - COMPQUIET(dbtp, NULL); - COMPQUIET(lsnp, NULL); - COMPQUIET(op, DB_TXN_ABORT); - COMPQUIET(info, NULL); - return (EINVAL); -#endif -} -/* - * __db_pg_sort_44_recover -- - * Recovery function for pg_sort. - * This is deprecated and kept for replication upgrades. 
- * - * PUBLIC: int __db_pg_sort_44_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_sort_44_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ -#ifdef HAVE_FTRUNCATE - __db_pg_sort_44_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DBMETA *meta; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pglist_t *pglist, *lp; - db_pgno_t pgno, *list; - u_int32_t felem, nelem; - int ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - REC_PRINT(__db_pg_sort_44_print); - REC_INTRO(__db_pg_sort_44_read, ip, 1); - - pglist = (db_pglist_t *) argp->list.data; - nelem = argp->list.size / sizeof(db_pglist_t); - if (DB_REDO(op)) { - pgno = argp->last_pgno; - __db_freelist_sort(pglist, nelem); - if ((ret = __db_pg_truncate(dbc, NULL, - pglist, NULL, &nelem, PGNO_INVALID, &pgno, lsnp, 1)) != 0) - goto out; - - if (argp->last_free != PGNO_INVALID) { - if ((ret = __memp_fget(mpf, - &argp->last_free, ip, NULL, 0, &meta)) == 0) { - if (LOG_COMPARE(&LSN(meta), - &argp->last_lsn) == 0) { - REC_DIRTY(mpf, - ip, dbc->priority, &meta); - NEXT_PGNO(meta) = PGNO_INVALID; - LSN(meta) = *lsnp; - } - if ((ret = __memp_fput(mpf, ip, - meta, file_dbp->priority)) != 0) - goto out; - meta = NULL; - } else if (ret != DB_PAGE_NOTFOUND) - goto out; - } - if ((ret = __memp_fget(mpf, &argp->meta, ip, NULL, - 0, &meta)) != 0) - goto out; - if (LOG_COMPARE(&LSN(meta), &argp->meta_lsn) == 0) { - REC_DIRTY(mpf, ip, dbc->priority, &meta); - if (argp->last_free == PGNO_INVALID) { - if (nelem == 0) - meta->free = PGNO_INVALID; - else - meta->free = pglist->pgno; - } - meta->last_pgno = pgno; - LSN(meta) = *lsnp; - } - } else { - /* Put the free list back in its original order. 
*/ - for (lp = pglist; lp < &pglist[nelem]; lp++) { - if ((ret = __memp_fget(mpf, &lp->pgno, ip, - NULL, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - if (IS_ZERO_LSN(LSN(pagep)) || - LOG_COMPARE(&LSN(pagep), lsnp) == 0) { - REC_DIRTY(mpf, ip, dbc->priority, &pagep); - if (lp == &pglist[nelem - 1]) - pgno = PGNO_INVALID; - else - pgno = lp[1].pgno; - - P_INIT(pagep, file_dbp->pgsize, - lp->pgno, PGNO_INVALID, pgno, 0, P_INVALID); - LSN(pagep) = lp->lsn; - } - if ((ret = __memp_fput(mpf, - ip, pagep, file_dbp->priority)) != 0) - goto out; - } - if (argp->last_free != PGNO_INVALID) { - if ((ret = __memp_fget(mpf, &argp->last_free, - ip, NULL, DB_MPOOL_EDIT, &meta)) == 0) { - if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { - NEXT_PGNO(meta) = pglist->pgno; - LSN(meta) = argp->last_lsn; - } - if ((ret = __memp_fput(mpf, ip, - meta, file_dbp->priority)) != 0) - goto out; - } else if (ret != DB_PAGE_NOTFOUND) - goto out; - meta = NULL; - } - if ((ret = __memp_fget(mpf, &argp->meta, - ip, NULL, DB_MPOOL_EDIT, &meta)) != 0) - goto out; - if (LOG_COMPARE(&LSN(meta), lsnp) == 0) { - REC_DIRTY(mpf, ip, dbc->priority, &meta); - meta->last_pgno = argp->last_pgno; - if (argp->last_free == PGNO_INVALID) - meta->free = pglist->pgno; - LSN(meta) = argp->meta_lsn; - } - } - if (op == DB_TXN_ABORT) { - if ((ret = __memp_get_freelist(mpf, &felem, &list)) != 0) - goto out; - if (list != NULL) { - DB_ASSERT(env, felem == 0 || - argp->last_free == list[felem - 1]); - if ((ret = __memp_extend_freelist( - mpf, felem + nelem, &list)) != 0) - goto out; - for (lp = pglist; lp < &pglist[nelem]; lp++) - list[felem++] = lp->pgno; - } - } - - if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) - goto out; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: REC_CLOSE; -#else - /* - * If HAVE_FTRUNCATE is not defined, we'll never see pg_sort records - * to recover. 
- */ - COMPQUIET(env, NULL); - COMPQUIET(dbtp, NULL); - COMPQUIET(lsnp, NULL); - COMPQUIET(op, DB_TXN_ABORT); - COMPQUIET(info, NULL); - return (EINVAL); -#endif -} - -/* - * __db_pg_alloc_42_recover -- - * Recovery function for pg_alloc. - * - * PUBLIC: int __db_pg_alloc_42_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_alloc_42_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_pg_alloc_42_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DBMETA *meta; - DB_MPOOLFILE *mpf; - PAGE *pagep; - db_pgno_t pgno; - int cmp_n, cmp_p, created, level, ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - meta = NULL; - pagep = NULL; - created = 0; - REC_PRINT(__db_pg_alloc_42_print); - REC_INTRO(__db_pg_alloc_42_read, ip, 0); - - /* - * Fix up the metadata page. If we're redoing the operation, we have - * to get the metadata page and update its LSN and its free pointer. - * If we're undoing the operation and the page was ever created, we put - * it on the freelist. - */ - pgno = PGNO_BASE_MD; - if ((ret = __memp_fget(mpf, &pgno, ip, NULL, 0, &meta)) != 0) { - /* The metadata page must always exist on redo. */ - if (DB_REDO(op)) { - ret = __db_pgerr(file_dbp, pgno, ret); - goto out; - } else - goto done; - } - cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); - cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); - CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo update described. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &meta); - LSN(meta) = *lsnp; - meta->free = argp->next; - if (argp->pgno > meta->last_pgno) - meta->last_pgno = argp->pgno; - } else if (cmp_n == 0 && DB_UNDO(op)) { - goto no_rollback; - } - - /* - * Fix up the allocated page. If the page does not exist - * and we can truncate it then don't create it. 
- * Otherwise if we're redoing the operation, we have - * to get the page (creating it if it doesn't exist), and update its - * LSN. If we're undoing the operation, we have to reset the page's - * LSN and put it on the free list, or truncate it. - */ - if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { - /* - * We have to be able to identify if a page was newly - * created so we can recover it properly. We cannot simply - * look for an empty header, because hash uses a pgin - * function that will set the header. Instead, we explicitly - * try for the page without CREATE and if that fails, then - * create it. - */ - if ((ret = __memp_fget(mpf, &argp->pgno, - ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) { - if (DB_UNDO(op) && ret == ENOSPC) - goto do_truncate; - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - created = 1; - } - - /* Fix up the allocated page. */ - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->page_lsn); - - /* - * If an initial allocation is aborted and then reallocated during - * an archival restore the log record will have an LSN for the page - * but the page will be empty. - */ - if (IS_ZERO_LSN(LSN(pagep)) || - (IS_ZERO_LSN(argp->page_lsn) && IS_INIT_LSN(LSN(pagep)))) - cmp_p = 0; - - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->page_lsn); - /* - * Another special case we have to handle is if we ended up with a - * page of all 0's which can happen if we abort between allocating a - * page in mpool and initializing it. In that case, even if we're - * undoing, we need to re-initialize the page. - */ - if (DB_REDO(op) && cmp_p == 0) { - /* Need to redo update described. 
*/ - switch (argp->ptype) { - case P_LBTREE: - case P_LRECNO: - case P_LDUP: - level = LEAFLEVEL; - break; - default: - level = 0; - break; - } - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, PGNO_INVALID, level, argp->ptype); - - pagep->lsn = *lsnp; - } else if (DB_UNDO(op) && (cmp_n == 0 || created)) { - /* - * This is where we handle the case of a 0'd page (pagep->pgno - * is equal to PGNO_INVALID). - * Undo the allocation, reinitialize the page and - * link its next pointer to the free list. - */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); - - pagep->lsn = argp->page_lsn; - } - -do_truncate: - /* - * We cannot undo things from 4.2 land, because we nolonger - * have limbo processing. - */ - if ((pagep == NULL || IS_ZERO_LSN(LSN(pagep))) && - IS_ZERO_LSN(argp->page_lsn) && DB_UNDO(op)) { -no_rollback: __db_errx(env, -"Cannot replicate prepared transactions from master running release 4.2 "); - ret = __env_panic(env, EINVAL); - } - - if (pagep != NULL && - (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - - if ((ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) - goto out; - meta = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); - if (meta != NULL) - (void)__memp_fput(mpf, ip, meta, file_dbp->priority); - REC_CLOSE; -} - -/* - * __db_pg_free_recover_42_int -- - */ -static int -__db_pg_free_recover_42_int(env, ip, argp, file_dbp, lsnp, mpf, op, data) - ENV *env; - DB_THREAD_INFO *ip; - __db_pg_freedata_42_args *argp; - DB *file_dbp; - DB_LSN *lsnp; - DB_MPOOLFILE *mpf; - db_recops op; - int data; -{ - DBMETA *meta; - DB_LSN copy_lsn; - PAGE *pagep, *prevp; - int cmp_n, cmp_p, is_meta, ret; - - meta = NULL; - pagep = NULL; - prevp = NULL; - - /* - * Get the "metapage". 
This will either be the metapage - * or the previous page in the free list if we are doing - * sorted allocations. If its a previous page then - * we will not be truncating. - */ - is_meta = argp->meta_pgno == PGNO_BASE_MD; - - REC_FGET(mpf, ip, argp->meta_pgno, &meta, check_meta); - - if (argp->meta_pgno != PGNO_BASE_MD) - prevp = (PAGE *)meta; - - cmp_n = LOG_COMPARE(lsnp, &LSN(meta)); - cmp_p = LOG_COMPARE(&LSN(meta), &argp->meta_lsn); - CHECK_LSN(env, op, cmp_p, &LSN(meta), &argp->meta_lsn); - - /* - * Fix up the metadata page. If we're redoing or undoing the operation - * we get the page and update its LSN, last and free pointer. - */ - if (cmp_p == 0 && DB_REDO(op)) { - /* Need to redo the deallocation. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &meta); - if (prevp == NULL) - meta->free = argp->pgno; - else - NEXT_PGNO(prevp) = argp->pgno; - /* - * If this was a compensating transaction and - * we are a replica, then we never executed the - * original allocation which incremented meta->free. - */ - if (prevp == NULL && meta->last_pgno < meta->free) - meta->last_pgno = meta->free; - LSN(meta) = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to undo the deallocation. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &meta); - if (prevp == NULL) - meta->free = argp->next; - else - NEXT_PGNO(prevp) = argp->next; - LSN(meta) = argp->meta_lsn; - if (prevp == NULL && meta->last_pgno < argp->pgno) - meta->last_pgno = argp->pgno; - } - -check_meta: - if (ret != 0 && is_meta) { - /* The metadata page must always exist. */ - ret = __db_pgerr(file_dbp, argp->meta_pgno, ret); - goto out; - } - - /* - * Get the freed page. If we support truncate then don't - * create the page if we are going to free it. If we're - * redoing the operation we get the page and explicitly discard - * its contents, then update its LSN. If we're undoing the - * operation, we get the page and restore its header. 
- * If we don't support truncate, then we must create the page - * and roll it back. - */ - if ((ret = __memp_fget(mpf, &argp->pgno, - ip, NULL, DB_MPOOL_CREATE, &pagep)) != 0) - goto out; - - (void)__ua_memcpy(©_lsn, &LSN(argp->header.data), sizeof(DB_LSN)); - cmp_n = IS_ZERO_LSN(LSN(pagep)) ? 0 : LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), ©_lsn); - - CHECK_LSN(env, op, cmp_p, &LSN(pagep), ©_lsn); - if (DB_REDO(op) && - (cmp_p == 0 || - (IS_ZERO_LSN(copy_lsn) && - LOG_COMPARE(&LSN(pagep), &argp->meta_lsn) <= 0))) { - /* Need to redo the deallocation. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - P_INIT(pagep, file_dbp->pgsize, - argp->pgno, PGNO_INVALID, argp->next, 0, P_INVALID); - pagep->lsn = *lsnp; - } else if (cmp_n == 0 && DB_UNDO(op)) { - /* Need to reallocate the page. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - memcpy(pagep, argp->header.data, argp->header.size); - if (data) - memcpy((u_int8_t*)pagep + HOFFSET(pagep), - argp->data.data, argp->data.size); - } - if (pagep != NULL && - (ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) - goto out; - - pagep = NULL; - if (meta != NULL && - (ret = __memp_fput(mpf, ip, meta, file_dbp->priority)) != 0) - goto out; - meta = NULL; - - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); - if (meta != NULL) - (void)__memp_fput(mpf, ip, meta, file_dbp->priority); - - return (ret); -} - -/* - * __db_pg_free_42_recover -- - * Recovery function for pg_free. 
- * - * PUBLIC: int __db_pg_free_42_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_free_42_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_pg_free_42_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - DB_THREAD_INFO *ip; - int ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - REC_PRINT(__db_pg_free_42_print); - REC_INTRO(__db_pg_free_42_read, ip, 0); - - ret = __db_pg_free_recover_42_int(env, ip, - (__db_pg_freedata_42_args *)argp, file_dbp, lsnp, mpf, op, 0); - -done: *lsnp = argp->prev_lsn; -out: - REC_CLOSE; -} - -/* - * __db_pg_freedata_42_recover -- - * Recovery function for pg_freedata. - * - * PUBLIC: int __db_pg_freedata_42_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_pg_freedata_42_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_pg_freedata_42_args *argp; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - DB_THREAD_INFO *ip; - int ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - REC_PRINT(__db_pg_freedata_42_print); - REC_INTRO(__db_pg_freedata_42_read, ip, 0); - - ret = __db_pg_free_recover_42_int( - env, ip, argp, file_dbp, lsnp, mpf, op, 1); - -done: *lsnp = argp->prev_lsn; -out: - REC_CLOSE; -} - -/* - * __db_relink_42_recover -- - * Recovery function for relink. 
- * - * PUBLIC: int __db_relink_42_recover - * PUBLIC: __P((ENV *, DBT *, DB_LSN *, db_recops, void *)); - */ -int -__db_relink_42_recover(env, dbtp, lsnp, op, info) - ENV *env; - DBT *dbtp; - DB_LSN *lsnp; - db_recops op; - void *info; -{ - __db_relink_42_args *argp; - DB_THREAD_INFO *ip; - DB *file_dbp; - DBC *dbc; - DB_MPOOLFILE *mpf; - PAGE *pagep; - int cmp_n, cmp_p, modified, ret; - - ip = ((DB_TXNHEAD *)info)->thread_info; - pagep = NULL; - REC_PRINT(__db_relink_42_print); - REC_INTRO(__db_relink_42_read, ip, 0); - - /* - * There are up to three pages we need to check -- the page, and the - * previous and next pages, if they existed. For a page add operation, - * the current page is the result of a split and is being recovered - * elsewhere, so all we need do is recover the next page. - */ - if ((ret = __memp_fget(mpf, &argp->pgno, ip, NULL, 0, &pagep)) != 0) { - if (DB_REDO(op)) { - ret = __db_pgerr(file_dbp, argp->pgno, ret); - goto out; - } - goto next2; - } - if (argp->opcode == DB_ADD_PAGE_COMPAT) - goto next1; - - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn); - if (cmp_p == 0 && DB_REDO(op)) { - /* Redo the relink. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - pagep->lsn = *lsnp; - } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { - /* Undo the relink. 
*/ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - pagep->next_pgno = argp->next; - pagep->prev_pgno = argp->prev; - pagep->lsn = argp->lsn; - } -next1: if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - -next2: if ((ret = __memp_fget(mpf, &argp->next, ip, NULL, 0, &pagep)) != 0) { - if (DB_REDO(op)) { - ret = __db_pgerr(file_dbp, argp->next, ret); - goto out; - } - goto prev; - } - modified = 0; - cmp_n = LOG_COMPARE(lsnp, &LSN(pagep)); - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_next); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_next); - if ((argp->opcode == DB_REM_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op)) || - (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_n == 0 && DB_UNDO(op))) { - /* Redo the remove or undo the add. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - pagep->prev_pgno = argp->prev; - modified = 1; - } else if ((argp->opcode == DB_REM_PAGE_COMPAT && - cmp_n == 0 && DB_UNDO(op)) || - (argp->opcode == DB_ADD_PAGE_COMPAT && cmp_p == 0 && DB_REDO(op))) { - /* Undo the remove or redo the add. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - pagep->prev_pgno = argp->pgno; - modified = 1; - } - if (modified) { - if (DB_UNDO(op)) - pagep->lsn = argp->lsn_next; - else - pagep->lsn = *lsnp; - } - if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - if (argp->opcode == DB_ADD_PAGE_COMPAT) - goto done; - -prev: if ((ret = __memp_fget(mpf, &argp->prev, ip, NULL, 0, &pagep)) != 0) { - if (DB_REDO(op)) { - ret = __db_pgerr(file_dbp, argp->prev, ret); - goto out; - } - goto done; - } - modified = 0; - cmp_p = LOG_COMPARE(&LSN(pagep), &argp->lsn_prev); - CHECK_LSN(env, op, cmp_p, &LSN(pagep), &argp->lsn_prev); - if (cmp_p == 0 && DB_REDO(op)) { - /* Redo the relink. */ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - pagep->next_pgno = argp->next; - modified = 1; - } else if (LOG_COMPARE(lsnp, &LSN(pagep)) == 0 && DB_UNDO(op)) { - /* Undo the relink. 
*/ - REC_DIRTY(mpf, ip, file_dbp->priority, &pagep); - pagep->next_pgno = argp->pgno; - modified = 1; - } - if (modified) { - if (DB_UNDO(op)) - pagep->lsn = argp->lsn_prev; - else - pagep->lsn = *lsnp; - } - if ((ret = __memp_fput(mpf, ip, pagep, file_dbp->priority)) != 0) - goto out; - pagep = NULL; - -done: *lsnp = argp->prev_lsn; - ret = 0; - -out: if (pagep != NULL) - (void)__memp_fput(mpf, ip, pagep, file_dbp->priority); - REC_CLOSE; -} diff --git a/db/db_reclaim.c b/db/db_reclaim.c deleted file mode 100644 index a44d054..0000000 --- a/db/db_reclaim.c +++ /dev/null @@ -1,246 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/mp.h" - -/* - * __db_traverse_big - * Traverse a chain of overflow pages and call the callback routine - * on each one. The calling convention for the callback is: - * callback(dbc, page, cookie, did_put), - * where did_put is a return value indicating if the page in question has - * already been returned to the mpool. - * - * PUBLIC: int __db_traverse_big __P((DBC *, db_pgno_t, - * PUBLIC: int (*)(DBC *, PAGE *, void *, int *), void *)); - */ -int -__db_traverse_big(dbc, pgno, callback, cookie) - DBC *dbc; - db_pgno_t pgno; - int (*callback) __P((DBC *, PAGE *, void *, int *)); - void *cookie; -{ - DB_MPOOLFILE *mpf; - PAGE *p; - int did_put, ret; - - mpf = dbc->dbp->mpf; - - do { - did_put = 0; - if ((ret = __memp_fget(mpf, - &pgno, dbc->thread_info, dbc->txn, 0, &p)) != 0) - return (ret); - /* - * If we are freeing pages only process the overflow - * chain if the head of the chain has a refcount of 1. 
- */ - pgno = NEXT_PGNO(p); - if (callback == __db_truncate_callback && OV_REF(p) != 1) - pgno = PGNO_INVALID; - if ((ret = callback(dbc, p, cookie, &did_put)) == 0 && - !did_put) - ret = __memp_fput(mpf, - dbc->thread_info, p, dbc->priority); - } while (ret == 0 && pgno != PGNO_INVALID); - - return (ret); -} - -/* - * __db_reclaim_callback - * This is the callback routine used during a delete of a subdatabase. - * we are traversing a btree or hash table and trying to free all the - * pages. Since they share common code for duplicates and overflow - * items, we traverse them identically and use this routine to do the - * actual free. The reason that this is callback is because hash uses - * the same traversal code for statistics gathering. - * - * PUBLIC: int __db_reclaim_callback __P((DBC *, PAGE *, void *, int *)); - */ -int -__db_reclaim_callback(dbc, p, cookie, putp) - DBC *dbc; - PAGE *p; - void *cookie; - int *putp; -{ - DB *dbp; - int ret; - - COMPQUIET(cookie, NULL); - dbp = dbc->dbp; - - /* - * We don't want to log the free of the root with the subdb. - * If we abort then the subdb may not be openable to undo - * the free. - */ - if ((dbp->type == DB_BTREE || dbp->type == DB_RECNO) && - PGNO(p) == ((BTREE *)dbp->bt_internal)->bt_root) - return (0); - if ((ret = __db_free(dbc, p)) != 0) - return (ret); - *putp = 1; - - return (0); -} - -/* - * __db_truncate_callback - * This is the callback routine used during a truncate. - * we are traversing a btree or hash table and trying to free all the - * pages. 
- * - * PUBLIC: int __db_truncate_callback __P((DBC *, PAGE *, void *, int *)); - */ -int -__db_truncate_callback(dbc, p, cookie, putp) - DBC *dbc; - PAGE *p; - void *cookie; - int *putp; -{ - DB *dbp; - DBT ddbt, ldbt; - DB_MPOOLFILE *mpf; - db_indx_t indx, len, off, tlen, top; - u_int8_t *hk, type; - u_int32_t *countp; - int ret; - - top = NUM_ENT(p); - dbp = dbc->dbp; - mpf = dbp->mpf; - countp = cookie; - *putp = 1; - - switch (TYPE(p)) { - case P_LBTREE: - /* Skip for off-page duplicates and deleted items. */ - for (indx = 0; indx < top; indx += P_INDX) { - type = GET_BKEYDATA(dbp, p, indx + O_INDX)->type; - if (!B_DISSET(type) && B_TYPE(type) != B_DUPLICATE) - ++*countp; - } - /* FALLTHROUGH */ - case P_IBTREE: - case P_IRECNO: - case P_INVALID: - if (dbp->type != DB_HASH && - ((BTREE *)dbp->bt_internal)->bt_root == PGNO(p)) { - type = dbp->type == DB_RECNO ? P_LRECNO : P_LBTREE; - goto reinit; - } - break; - case P_OVERFLOW: - if ((ret = __memp_dirty(mpf, - &p, dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) - return (ret); - if (DBC_LOGGING(dbc)) { - if ((ret = __db_ovref_log(dbp, dbc->txn, - &LSN(p), 0, p->pgno, -1, &LSN(p))) != 0) - return (ret); - } else - LSN_NOT_LOGGED(LSN(p)); - if (--OV_REF(p) != 0) - *putp = 0; - break; - case P_LRECNO: - for (indx = 0; indx < top; indx += O_INDX) { - type = GET_BKEYDATA(dbp, p, indx)->type; - if (!B_DISSET(type)) - ++*countp; - } - - if (((BTREE *)dbp->bt_internal)->bt_root == PGNO(p)) { - type = P_LRECNO; - goto reinit; - } - break; - case P_LDUP: - /* Correct for deleted items. */ - for (indx = 0; indx < top; indx += O_INDX) - if (!B_DISSET(GET_BKEYDATA(dbp, p, indx)->type)) - ++*countp; - - break; - case P_HASH: - /* Correct for on-page duplicates and deleted items. 
*/ - for (indx = 0; indx < top; indx += P_INDX) { - switch (*H_PAIRDATA(dbp, p, indx)) { - case H_OFFDUP: - break; - case H_OFFPAGE: - case H_KEYDATA: - ++*countp; - break; - case H_DUPLICATE: - tlen = LEN_HDATA(dbp, p, 0, indx); - hk = H_PAIRDATA(dbp, p, indx); - for (off = 0; off < tlen; - off += len + 2 * sizeof(db_indx_t)) { - ++*countp; - memcpy(&len, - HKEYDATA_DATA(hk) - + off, sizeof(db_indx_t)); - } - break; - default: - return (__db_pgfmt(dbp->env, p->pgno)); - } - } - /* Don't free the head of the bucket. */ - if (PREV_PGNO(p) == PGNO_INVALID) { - type = P_HASH; - -reinit: if ((ret = __memp_dirty(mpf, &p, - dbc->thread_info, dbc->txn, dbc->priority, 0)) != 0) - return (ret); - *putp = 0; - if (DBC_LOGGING(dbc)) { - memset(&ldbt, 0, sizeof(ldbt)); - memset(&ddbt, 0, sizeof(ddbt)); - ldbt.data = p; - ldbt.size = P_OVERHEAD(dbp); - ldbt.size += p->entries * sizeof(db_indx_t); - ddbt.data = (u_int8_t *)p + HOFFSET(p); - ddbt.size = dbp->pgsize - HOFFSET(p); - if ((ret = __db_pg_init_log(dbp, - dbc->txn, &LSN(p), 0, - p->pgno, &ldbt, &ddbt)) != 0) - return (ret); - } else - LSN_NOT_LOGGED(LSN(p)); - - P_INIT(p, dbp->pgsize, PGNO(p), PGNO_INVALID, - PGNO_INVALID, type == P_HASH ? 0 : 1, type); - } - break; - default: - return (__db_pgfmt(dbp->env, p->pgno)); - } - - if (*putp == 1) { - if ((ret = __db_free(dbc, p)) != 0) - return (ret); - } else { - if ((ret = __memp_fput(mpf, dbc->thread_info, p, - dbc->priority)) != 0) - return (ret); - *putp = 1; - } - - return (0); -} diff --git a/db/db_remove.c b/db/db_remove.c deleted file mode 100644 index 6b59ec3..0000000 --- a/db/db_remove.c +++ /dev/null @@ -1,492 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2001, 2010 Oracle and/or its affiliates. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/fop.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/mp.h" -#include "dbinc/txn.h" - -static int __db_dbtxn_remove __P((DB *, - DB_THREAD_INFO *, DB_TXN *, const char *, const char *)); -static int __db_subdb_remove __P((DB *, - DB_THREAD_INFO *, DB_TXN *, const char *, const char *)); - -/* - * __env_dbremove_pp - * ENV->dbremove pre/post processing. - * - * PUBLIC: int __env_dbremove_pp __P((DB_ENV *, - * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t)); - */ -int -__env_dbremove_pp(dbenv, txn, name, subdb, flags) - DB_ENV *dbenv; - DB_TXN *txn; - const char *name, *subdb; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret, txn_local; - - dbp = NULL; - env = dbenv->env; - txn_local = 0; - - ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->dbremove"); - - /* - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if ((ret = __db_fchk(env, - "DB->remove", flags, DB_AUTO_COMMIT | DB_TXN_NOT_DURABLE)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __env_rep_enter(env, 1)) != 0) { - handle_check = 0; - goto err; - } - - /* - * Create local transaction as necessary, check for consistent - * transaction usage. 
- */ - if (IS_ENV_AUTO_COMMIT(env, txn, flags)) { - if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0) - goto err; - txn_local = 1; - } else - if (txn != NULL && !TXN_ON(env) && - (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_CDSGROUP))) { - ret = __db_not_txn_env(env); - goto err; - } - LF_CLR(DB_AUTO_COMMIT); - - if ((ret = __db_create_internal(&dbp, env, 0)) != 0) - goto err; - if (LF_ISSET(DB_TXN_NOT_DURABLE) && - (ret = __db_set_flags(dbp, DB_TXN_NOT_DURABLE)) != 0) - goto err; - LF_CLR(DB_TXN_NOT_DURABLE); - - ret = __db_remove_int(dbp, ip, txn, name, subdb, flags); - - if (txn_local) { - /* - * We created the DBP here and when we commit/abort, we'll - * release all the transactional locks, including the handle - * lock; mark the handle cleared explicitly. - */ - LOCK_INIT(dbp->handle_lock); - dbp->locker = NULL; - } else if (txn != NULL) { - /* - * We created this handle locally so we need to close it - * and clean it up. Unfortunately, it's holding transactional - * locks that need to persist until the end of transaction. - * If we invalidate the locker id (dbp->locker), then the close - * won't free these locks prematurely. - */ - dbp->locker = NULL; - } - -err: if (txn_local && (t_ret = - __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0) - ret = t_ret; - - /* - * We never opened this dbp for real, so don't include a transaction - * handle, and use NOSYNC to avoid calling into mpool. - * - * !!! - * Note we're reversing the order of operations: we started the txn and - * then opened the DB handle; we're resolving the txn and then closing - * closing the DB handle -- a DB handle cannot be closed before - * resolving the txn. - */ - if (dbp != NULL && - (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) - ret = t_ret; - - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_remove_pp - * DB->remove pre/post processing. 
- * - * PUBLIC: int __db_remove_pp - * PUBLIC: __P((DB *, const char *, const char *, u_int32_t)); - */ -int -__db_remove_pp(dbp, name, subdb, flags) - DB *dbp; - const char *name, *subdb; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - /* - * Validate arguments, continuing to destroy the handle on failure. - * - * Cannot use DB_ILLEGAL_AFTER_OPEN directly because it returns. - * - * !!! - * We have a serious problem if we're here with a handle used to open - * a database -- we'll destroy the handle, and the application won't - * ever be able to close the database. - */ - if (F_ISSET(dbp, DB_AM_OPEN_CALLED)) - return (__db_mi_open(env, "DB->remove", 1)); - - /* Validate arguments. */ - if ((ret = __db_fchk(env, "DB->remove", flags, 0)) != 0) - return (ret); - - /* Check for consistent transaction usage. */ - if ((ret = __db_check_txn(dbp, NULL, DB_LOCK_INVALIDID, 0)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __db_rep_enter(dbp, 1, 1, 0)) != 0) { - handle_check = 0; - goto err; - } - - /* Remove the file. */ - ret = __db_remove(dbp, ip, NULL, name, subdb, flags); - - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_remove - * DB->remove method. - * - * PUBLIC: int __db_remove __P((DB *, DB_THREAD_INFO *, - * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t)); - */ -int -__db_remove(dbp, ip, txn, name, subdb, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb; - u_int32_t flags; -{ - int ret, t_ret; - - ret = __db_remove_int(dbp, ip, txn, name, subdb, flags); - - if ((t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_remove_int - * Worker function for the DB->remove method. 
- * - * PUBLIC: int __db_remove_int __P((DB *, DB_THREAD_INFO *, - * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t)); - */ -int -__db_remove_int(dbp, ip, txn, name, subdb, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb; - u_int32_t flags; -{ - ENV *env; - int ret; - char *real_name, *tmpname; - - env = dbp->env; - real_name = tmpname = NULL; - - if (name == NULL && subdb == NULL) { - __db_errx(env, "Remove on temporary files invalid"); - ret = EINVAL; - goto err; - } - - if (name == NULL) { - MAKE_INMEM(dbp); - real_name = (char *)subdb; - } else if (subdb != NULL) { - ret = __db_subdb_remove(dbp, ip, txn, name, subdb); - goto err; - } - - /* Handle transactional file removes separately. */ - if (IS_REAL_TXN(txn)) { - ret = __db_dbtxn_remove(dbp, ip, txn, name, subdb); - goto err; - } - - /* - * The remaining case is a non-transactional file remove. - * - * Find the real name of the file. - */ - if (!F_ISSET(dbp, DB_AM_INMEM) && (ret = __db_appname(env, - DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0) - goto err; - - /* - * If this is a file and force is set, remove the temporary file, which - * may have been left around. Ignore errors because the temporary file - * might not exist. - */ - if (!F_ISSET(dbp, DB_AM_INMEM) && LF_ISSET(DB_FORCE) && - (ret = __db_backup_name(env, real_name, NULL, &tmpname)) == 0) - (void)__os_unlink(env, tmpname, 0); - - if ((ret = __fop_remove_setup(dbp, NULL, real_name, 0)) != 0) - goto err; - - if (dbp->db_am_remove != NULL && - (ret = dbp->db_am_remove(dbp, ip, NULL, name, subdb, flags)) != 0) - goto err; - - ret = F_ISSET(dbp, DB_AM_INMEM) ? - __db_inmem_remove(dbp, NULL, real_name) : - __fop_remove(env, - NULL, dbp->fileid, name, &dbp->dirname, DB_APP_DATA, - F_ISSET(dbp, DB_AM_NOT_DURABLE) ? 
DB_LOG_NOT_DURABLE : 0); - -err: if (!F_ISSET(dbp, DB_AM_INMEM) && real_name != NULL) - __os_free(env, real_name); - if (tmpname != NULL) - __os_free(env, tmpname); - - return (ret); -} - -/* - * __db_inmem_remove -- - * Removal of a named in-memory database. - * - * PUBLIC: int __db_inmem_remove __P((DB *, DB_TXN *, const char *)); - */ -int -__db_inmem_remove(dbp, txn, name) - DB *dbp; - DB_TXN *txn; - const char *name; -{ - DBT fid_dbt, name_dbt; - DB_LOCKER *locker; - DB_LSN lsn; - ENV *env; - int ret; - - env = dbp->env; - locker = NULL; - - DB_ASSERT(env, name != NULL); - - /* This had better exist if we are trying to do a remove. */ - (void)__memp_set_flags(dbp->mpf, DB_MPOOL_NOFILE, 1); - if ((ret = __memp_fopen(dbp->mpf, NULL, - name, &dbp->dirname, 0, 0, 0)) != 0) - return (ret); - if ((ret = __memp_get_fileid(dbp->mpf, dbp->fileid)) != 0) - return (ret); - dbp->preserve_fid = 1; - - if (LOCKING_ON(env)) { - if (dbp->locker == NULL && - (ret = __lock_id(env, NULL, &dbp->locker)) != 0) - return (ret); - locker = txn == NULL ? dbp->locker : txn->locker; - } - - /* - * In a transactional environment, we'll play the same game we play - * for databases in the file system -- create a temporary database - * and put it in with the current name and then rename this one to - * another name. We'll then use a commit-time event to remove the - * entry. - */ - if ((ret = - __fop_lock_handle(env, dbp, locker, DB_LOCK_WRITE, NULL, 0)) != 0) - return (ret); - - if (!IS_REAL_TXN(txn)) - ret = __memp_nameop(env, dbp->fileid, NULL, name, NULL, 1); - else if (LOGGING_ON(env)) { - if (txn != NULL && (ret = - __txn_remevent(env, txn, name, dbp->fileid, 1)) != 0) - return (ret); - - DB_INIT_DBT(name_dbt, name, strlen(name) + 1); - DB_INIT_DBT(fid_dbt, dbp->fileid, DB_FILE_ID_LEN); - ret = __crdel_inmem_remove_log( - env, txn, &lsn, 0, &name_dbt, &fid_dbt); - } - - return (ret); -} - -/* - * __db_subdb_remove -- - * Remove a subdatabase. 
- */ -static int -__db_subdb_remove(dbp, ip, txn, name, subdb) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb; -{ - DB *mdbp, *sdbp; - int ret, t_ret; - - mdbp = sdbp = NULL; - - /* Open the subdatabase. */ - if ((ret = __db_create_internal(&sdbp, dbp->env, 0)) != 0) - goto err; - if (F_ISSET(dbp, DB_AM_NOT_DURABLE) && - (ret = __db_set_flags(sdbp, DB_TXN_NOT_DURABLE)) != 0) - goto err; - if ((ret = __db_open(sdbp, ip, - txn, name, subdb, DB_UNKNOWN, DB_WRITEOPEN, 0, PGNO_BASE_MD)) != 0) - goto err; - - DB_TEST_RECOVERY(sdbp, DB_TEST_PREDESTROY, ret, name); - - /* Free up the pages in the subdatabase. */ - switch (sdbp->type) { - case DB_BTREE: - case DB_RECNO: - if ((ret = __bam_reclaim(sdbp, ip, txn)) != 0) - goto err; - break; - case DB_HASH: - if ((ret = __ham_reclaim(sdbp, ip, txn)) != 0) - goto err; - break; - case DB_QUEUE: - case DB_UNKNOWN: - default: - ret = __db_unknown_type( - sdbp->env, "__db_subdb_remove", sdbp->type); - goto err; - } - - /* - * Remove the entry from the main database and free the subdatabase - * metadata page. - */ - if ((ret = __db_master_open(sdbp, ip, txn, name, 0, 0, &mdbp)) != 0) - goto err; - - if ((ret = __db_master_update(mdbp, - sdbp, ip, txn, subdb, sdbp->type, MU_REMOVE, NULL, 0)) != 0) - goto err; - - DB_TEST_RECOVERY(sdbp, DB_TEST_POSTDESTROY, ret, name); - -DB_TEST_RECOVERY_LABEL -err: - /* Close the main and subdatabases. */ - if ((t_ret = __db_close(sdbp, txn, DB_NOSYNC)) != 0 && ret == 0) - ret = t_ret; - - if (mdbp != NULL && - (t_ret = __db_close(mdbp, txn, DB_NOSYNC)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -static int -__db_dbtxn_remove(dbp, ip, txn, name, subdb) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb; -{ - ENV *env; - int ret; - char *tmpname; - - env = dbp->env; - tmpname = NULL; - - /* - * This is a transactional remove, so we have to keep the name - * of the file locked until the transaction commits. 
As a result, - * we implement remove by renaming the file to some other name - * (which creates a dummy named file as a placeholder for the - * file being rename/dremoved) and then deleting that file as - * a delayed remove at commit. - */ - if ((ret = __db_backup_name(env, - F_ISSET(dbp, DB_AM_INMEM) ? subdb : name, txn, &tmpname)) != 0) - return (ret); - - DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, name); - - if ((ret = __db_rename_int(dbp, - txn->thread_info, txn, name, subdb, tmpname)) != 0) - goto err; - - /* - * The internal removes will also translate into delayed removes. - */ - if (dbp->db_am_remove != NULL && - (ret = dbp->db_am_remove(dbp, ip, txn, tmpname, NULL, 0)) != 0) - goto err; - - ret = F_ISSET(dbp, DB_AM_INMEM) ? - __db_inmem_remove(dbp, txn, tmpname) : - __fop_remove(env, - txn, dbp->fileid, tmpname, &dbp->dirname, DB_APP_DATA, - F_ISSET(dbp, DB_AM_NOT_DURABLE) ? DB_LOG_NOT_DURABLE : 0); - - DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, name); - -err: -DB_TEST_RECOVERY_LABEL - if (tmpname != NULL) - __os_free(env, tmpname); - - return (ret); -} diff --git a/db/db_rename.c b/db/db_rename.c deleted file mode 100644 index 1fdf721..0000000 --- a/db/db_rename.c +++ /dev/null @@ -1,372 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2001, 2010 Oracle and/or its affiliates. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_am.h" -#include "dbinc/fop.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/txn.h" - -static int __db_rename __P((DB *, DB_THREAD_INFO *, - DB_TXN *, const char *, const char *, const char *)); -static int __db_subdb_rename __P((DB *, DB_THREAD_INFO *, - DB_TXN *, const char *, const char *, const char *)); - -/* - * __env_dbrename_pp - * ENV->dbrename pre/post processing. 
- * - * PUBLIC: int __env_dbrename_pp __P((DB_ENV *, DB_TXN *, - * PUBLIC: const char *, const char *, const char *, u_int32_t)); - */ -int -__env_dbrename_pp(dbenv, txn, name, subdb, newname, flags) - DB_ENV *dbenv; - DB_TXN *txn; - const char *name, *subdb, *newname; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret, txn_local; - - env = dbenv->env; - dbp = NULL; - txn_local = 0; - - ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->dbrename"); - - /* - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if ((ret = __db_fchk(env, "DB->rename", flags, DB_AUTO_COMMIT)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __env_rep_enter(env, 1)) != 0) { - handle_check = 0; - goto err; - } - - /* - * Create local transaction as necessary, check for consistent - * transaction usage. - */ - if (IS_ENV_AUTO_COMMIT(env, txn, flags)) { - if ((ret = __db_txn_auto_init(env, ip, &txn)) != 0) - goto err; - txn_local = 1; - } else - if (txn != NULL && !TXN_ON(env) && - (!CDB_LOCKING(env) || !F_ISSET(txn, TXN_CDSGROUP))) { - ret = __db_not_txn_env(env); - goto err; - } - - LF_CLR(DB_AUTO_COMMIT); - - if ((ret = __db_create_internal(&dbp, env, 0)) != 0) - goto err; - - ret = __db_rename_int(dbp, ip, txn, name, subdb, newname); - - if (txn_local) { - /* - * We created the DBP here and when we commit/abort, we'll - * release all the transactional locks, including the handle - * lock; mark the handle cleared explicitly. - */ - LOCK_INIT(dbp->handle_lock); - dbp->locker = NULL; - } else if (txn != NULL) { - /* - * We created this handle locally so we need to close it and - * clean it up. Unfortunately, it's holding transactional - * or CDS group locks that need to persist until the end of - * transaction. 
If we invalidate the locker (dbp->locker), - * then the close won't free these locks prematurely. - */ - dbp->locker = NULL; - } - -err: if (txn_local && (t_ret = - __db_txn_auto_resolve(env, txn, 0, ret)) != 0 && ret == 0) - ret = t_ret; - - /* - * We never opened this dbp for real, so don't include a transaction - * handle, and use NOSYNC to avoid calling into mpool. - * - * !!! - * Note we're reversing the order of operations: we started the txn and - * then opened the DB handle; we're resolving the txn and then closing - * closing the DB handle -- it's safer. - */ - if (dbp != NULL && - (t_ret = __db_close(dbp, NULL, DB_NOSYNC)) != 0 && ret == 0) - ret = t_ret; - - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_rename_pp - * DB->rename pre/post processing. - * - * PUBLIC: int __db_rename_pp __P((DB *, - * PUBLIC: const char *, const char *, const char *, u_int32_t)); - */ -int -__db_rename_pp(dbp, name, subdb, newname, flags) - DB *dbp; - const char *name, *subdb, *newname; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - handle_check = 0; - - /* - * Validate arguments, continuing to destroy the handle on failure. - * - * Cannot use DB_ILLEGAL_AFTER_OPEN directly because it returns. - * - * !!! - * We have a serious problem if we're here with a handle used to open - * a database -- we'll destroy the handle, and the application won't - * ever be able to close the database. - */ - if (F_ISSET(dbp, DB_AM_OPEN_CALLED)) - return (__db_mi_open(env, "DB->rename", 1)); - - /* Validate arguments. */ - if ((ret = __db_fchk(env, "DB->rename", flags, 0)) != 0) - return (ret); - - /* Check for consistent transaction usage. 
*/ - if ((ret = __db_check_txn(dbp, NULL, DB_LOCK_INVALIDID, 0)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __db_rep_enter(dbp, 1, 1, 0)) != 0) { - handle_check = 0; - goto err; - } - - /* Rename the file. */ - ret = __db_rename(dbp, ip, NULL, name, subdb, newname); - - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_rename - * DB->rename method. - * - */ -static int -__db_rename(dbp, ip, txn, name, subdb, newname) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb, *newname; -{ - int ret, t_ret; - - ret = __db_rename_int(dbp, ip, txn, name, subdb, newname); - - if ((t_ret = __db_close(dbp, txn, DB_NOSYNC)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_rename_int - * Worker function for DB->rename method; the close of the dbp is - * left in the wrapper routine. - * - * PUBLIC: int __db_rename_int __P((DB *, DB_THREAD_INFO *, - * PUBLIC: DB_TXN *, const char *, const char *, const char *)); - */ -int -__db_rename_int(dbp, ip, txn, name, subdb, newname) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb, *newname; -{ - ENV *env; - int ret; - char *old, *real_name; - - env = dbp->env; - real_name = NULL; - - DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, name); - - if (name == NULL && subdb == NULL) { - __db_errx(env, "Rename on temporary files invalid"); - ret = EINVAL; - goto err; - } - - if (name == NULL) - MAKE_INMEM(dbp); - else if (subdb != NULL) { - ret = __db_subdb_rename(dbp, ip, txn, name, subdb, newname); - goto err; - } - - /* - * From here on down, this pertains to files or in-memory databases. - * - * Find the real name of the file. 
- */ - if (F_ISSET(dbp, DB_AM_INMEM)) { - old = (char *)subdb; - real_name = (char *)subdb; - } else { - if ((ret = __db_appname(env, DB_APP_DATA, - name, &dbp->dirname, &real_name)) != 0) - goto err; - old = (char *)name; - } - DB_ASSERT(env, old != NULL); - - if ((ret = __fop_remove_setup(dbp, txn, real_name, 0)) != 0) - goto err; - - if (dbp->db_am_rename != NULL && - (ret = dbp->db_am_rename(dbp, ip, txn, name, subdb, newname)) != 0) - goto err; - - /* - * The transactional case and non-transactional case are - * quite different. In the non-transactional case, we simply - * do the rename. In the transactional case, since we need - * the ability to back out and maintain locking, we have to - * create a temporary object as a placeholder. This is all - * taken care of in the fop layer. - */ - if (IS_REAL_TXN(txn)) { - if ((ret = __fop_dummy(dbp, txn, old, newname)) != 0) - goto err; - } else { - if ((ret = __fop_dbrename(dbp, old, newname)) != 0) - goto err; - } - - /* - * I am pretty sure that we haven't gotten a dbreg id, so calling - * dbreg_filelist_update is not necessary. - */ - DB_ASSERT(env, dbp->log_filename == NULL || - dbp->log_filename->id == DB_LOGFILEID_INVALID); - - DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, newname); - -DB_TEST_RECOVERY_LABEL -err: if (!F_ISSET(dbp, DB_AM_INMEM) && real_name != NULL) - __os_free(env, real_name); - - return (ret); -} - -/* - * __db_subdb_rename -- - * Rename a subdatabase. - */ -static int -__db_subdb_rename(dbp, ip, txn, name, subdb, newname) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb, *newname; -{ - DB *mdbp; - ENV *env; - PAGE *meta; - int ret, t_ret; - - mdbp = NULL; - meta = NULL; - env = dbp->env; - - /* - * We have not opened this dbp so it isn't marked as a subdb, - * but it ought to be. - */ - F_SET(dbp, DB_AM_SUBDB); - - /* - * Rename the entry in the main database. 
We need to first - * get the meta-data page number (via MU_OPEN) so that we can - * read the meta-data page and obtain a handle lock. Once we've - * done that, we can proceed to do the rename in the master. - */ - if ((ret = __db_master_open(dbp, ip, txn, name, 0, 0, &mdbp)) != 0) - goto err; - - if ((ret = __db_master_update(mdbp, dbp, ip, txn, subdb, dbp->type, - MU_OPEN, NULL, 0)) != 0) - goto err; - - if ((ret = __memp_fget(mdbp->mpf, &dbp->meta_pgno, - ip, txn, 0, &meta)) != 0) - goto err; - memcpy(dbp->fileid, ((DBMETA *)meta)->uid, DB_FILE_ID_LEN); - if ((ret = __fop_lock_handle(env, - dbp, mdbp->locker, DB_LOCK_WRITE, NULL, NOWAIT_FLAG(txn))) != 0) - goto err; - - ret = __memp_fput(mdbp->mpf, ip, meta, dbp->priority); - meta = NULL; - if (ret != 0) - goto err; - - if ((ret = __db_master_update(mdbp, dbp, ip, txn, - subdb, dbp->type, MU_RENAME, newname, 0)) != 0) - goto err; - - DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, name); - -DB_TEST_RECOVERY_LABEL -err: - if (meta != NULL && (t_ret = - __memp_fput(mdbp->mpf, ip, meta, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - - if (mdbp != NULL && - (t_ret = __db_close(mdbp, txn, DB_NOSYNC)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} diff --git a/db/db_ret.c b/db/db_ret.c deleted file mode 100644 index 5ff60d1..0000000 --- a/db/db_ret.c +++ /dev/null @@ -1,156 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_am.h" - -/* - * __db_ret -- - * Build return DBT. 
- * - * PUBLIC: int __db_ret __P((DBC *, - * PUBLIC: PAGE *, u_int32_t, DBT *, void **, u_int32_t *)); - */ -int -__db_ret(dbc, h, indx, dbt, memp, memsize) - DBC *dbc; - PAGE *h; - u_int32_t indx; - DBT *dbt; - void **memp; - u_int32_t *memsize; -{ - BKEYDATA *bk; - BOVERFLOW *bo; - DB *dbp; - HOFFPAGE ho; - u_int32_t len; - u_int8_t *hk; - void *data; - - dbp = dbc->dbp; - - switch (TYPE(h)) { - case P_HASH_UNSORTED: - case P_HASH: - hk = P_ENTRY(dbp, h, indx); - if (HPAGE_PTYPE(hk) == H_OFFPAGE) { - memcpy(&ho, hk, sizeof(HOFFPAGE)); - return (__db_goff(dbc, dbt, - ho.tlen, ho.pgno, memp, memsize)); - } - len = LEN_HKEYDATA(dbp, h, dbp->pgsize, indx); - data = HKEYDATA_DATA(hk); - break; - case P_LBTREE: - case P_LDUP: - case P_LRECNO: - bk = GET_BKEYDATA(dbp, h, indx); - if (B_TYPE(bk->type) == B_OVERFLOW) { - bo = (BOVERFLOW *)bk; - return (__db_goff(dbc, dbt, - bo->tlen, bo->pgno, memp, memsize)); - } - len = bk->len; - data = bk->data; - break; - default: - return (__db_pgfmt(dbp->env, h->pgno)); - } - - return (__db_retcopy(dbp->env, dbt, data, len, memp, memsize)); -} - -/* - * __db_retcopy -- - * Copy the returned data into the user's DBT, handling special flags. - * - * PUBLIC: int __db_retcopy __P((ENV *, DBT *, - * PUBLIC: void *, u_int32_t, void **, u_int32_t *)); - */ -int -__db_retcopy(env, dbt, data, len, memp, memsize) - ENV *env; - DBT *dbt; - void *data; - u_int32_t len; - void **memp; - u_int32_t *memsize; -{ - int ret; - - ret = 0; - - /* If returning a partial record, reset the length. */ - if (F_ISSET(dbt, DB_DBT_PARTIAL)) { - data = (u_int8_t *)data + dbt->doff; - if (len > dbt->doff) { - len -= dbt->doff; - if (len > dbt->dlen) - len = dbt->dlen; - } else - len = 0; - } - - /* - * Allocate memory to be owned by the application: DB_DBT_MALLOC, - * DB_DBT_REALLOC. - * - * !!! - * We always allocate memory, even if we're copying out 0 bytes. 
This - * guarantees consistency, i.e., the application can always free memory - * without concern as to how many bytes of the record were requested. - * - * Use the memory specified by the application: DB_DBT_USERMEM. - * - * !!! - * If the length we're going to copy is 0, the application-supplied - * memory pointer is allowed to be NULL. - */ - if (F_ISSET(dbt, DB_DBT_USERCOPY)) { - dbt->size = len; - return (len == 0 ? 0 : env->dbt_usercopy(dbt, 0, data, - len, DB_USERCOPY_SETDATA)); - - } else if (F_ISSET(dbt, DB_DBT_MALLOC)) - ret = __os_umalloc(env, len, &dbt->data); - else if (F_ISSET(dbt, DB_DBT_REALLOC)) { - if (dbt->data == NULL || dbt->size == 0 || dbt->size < len) - ret = __os_urealloc(env, len, &dbt->data); - } else if (F_ISSET(dbt, DB_DBT_USERMEM)) { - if (len != 0 && (dbt->data == NULL || dbt->ulen < len)) - ret = DB_BUFFER_SMALL; - } else if (memp == NULL || memsize == NULL) - ret = EINVAL; - else { - if (len != 0 && (*memsize == 0 || *memsize < len)) { - if ((ret = __os_realloc(env, len, memp)) == 0) - *memsize = len; - else - *memsize = 0; - } - if (ret == 0) - dbt->data = *memp; - } - - if (ret == 0 && len != 0) - memcpy(dbt->data, data, len); - - /* - * Return the length of the returned record in the DBT size field. - * This satisfies the requirement that if we're using user memory - * and insufficient memory was provided, return the amount necessary - * in the size field. - */ - dbt->size = len; - - return (ret); -} diff --git a/db/db_setid.c b/db/db_setid.c deleted file mode 100644 index a78977e..0000000 --- a/db/db_setid.c +++ /dev/null @@ -1,213 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2000-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_swap.h" -#include "dbinc/db_am.h" -#include "dbinc/mp.h" - -/* - * __env_fileid_reset_pp -- - * ENV->fileid_reset pre/post processing. 
- * - * PUBLIC: int __env_fileid_reset_pp __P((DB_ENV *, const char *, u_int32_t)); - */ -int -__env_fileid_reset_pp(dbenv, name, flags) - DB_ENV *dbenv; - const char *name; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - env = dbenv->env; - - ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->fileid_reset"); - - /* - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if (flags != 0 && flags != DB_ENCRYPT) - return (__db_ferr(env, "DB_ENV->fileid_reset", 0)); - - ENV_ENTER(env, ip); - REPLICATION_WRAP(env, - (__env_fileid_reset(env, ip, name, LF_ISSET(DB_ENCRYPT) ? 1 : 0)), - 1, ret); - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __env_fileid_reset -- - * Reset the file IDs for every database in the file. - * PUBLIC: int __env_fileid_reset - * PUBLIC: __P((ENV *, DB_THREAD_INFO *, const char *, int)); - */ -int -__env_fileid_reset(env, ip, name, encrypted) - ENV *env; - DB_THREAD_INFO *ip; - const char *name; - int encrypted; -{ - DB *dbp; - DBC *dbcp; - DBMETA *meta; - DBT key, data; - DB_FH *fhp; - DB_MPOOLFILE *mpf; - DB_PGINFO cookie; - db_pgno_t pgno; - int t_ret, ret; - size_t n; - char *real_name; - u_int8_t fileid[DB_FILE_ID_LEN], mbuf[DBMETASIZE]; - void *pagep; - - dbp = NULL; - dbcp = NULL; - fhp = NULL; - real_name = NULL; - - /* Get the real backing file name. */ - if ((ret = __db_appname(env, - DB_APP_DATA, name, NULL, &real_name)) != 0) - return (ret); - - /* Get a new file ID. */ - if ((ret = __os_fileid(env, real_name, 1, fileid)) != 0) - goto err; - - /* - * The user may have physically copied a file currently open in the - * cache, which means if we open this file through the cache before - * updating the file ID on page 0, we might connect to the file from - * which the copy was made. 
- */ - if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) { - __db_err(env, ret, "%s", real_name); - goto err; - } - if ((ret = __os_read(env, fhp, mbuf, sizeof(mbuf), &n)) != 0) - goto err; - - if (n != sizeof(mbuf)) { - ret = EINVAL; - __db_errx(env, - "__env_fileid_reset: %s: unexpected file type or format", - real_name); - goto err; - } - - /* - * Create the DB object. - */ - if ((ret = __db_create_internal(&dbp, env, 0)) != 0) - goto err; - - /* If configured with a password, the databases are encrypted. */ - if (encrypted && (ret = __db_set_flags(dbp, DB_ENCRYPT)) != 0) - goto err; - - if ((ret = __db_meta_setup(env, - dbp, real_name, (DBMETA *)mbuf, 0, DB_CHK_META)) != 0) - goto err; - - meta = (DBMETA *)mbuf; - if (FLD_ISSET(meta->metaflags, - DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) && (ret = - __part_fileid_reset(env, ip, name, meta->nparts, encrypted)) != 0) - goto err; - - memcpy(meta->uid, fileid, DB_FILE_ID_LEN); - cookie.db_pagesize = sizeof(mbuf); - cookie.flags = dbp->flags; - cookie.type = dbp->type; - key.data = &cookie; - - if ((ret = __db_pgout(env->dbenv, 0, mbuf, &key)) != 0) - goto err; - if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) - goto err; - if ((ret = __os_write(env, fhp, mbuf, sizeof(mbuf), &n)) != 0) - goto err; - if ((ret = __os_fsync(env, fhp)) != 0) - goto err; - - /* - * Page 0 of the file has an updated file ID, and we can open it in - * the cache without connecting to a different, existing file. Open - * the file in the cache, and update the file IDs for subdatabases. - * (No existing code, as far as I know, actually uses the file ID of - * a subdatabase, but it's cleaner to get them all.) - */ - - /* - * If the database file doesn't support subdatabases, we only have - * to update a single metadata page. Otherwise, we have to open a - * cursor and step through the master database, and update all of - * the subdatabases' metadata pages. 
- */ - if (meta->type != P_BTREEMETA || !F_ISSET(meta, BTM_SUBDB)) - goto err; - - /* - * Open the DB file. - * - * !!! - * Note DB_RDWRMASTER flag, we need to open the master database file - * for writing in this case. - */ - if ((ret = __db_open(dbp, ip, NULL, - name, NULL, DB_UNKNOWN, DB_RDWRMASTER, 0, PGNO_BASE_MD)) != 0) - goto err; - - mpf = dbp->mpf; - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - if ((ret = __db_cursor(dbp, ip, NULL, &dbcp, 0)) != 0) - goto err; - while ((ret = __dbc_get(dbcp, &key, &data, DB_NEXT)) == 0) { - /* - * XXX - * We're handling actual data, not on-page meta-data, so it - * hasn't been converted to/from opposite endian architectures. - * Do it explicitly, now. - */ - memcpy(&pgno, data.data, sizeof(db_pgno_t)); - DB_NTOHL_SWAP(env, &pgno); - if ((ret = __memp_fget(mpf, &pgno, ip, NULL, - DB_MPOOL_DIRTY, &pagep)) != 0) - goto err; - memcpy(((DBMETA *)pagep)->uid, fileid, DB_FILE_ID_LEN); - if ((ret = __memp_fput(mpf, ip, pagep, dbcp->priority)) != 0) - goto err; - } - if (ret == DB_NOTFOUND) - ret = 0; - -err: if (dbcp != NULL && (t_ret = __dbc_close(dbcp)) != 0 && ret == 0) - ret = t_ret; - if (dbp != NULL && (t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - if (fhp != NULL && - (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) - ret = t_ret; - if (real_name != NULL) - __os_free(env, real_name); - - return (ret); -} diff --git a/db/db_setlsn.c b/db/db_setlsn.c deleted file mode 100644 index 51ee7d3..0000000 --- a/db/db_setlsn.c +++ /dev/null @@ -1,137 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2000-2009 Oracle. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_am.h" -#include "dbinc/mp.h" -#include "dbinc/partition.h" -#include "dbinc/qam.h" - -static int __env_lsn_reset __P((ENV *, DB_THREAD_INFO *, const char *, int)); - -/* - * __env_lsn_reset_pp -- - * ENV->lsn_reset pre/post processing. - * - * PUBLIC: int __env_lsn_reset_pp __P((DB_ENV *, const char *, u_int32_t)); - */ -int -__env_lsn_reset_pp(dbenv, name, flags) - DB_ENV *dbenv; - const char *name; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - env = dbenv->env; - - ENV_ILLEGAL_BEFORE_OPEN(env, "DB_ENV->lsn_reset"); - - /* - * !!! - * The actual argument checking is simple, do it inline, outside of - * the replication block. - */ - if (flags != 0 && flags != DB_ENCRYPT) - return (__db_ferr(env, "DB_ENV->lsn_reset", 0)); - - ENV_ENTER(env, ip); - REPLICATION_WRAP(env, - (__env_lsn_reset(env, ip, name, LF_ISSET(DB_ENCRYPT) ? 1 : 0)), - 1, ret); - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __env_lsn_reset -- - * Reset the LSNs for every page in the file. - */ -static int -__env_lsn_reset(env, ip, name, encrypted) - ENV *env; - DB_THREAD_INFO *ip; - const char *name; - int encrypted; -{ - DB *dbp; - int t_ret, ret; - - /* Create the DB object. */ - if ((ret = __db_create_internal(&dbp, env, 0)) != 0) - return (ret); - - /* If configured with a password, the databases are encrypted. */ - if (encrypted && (ret = __db_set_flags(dbp, DB_ENCRYPT)) != 0) - goto err; - - /* - * Open the DB file. - * - * !!! - * Note DB_RDWRMASTER flag, we need to open the master database file - * for writing in this case. 
- */ - if ((ret = __db_open(dbp, ip, NULL, - name, NULL, DB_UNKNOWN, DB_RDWRMASTER, 0, PGNO_BASE_MD)) != 0) { - __db_err(env, ret, "%s", name); - goto err; - } - - ret = __db_lsn_reset(dbp->mpf, ip); -#ifdef HAVE_PARTITION - if (ret == 0 && DB_IS_PARTITIONED(dbp)) - ret = __part_lsn_reset(dbp, ip); - else -#endif - if (ret == 0 && dbp->type == DB_QUEUE) -#ifdef HAVE_QUEUE - ret = __qam_lsn_reset(dbp, ip); -#else - ret = __db_no_queue_am(env); -#endif - -err: if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __db_lsn_reset -- reset the lsn for a db mpool handle. - * PUBLIC: int __db_lsn_reset __P((DB_MPOOLFILE *, DB_THREAD_INFO *)); - */ -int -__db_lsn_reset(mpf, ip) - DB_MPOOLFILE *mpf; - DB_THREAD_INFO *ip; -{ - PAGE *pagep; - db_pgno_t pgno; - int ret; - - /* Reset the LSN on every page of the database file. */ - for (pgno = 0; - (ret = __memp_fget(mpf, - &pgno, ip, NULL, DB_MPOOL_DIRTY, &pagep)) == 0; - ++pgno) { - LSN_NOT_LOGGED(pagep->lsn); - if ((ret = __memp_fput(mpf, - ip, pagep, DB_PRIORITY_UNCHANGED)) != 0) - break; - } - - if (ret == DB_PAGE_NOTFOUND) - ret = 0; - - return (ret); -} diff --git a/db/db_sort_multiple.c b/db/db_sort_multiple.c deleted file mode 100644 index 32ae2df..0000000 --- a/db/db_sort_multiple.c +++ /dev/null @@ -1,287 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" - -static int __db_quicksort __P((DB *, DBT *, DBT *, u_int32_t *, u_int32_t *, - u_int32_t *, u_int32_t *, u_int32_t)); - -/* - * __db_compare_both -- - * Use the comparison functions from db to compare akey and bkey, and if - * DB_DUPSORT adata and bdata. 
- * - * PUBLIC: int __db_compare_both __P((DB *, const DBT *, const DBT *, - * PUBLIC: const DBT *, const DBT *)); - */ -int -__db_compare_both(db, akey, adata, bkey, bdata) - DB *db; - const DBT *akey; - const DBT *adata; - const DBT *bkey; - const DBT *bdata; -{ - BTREE *t; - int cmp; - - t = (BTREE *)db->bt_internal; - - cmp = t->bt_compare(db, akey, bkey); - if (cmp != 0) return cmp; - if (!F_ISSET(db, DB_AM_DUPSORT)) return 0; - - if (adata == 0) return bdata == 0 ? 0 : -1; - if (bdata == 0) return 1; - -#ifdef HAVE_COMPRESSION - if (DB_IS_COMPRESSED(db)) - return t->compress_dup_compare(db, adata, bdata); -#endif - return db->dup_compare(db, adata, bdata); -} - -#define DB_SORT_SWAP(a, ad, b, bd) \ -do { \ - tmp = (a)[0]; (a)[0] = (b)[0]; (b)[0] = tmp; \ - tmp = (a)[-1]; (a)[-1] = (b)[-1]; (b)[-1] = tmp; \ - if (data != NULL) { \ - tmp = (ad)[0]; (ad)[0] = (bd)[0]; (bd)[0] = tmp; \ - tmp = (ad)[-1]; (ad)[-1] = (bd)[-1]; (bd)[-1] = tmp; \ - } \ -} while (0) - -#define DB_SORT_LOAD_DBT(a, ad, aptr, adptr) \ -do { \ - (a).data = (u_int8_t*)key->data + (aptr)[0]; \ - (a).size = (aptr)[-1]; \ - if (data != NULL) { \ - (ad).data = (u_int8_t*)data->data + (adptr)[0]; \ - (ad).size = (adptr)[-1]; \ - } \ -} while (0) - -#define DB_SORT_COMPARE(a, ad, b, bd) (data != NULL ? \ - __db_compare_both(db, &(a), &(ad), &(b), &(bd)) : \ - __db_compare_both(db, &(a), 0, &(b), 0)) - -#define DB_SORT_STACKSIZE 32 - -/* - * __db_quicksort -- - * The quicksort implementation for __db_sort_multiple() and - * __db_sort_multiple_key(). 
- */ -static int -__db_quicksort(db, key, data, kstart, kend, dstart, dend, size) - DB *db; - DBT *key, *data; - u_int32_t *kstart, *kend, *dstart, *dend; - u_int32_t size; -{ - int ret; - u_int32_t tmp; - u_int32_t *kmiddle, *dmiddle, *kptr, *dptr; - DBT a, ad, b, bd, m, md; - ENV *env; - - struct DB_SORT_quicksort_stack { - u_int32_t *kstart; - u_int32_t *kend; - u_int32_t *dstart; - u_int32_t *dend; - } stackbuf[DB_SORT_STACKSIZE], *stack; - u_int32_t soff, slen; - - ret = 0; - env = db->env; - - memset(&a, 0, sizeof(DBT)); - memset(&ad, 0, sizeof(DBT)); - memset(&b, 0, sizeof(DBT)); - memset(&bd, 0, sizeof(DBT)); - memset(&m, 0, sizeof(DBT)); - memset(&md, 0, sizeof(DBT)); - - /* NB end is smaller than start */ - - stack = stackbuf; - soff = 0; - slen = DB_SORT_STACKSIZE; - - start: - if (kend >= kstart) goto pop; - - /* If there's only one value, it's already sorted */ - tmp = (u_int32_t)(kstart - kend) / size; - if (tmp == 1) goto pop; - - DB_SORT_LOAD_DBT(a, ad, kstart, dstart); - DB_SORT_LOAD_DBT(b, bd, kend + size, dend + size); - - if (tmp == 2) { - /* Special case the sorting of two value sequences */ - if (DB_SORT_COMPARE(a, ad, b, bd) > 0) { - DB_SORT_SWAP(kstart, dstart, kend + size, dend + size); - } - goto pop; - } - - kmiddle = kstart - (tmp / 2) * size; - dmiddle = dstart - (tmp / 2) * size; - DB_SORT_LOAD_DBT(m, md, kmiddle, dmiddle); - - /* Find the median of three */ - if (DB_SORT_COMPARE(a, ad, b, bd) < 0) { - if (DB_SORT_COMPARE(m, md, a, ad) < 0) { - /* m < a < b */ - DB_SORT_SWAP(kstart, dstart, kend + size, dend + size); - } else if (DB_SORT_COMPARE(m, md, b, bd) < 0) { - /* a < m < b */ - DB_SORT_SWAP(kmiddle, - dmiddle, kend + size, dend + size); - } else { - /* a < b < m */ - /* Do nothing */ - } - } else { - if (DB_SORT_COMPARE(a, ad, m, md) < 0) { - /* b < a < m */ - DB_SORT_SWAP(kstart, dstart, kend + size, dend + size); - } else if (DB_SORT_COMPARE(b, bd, m, md) < 0) { - /* b < m < a */ - DB_SORT_SWAP(kmiddle, - dmiddle, kend + 
size, dend + size); - } else { - /* m < b < a */ - /* Do nothing */ - } - } - - /* partition */ - DB_SORT_LOAD_DBT(b, bd, kend + size, dend + size); - kmiddle = kstart; - dmiddle = dstart; - for (kptr = kstart, dptr = dstart; kptr > kend; - kptr -= size, dptr -= size) { - DB_SORT_LOAD_DBT(a, ad, kptr, dptr); - if (DB_SORT_COMPARE(a, ad, b, bd) < 0) { - DB_SORT_SWAP(kmiddle, dmiddle, kptr, dptr); - kmiddle -= size; - dmiddle -= size; - } - } - - DB_SORT_SWAP(kmiddle, dmiddle, kend + size, dend + size); - - if (soff == slen) { - /* Grow the stack */ - slen = slen * 2; - if (stack == stackbuf) { - ret = __os_malloc(env, slen * - sizeof(struct DB_SORT_quicksort_stack), &stack); - if (ret != 0) goto error; - memcpy(stack, stackbuf, soff * - sizeof(struct DB_SORT_quicksort_stack)); - } else { - ret = __os_realloc(env, slen * - sizeof(struct DB_SORT_quicksort_stack), &stack); - if (ret != 0) goto error; - } - } - - /* divide and conquer */ - stack[soff].kstart = kmiddle - size; - stack[soff].kend = kend; - stack[soff].dstart = dmiddle - size; - stack[soff].dend = dend; - ++soff; - - kend = kmiddle; - dend = dmiddle; - - goto start; - - pop: - if (soff != 0) { - --soff; - kstart = stack[soff].kstart; - kend = stack[soff].kend; - dstart = stack[soff].dstart; - dend = stack[soff].dend; - goto start; - } - - error: - if (stack != stackbuf) - __os_free(env, stack); - - return ret; -} - -#undef DB_SORT_SWAP -#undef DB_SORT_LOAD_DBT - -/* - * __db_sort_multiple -- - * If flags == DB_MULTIPLE_KEY, sorts a DB_MULTIPLE_KEY format DBT using - * the BTree comparison function and duplicate comparison function. - * - * If flags == DB_MULTIPLE, sorts one or two DB_MULTIPLE format DBTs using - * the BTree comparison function and duplicate comparison function. Will - * assume key and data specifies pairs of key/data to sort together. If - * data is NULL, will just sort key according to the btree comparison - * function. 
- * - * Uses an in-place quicksort algorithm, with median of three for the pivot - * point. - * - * PUBLIC: int __db_sort_multiple __P((DB *, DBT *, DBT *, u_int32_t)); - */ -int -__db_sort_multiple(db, key, data, flags) - DB *db; - DBT *key, *data; - u_int32_t flags; -{ - u_int32_t *kstart, *kend, *dstart, *dend; - - /* TODO: sanity checks on the DBTs */ - /* DB_ILLEGAL_METHOD(db, DB_OK_BTREE); */ - - kstart = (u_int32_t*)((u_int8_t *)key->data + key->ulen) - 1; - - switch (flags) { - case DB_MULTIPLE: - if (data != NULL) - dstart = (u_int32_t*)((u_int8_t *)data->data + - data->ulen) - 1; - else - dstart = kstart; - - /* Find the end */ - for (kend = kstart, dend = dstart; - *kend != (u_int32_t)-1 && *dend != (u_int32_t)-1; - kend -= 2, dend -= 2) - ; - - return (__db_quicksort(db, key, data, kstart, kend, dstart, - dend, 2)); - case DB_MULTIPLE_KEY: - /* Find the end */ - for (kend = kstart; *kend != (u_int32_t)-1; kend -= 4) - ; - - return (__db_quicksort(db, key, key, kstart, kend, kstart - 2, - kend - 2, 4)); - default: - return (__db_ferr(db->env, "DB->sort_multiple", 0)); - } -} diff --git a/db/db_stati.c b/db/db_stati.c deleted file mode 100644 index b8d3a3f..0000000 --- a/db/db_stati.c +++ /dev/null @@ -1,494 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/qam.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/partition.h" - -#ifdef HAVE_STATISTICS -static int __db_print_all __P((DB *, u_int32_t)); -static int __db_print_citem __P((DBC *)); -static int __db_print_cursor __P((DB *)); -static int __db_print_stats __P((DB *, DB_THREAD_INFO *, u_int32_t)); -static int __db_stat __P((DB *, DB_THREAD_INFO *, DB_TXN *, void *, u_int32_t)); -static int __db_stat_arg __P((DB *, u_int32_t)); - -/* - * __db_stat_pp -- - * DB->stat pre/post processing. - * - * PUBLIC: int __db_stat_pp __P((DB *, DB_TXN *, void *, u_int32_t)); - */ -int -__db_stat_pp(dbp, txn, spp, flags) - DB *dbp; - DB_TXN *txn; - void *spp; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat"); - - if ((ret = __db_stat_arg(dbp, flags)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, - txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - ret = __db_stat(dbp, ip, txn, spp, flags); - - /* Release replication block. */ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_stat -- - * DB->stat. - * - */ -static int -__db_stat(dbp, ip, txn, spp, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - void *spp; - u_int32_t flags; -{ - DBC *dbc; - ENV *env; - int ret, t_ret; - - env = dbp->env; - - /* Acquire a cursor. 
*/ - if ((ret = __db_cursor(dbp, ip, txn, - &dbc, LF_ISSET(DB_READ_COMMITTED | DB_READ_UNCOMMITTED))) != 0) - return (ret); - - DEBUG_LWRITE(dbc, NULL, "DB->stat", NULL, NULL, flags); - LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED); -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbp)) - ret = __partition_stat(dbc, spp, flags); - else -#endif - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - ret = __bam_stat(dbc, spp, flags); - break; - case DB_HASH: - ret = __ham_stat(dbc, spp, flags); - break; - case DB_QUEUE: - ret = __qam_stat(dbc, spp, flags); - break; - case DB_UNKNOWN: - default: - ret = (__db_unknown_type(env, "DB->stat", dbp->type)); - break; - } - - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_stat_arg -- - * Check DB->stat arguments. - */ -static int -__db_stat_arg(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - ENV *env; - - env = dbp->env; - - /* Check for invalid function flags. */ - LF_CLR(DB_READ_COMMITTED | DB_READ_UNCOMMITTED); - switch (flags) { - case 0: - case DB_FAST_STAT: - break; - default: - return (__db_ferr(env, "DB->stat", 0)); - } - - return (0); -} - -/* - * __db_stat_print_pp -- - * DB->stat_print pre/post processing. - * - * PUBLIC: int __db_stat_print_pp __P((DB *, u_int32_t)); - */ -int -__db_stat_print_pp(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_BEFORE_OPEN(dbp, "DB->stat_print"); - - /* - * !!! - * The actual argument checking is simple, do it inline. - */ - if ((ret = __db_fchk(env, - "DB->stat_print", flags, DB_FAST_STAT | DB_STAT_ALL)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && (ret = __db_rep_enter(dbp, 1, 0, 0)) != 0) { - handle_check = 0; - goto err; - } - - ret = __db_stat_print(dbp, ip, flags); - - /* Release replication block. 
*/ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - -err: ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_stat_print -- - * DB->stat_print. - * - * PUBLIC: int __db_stat_print __P((DB *, DB_THREAD_INFO *, u_int32_t)); - */ -int -__db_stat_print(dbp, ip, flags) - DB *dbp; - DB_THREAD_INFO *ip; - u_int32_t flags; -{ - time_t now; - int ret; - char time_buf[CTIME_BUFLEN]; - - (void)time(&now); - __db_msg(dbp->env, "%.24s\tLocal time", __os_ctime(&now, time_buf)); - - if (LF_ISSET(DB_STAT_ALL) && (ret = __db_print_all(dbp, flags)) != 0) - return (ret); - - if ((ret = __db_print_stats(dbp, ip, flags)) != 0) - return (ret); - - return (0); -} - -/* - * __db_print_stats -- - * Display default DB handle statistics. - */ -static int -__db_print_stats(dbp, ip, flags) - DB *dbp; - DB_THREAD_INFO *ip; - u_int32_t flags; -{ - DBC *dbc; - ENV *env; - int ret, t_ret; - - env = dbp->env; - - /* Acquire a cursor. */ - if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, NULL, "DB->stat_print", NULL, NULL, 0); - - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - ret = __bam_stat_print(dbc, flags); - break; - case DB_HASH: - ret = __ham_stat_print(dbc, flags); - break; - case DB_QUEUE: - ret = __qam_stat_print(dbc, flags); - break; - case DB_UNKNOWN: - default: - ret = (__db_unknown_type(env, "DB->stat_print", dbp->type)); - break; - } - - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_print_all -- - * Display debugging DB handle statistics. 
- */ -static int -__db_print_all(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - static const FN fn[] = { - { DB_AM_CHKSUM, "DB_AM_CHKSUM" }, - { DB_AM_COMPENSATE, "DB_AM_COMPENSATE" }, - { DB_AM_CREATED, "DB_AM_CREATED" }, - { DB_AM_CREATED_MSTR, "DB_AM_CREATED_MSTR" }, - { DB_AM_DBM_ERROR, "DB_AM_DBM_ERROR" }, - { DB_AM_DELIMITER, "DB_AM_DELIMITER" }, - { DB_AM_DISCARD, "DB_AM_DISCARD" }, - { DB_AM_DUP, "DB_AM_DUP" }, - { DB_AM_DUPSORT, "DB_AM_DUPSORT" }, - { DB_AM_ENCRYPT, "DB_AM_ENCRYPT" }, - { DB_AM_FIXEDLEN, "DB_AM_FIXEDLEN" }, - { DB_AM_INMEM, "DB_AM_INMEM" }, - { DB_AM_IN_RENAME, "DB_AM_IN_RENAME" }, - { DB_AM_NOT_DURABLE, "DB_AM_NOT_DURABLE" }, - { DB_AM_OPEN_CALLED, "DB_AM_OPEN_CALLED" }, - { DB_AM_PAD, "DB_AM_PAD" }, - { DB_AM_PGDEF, "DB_AM_PGDEF" }, - { DB_AM_RDONLY, "DB_AM_RDONLY" }, - { DB_AM_READ_UNCOMMITTED, "DB_AM_READ_UNCOMMITTED" }, - { DB_AM_RECNUM, "DB_AM_RECNUM" }, - { DB_AM_RECOVER, "DB_AM_RECOVER" }, - { DB_AM_RENUMBER, "DB_AM_RENUMBER" }, - { DB_AM_REVSPLITOFF, "DB_AM_REVSPLITOFF" }, - { DB_AM_SECONDARY, "DB_AM_SECONDARY" }, - { DB_AM_SNAPSHOT, "DB_AM_SNAPSHOT" }, - { DB_AM_SUBDB, "DB_AM_SUBDB" }, - { DB_AM_SWAP, "DB_AM_SWAP" }, - { DB_AM_TXN, "DB_AM_TXN" }, - { DB_AM_VERIFYING, "DB_AM_VERIFYING" }, - { 0, NULL } - }; - ENV *env; - char time_buf[CTIME_BUFLEN]; - - env = dbp->env; - - __db_msg(env, "%s", DB_GLOBAL(db_line)); - __db_msg(env, "DB handle information:"); - STAT_ULONG("Page size", dbp->pgsize); - STAT_ISSET("Append recno", dbp->db_append_recno); - STAT_ISSET("Feedback", dbp->db_feedback); - STAT_ISSET("Dup compare", dbp->dup_compare); - STAT_ISSET("App private", dbp->app_private); - STAT_ISSET("DbEnv", dbp->env); - STAT_STRING("Type", __db_dbtype_to_string(dbp->type)); - - __mutex_print_debug_single(env, "Thread mutex", dbp->mutex, flags); - - STAT_STRING("File", dbp->fname); - STAT_STRING("Database", dbp->dname); - STAT_HEX("Open flags", dbp->open_flags); - - __db_print_fileid(env, dbp->fileid, "\tFile ID"); - - 
STAT_ULONG("Cursor adjust ID", dbp->adj_fileid); - STAT_ULONG("Meta pgno", dbp->meta_pgno); - if (dbp->locker != NULL) - STAT_ULONG("Locker ID", dbp->locker->id); - if (dbp->cur_locker != NULL) - STAT_ULONG("Handle lock", dbp->cur_locker->id); - if (dbp->associate_locker != NULL) - STAT_ULONG("Associate lock", dbp->associate_locker->id); - STAT_ULONG("RPC remote ID", dbp->cl_id); - - __db_msg(env, - "%.24s\tReplication handle timestamp", - dbp->timestamp == 0 ? "0" : __os_ctime(&dbp->timestamp, time_buf)); - - STAT_ISSET("Secondary callback", dbp->s_callback); - STAT_ISSET("Primary handle", dbp->s_primary); - - STAT_ISSET("api internal", dbp->api_internal); - STAT_ISSET("Btree/Recno internal", dbp->bt_internal); - STAT_ISSET("Hash internal", dbp->h_internal); - STAT_ISSET("Queue internal", dbp->q_internal); - - __db_prflags(env, NULL, dbp->flags, fn, NULL, "\tFlags"); - - if (dbp->log_filename == NULL) - STAT_ISSET("File naming information", dbp->log_filename); - else - __dbreg_print_fname(env, dbp->log_filename); - - (void)__db_print_cursor(dbp); - - return (0); -} - -/* - * __db_print_cursor -- - * Display the cursor active and free queues. 
- */ -static int -__db_print_cursor(dbp) - DB *dbp; -{ - DBC *dbc; - ENV *env; - int ret, t_ret; - - env = dbp->env; - - __db_msg(env, "%s", DB_GLOBAL(db_line)); - __db_msg(env, "DB handle cursors:"); - - ret = 0; - MUTEX_LOCK(dbp->env, dbp->mutex); - __db_msg(env, "Active queue:"); - TAILQ_FOREACH(dbc, &dbp->active_queue, links) - if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0) - ret = t_ret; - __db_msg(env, "Join queue:"); - TAILQ_FOREACH(dbc, &dbp->join_queue, links) - if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0) - ret = t_ret; - __db_msg(env, "Free queue:"); - TAILQ_FOREACH(dbc, &dbp->free_queue, links) - if ((t_ret = __db_print_citem(dbc)) != 0 && ret == 0) - ret = t_ret; - MUTEX_UNLOCK(dbp->env, dbp->mutex); - - return (ret); -} - -static int -__db_print_citem(dbc) - DBC *dbc; -{ - static const FN fn[] = { - { DBC_ACTIVE, "DBC_ACTIVE" }, - { DBC_DONTLOCK, "DBC_DONTLOCK" }, - { DBC_MULTIPLE, "DBC_MULTIPLE" }, - { DBC_MULTIPLE_KEY, "DBC_MULTIPLE_KEY" }, - { DBC_OPD, "DBC_OPD" }, - { DBC_OWN_LID, "DBC_OWN_LID" }, - { DBC_READ_COMMITTED, "DBC_READ_COMMITTED" }, - { DBC_READ_UNCOMMITTED, "DBC_READ_UNCOMMITTED" }, - { DBC_RECOVER, "DBC_RECOVER" }, - { DBC_RMW, "DBC_RMW" }, - { DBC_TRANSIENT, "DBC_TRANSIENT" }, - { DBC_WAS_READ_COMMITTED,"DBC_WAS_READ_COMMITTED" }, - { DBC_WRITECURSOR, "DBC_WRITECURSOR" }, - { DBC_WRITER, "DBC_WRITER" }, - { 0, NULL } - }; - DB *dbp; - DBC_INTERNAL *cp; - ENV *env; - - dbp = dbc->dbp; - env = dbp->env; - cp = dbc->internal; - - STAT_POINTER("DBC", dbc); - STAT_POINTER("Associated dbp", dbc->dbp); - STAT_POINTER("Associated txn", dbc->txn); - STAT_POINTER("Internal", cp); - STAT_HEX("Default locker ID", dbc->lref == NULL ? 
0 : dbc->lref->id); - STAT_HEX("Locker", P_TO_ULONG(dbc->locker)); - STAT_STRING("Type", __db_dbtype_to_string(dbc->dbtype)); - - STAT_POINTER("Off-page duplicate cursor", cp->opd); - STAT_POINTER("Referenced page", cp->page); - STAT_ULONG("Root", cp->root); - STAT_ULONG("Page number", cp->pgno); - STAT_ULONG("Page index", cp->indx); - STAT_STRING("Lock mode", __db_lockmode_to_string(cp->lock_mode)); - __db_prflags(env, NULL, dbc->flags, fn, NULL, "\tFlags"); - - switch (dbc->dbtype) { - case DB_BTREE: - case DB_RECNO: - __bam_print_cursor(dbc); - break; - case DB_HASH: - __ham_print_cursor(dbc); - break; - case DB_UNKNOWN: - DB_ASSERT(env, dbp->type != DB_UNKNOWN); - /* FALLTHROUGH */ - case DB_QUEUE: - default: - break; - } - return (0); -} - -#else /* !HAVE_STATISTICS */ - -int -__db_stat_pp(dbp, txn, spp, flags) - DB *dbp; - DB_TXN *txn; - void *spp; - u_int32_t flags; -{ - COMPQUIET(spp, NULL); - COMPQUIET(txn, NULL); - COMPQUIET(flags, 0); - - return (__db_stat_not_built(dbp->env)); -} - -int -__db_stat_print_pp(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - COMPQUIET(flags, 0); - - return (__db_stat_not_built(dbp->env)); -} -#endif diff --git a/db/db_truncate.c b/db/db_truncate.c deleted file mode 100644 index 66f4180..0000000 --- a/db/db_truncate.c +++ /dev/null @@ -1,225 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2001-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/qam.h" -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/partition.h" -#include "dbinc/txn.h" - -static int __db_cursor_check __P((DB *)); - -/* - * __db_truncate_pp - * DB->truncate pre/post processing. 
- * - * PUBLIC: int __db_truncate_pp __P((DB *, DB_TXN *, u_int32_t *, u_int32_t)); - */ -int -__db_truncate_pp(dbp, txn, countp, flags) - DB *dbp; - DB_TXN *txn; - u_int32_t *countp, flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int handle_check, ret, t_ret, txn_local; - - env = dbp->env; - handle_check = txn_local = 0; - - STRIP_AUTO_COMMIT(flags); - - /* Check for invalid flags. */ - if (F_ISSET(dbp, DB_AM_SECONDARY)) { - __db_errx(env, "DB->truncate forbidden on secondary indices"); - return (EINVAL); - } - if ((ret = __db_fchk(env, "DB->truncate", flags, 0)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - /* - * Make sure there are no active cursors on this db. Since we drop - * pages we cannot really adjust cursors. - */ - if ((ret = __db_cursor_check(dbp)) != 0) { - __db_errx(env, - "DB->truncate not permitted with active cursors"); - goto err; - } - -#ifdef CONFIG_TEST - if (IS_REP_MASTER(env)) - DB_TEST_WAIT(env, env->test_check); -#endif - /* Check for replication block. */ - handle_check = IS_ENV_REPLICATED(env); - if (handle_check && - (ret = __db_rep_enter(dbp, 1, 0, txn != NULL)) != 0) { - handle_check = 0; - goto err; - } - - /* - * Check for changes to a read-only database. This must be after the - * replication block so that we cannot race master/client state changes. - */ - if (DB_IS_READONLY(dbp)) { - ret = __db_rdonly(env, "DB->truncate"); - goto err; - } - - /* - * Create local transaction as necessary, check for consistent - * transaction usage. - */ - if (IS_DB_AUTO_COMMIT(dbp, txn)) { - if ((ret = __txn_begin(env, ip, NULL, &txn, 0)) != 0) - goto err; - txn_local = 1; - } - - /* Check for consistent transaction usage. */ - if ((ret = __db_check_txn(dbp, txn, DB_LOCK_INVALIDID, 0)) != 0) - goto err; - - ret = __db_truncate(dbp, ip, txn, countp); - -err: if (txn_local && - (t_ret = __db_txn_auto_resolve(env, txn, 0, ret)) && ret == 0) - ret = t_ret; - - /* Release replication block. 
*/ - if (handle_check && (t_ret = __env_db_rep_exit(env)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_truncate - * DB->truncate. - * - * PUBLIC: int __db_truncate __P((DB *, DB_THREAD_INFO *, DB_TXN *, - * PUBLIC: u_int32_t *)); - */ -int -__db_truncate(dbp, ip, txn, countp) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - u_int32_t *countp; -{ - DB *sdbp; - DBC *dbc; - ENV *env; - u_int32_t scount; - int ret, t_ret; - - env = dbp->env; - dbc = NULL; - ret = 0; - - /* - * Run through all secondaries and truncate them first. The count - * returned is the count of the primary only. QUEUE uses normal - * processing to truncate so it will update the secondaries normally. - */ - if (dbp->type != DB_QUEUE && DB_IS_PRIMARY(dbp)) { - if ((ret = __db_s_first(dbp, &sdbp)) != 0) - return (ret); - for (; sdbp != NULL && ret == 0; ret = __db_s_next(&sdbp, txn)) - if ((ret = __db_truncate(sdbp, ip, txn, &scount)) != 0) - break; - if (sdbp != NULL) - (void)__db_s_done(sdbp, txn); - if (ret != 0) - return (ret); - } - - DB_TEST_RECOVERY(dbp, DB_TEST_PREDESTROY, ret, NULL); - - /* Acquire a cursor. */ - if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) - return (ret); - - DEBUG_LWRITE(dbc, txn, "DB->truncate", NULL, NULL, 0); -#ifdef HAVE_PARTITION - if (DB_IS_PARTITIONED(dbp)) - ret = __part_truncate(dbc, countp); - else -#endif - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - ret = __bam_truncate(dbc, countp); - break; - case DB_HASH: - ret = __ham_truncate(dbc, countp); - break; - case DB_QUEUE: - ret = __qam_truncate(dbc, countp); - break; - case DB_UNKNOWN: - default: - ret = __db_unknown_type(env, "DB->truncate", dbp->type); - break; - } - - /* Discard the cursor. 
*/ - if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - DB_TEST_RECOVERY(dbp, DB_TEST_POSTDESTROY, ret, NULL); - -DB_TEST_RECOVERY_LABEL - - return (ret); -} - -/* - * __db_cursor_check -- - * See if there are any active cursors on this db. - */ -static int -__db_cursor_check(dbp) - DB *dbp; -{ - DB *ldbp; - DBC *dbc; - ENV *env; - int found; - - env = dbp->env; - - MUTEX_LOCK(env, env->mtx_dblist); - FIND_FIRST_DB_MATCH(env, dbp, ldbp); - for (found = 0; - !found && ldbp != NULL && ldbp->adj_fileid == dbp->adj_fileid; - ldbp = TAILQ_NEXT(ldbp, dblistlinks)) { - MUTEX_LOCK(env, dbp->mutex); - TAILQ_FOREACH(dbc, &ldbp->active_queue, links) - if (IS_INITIALIZED(dbc)) { - found = 1; - break; - } - MUTEX_UNLOCK(env, dbp->mutex); - } - MUTEX_UNLOCK(env, env->mtx_dblist); - - return (found ? EINVAL : 0); -} diff --git a/db/db_upg.c b/db/db_upg.c deleted file mode 100644 index 5a6db94..0000000 --- a/db/db_upg.c +++ /dev/null @@ -1,510 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_swap.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/qam.h" - -/* - * __db_upgrade_pp -- - * DB->upgrade pre/post processing. - * - * PUBLIC: int __db_upgrade_pp __P((DB *, const char *, u_int32_t)); - */ -int -__db_upgrade_pp(dbp, fname, flags) - DB *dbp; - const char *fname; - u_int32_t flags; -{ -#ifdef HAVE_UPGRADE_SUPPORT - DB_THREAD_INFO *ip; - ENV *env; - int ret; - - env = dbp->env; - - /* - * !!! - * The actual argument checking is simple, do it inline. 
- */ - if ((ret = __db_fchk(env, "DB->upgrade", flags, DB_DUPSORT)) != 0) - return (ret); - - ENV_ENTER(env, ip); - ret = __db_upgrade(dbp, fname, flags); - ENV_LEAVE(env, ip); - return (ret); -#else - COMPQUIET(dbp, NULL); - COMPQUIET(fname, NULL); - COMPQUIET(flags, 0); - - __db_errx(dbp->env, "upgrade not supported"); - return (EINVAL); -#endif -} - -#ifdef HAVE_UPGRADE_SUPPORT -static int (* const func_31_list[P_PAGETYPE_MAX]) - __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = { - NULL, /* P_INVALID */ - NULL, /* __P_DUPLICATE */ - __ham_31_hash, /* P_HASH_UNSORTED */ - NULL, /* P_IBTREE */ - NULL, /* P_IRECNO */ - __bam_31_lbtree, /* P_LBTREE */ - NULL, /* P_LRECNO */ - NULL, /* P_OVERFLOW */ - __ham_31_hashmeta, /* P_HASHMETA */ - __bam_31_btreemeta, /* P_BTREEMETA */ - NULL, /* P_QAMMETA */ - NULL, /* P_QAMDATA */ - NULL, /* P_LDUP */ - NULL, /* P_HASH */ -}; - -static int (* const func_46_list[P_PAGETYPE_MAX]) - __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)) = { - NULL, /* P_INVALID */ - NULL, /* __P_DUPLICATE */ - __ham_46_hash, /* P_HASH_UNSORTED */ - NULL, /* P_IBTREE */ - NULL, /* P_IRECNO */ - NULL, /* P_LBTREE */ - NULL, /* P_LRECNO */ - NULL, /* P_OVERFLOW */ - __ham_46_hashmeta, /* P_HASHMETA */ - NULL, /* P_BTREEMETA */ - NULL, /* P_QAMMETA */ - NULL, /* P_QAMDATA */ - NULL, /* P_LDUP */ - NULL, /* P_HASH */ -}; - -static int __db_page_pass __P((DB *, char *, u_int32_t, int (* const []) - (DB *, char *, u_int32_t, DB_FH *, PAGE *, int *), DB_FH *)); -static int __db_set_lastpgno __P((DB *, char *, DB_FH *)); - -/* - * __db_upgrade -- - * Upgrade an existing database. 
- * - * PUBLIC: int __db_upgrade __P((DB *, const char *, u_int32_t)); - */ -int -__db_upgrade(dbp, fname, flags) - DB *dbp; - const char *fname; - u_int32_t flags; -{ - DBMETA *meta; - DB_FH *fhp; - ENV *env; - size_t n; - int ret, t_ret, use_mp_open; - u_int8_t mbuf[256], tmpflags; - char *real_name; - - use_mp_open = 0; - env = dbp->env; - fhp = NULL; - - /* Get the real backing file name. */ - if ((ret = __db_appname(env, - DB_APP_DATA, fname, NULL, &real_name)) != 0) - return (ret); - - /* Open the file. */ - if ((ret = __os_open(env, real_name, 0, 0, 0, &fhp)) != 0) { - __db_err(env, ret, "%s", real_name); - return (ret); - } - - /* Initialize the feedback. */ - if (dbp->db_feedback != NULL) - dbp->db_feedback(dbp, DB_UPGRADE, 0); - - /* - * Read the metadata page. We read 256 bytes, which is larger than - * any access method's metadata page and smaller than any disk sector. - */ - if ((ret = __os_read(env, fhp, mbuf, sizeof(mbuf), &n)) != 0) - goto err; - - switch (((DBMETA *)mbuf)->magic) { - case DB_BTREEMAGIC: - switch (((DBMETA *)mbuf)->version) { - case 6: - /* - * Before V7 not all pages had page types, so we do the - * single meta-data page by hand. - */ - if ((ret = - __bam_30_btreemeta(dbp, real_name, mbuf)) != 0) - goto err; - if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) - goto err; - if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0) - goto err; - /* FALLTHROUGH */ - case 7: - /* - * We need the page size to do more. Rip it out of - * the meta-data page. 
- */ - memcpy(&dbp->pgsize, mbuf + 20, sizeof(u_int32_t)); - - if ((ret = __db_page_pass( - dbp, real_name, flags, func_31_list, fhp)) != 0) - goto err; - /* FALLTHROUGH */ - case 8: - if ((ret = - __db_set_lastpgno(dbp, real_name, fhp)) != 0) - goto err; - /* FALLTHROUGH */ - case 9: - break; - default: - __db_errx(env, "%s: unsupported btree version: %lu", - real_name, (u_long)((DBMETA *)mbuf)->version); - ret = DB_OLD_VERSION; - goto err; - } - break; - case DB_HASHMAGIC: - switch (((DBMETA *)mbuf)->version) { - case 4: - case 5: - /* - * Before V6 not all pages had page types, so we do the - * single meta-data page by hand. - */ - if ((ret = - __ham_30_hashmeta(dbp, real_name, mbuf)) != 0) - goto err; - if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) - goto err; - if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0) - goto err; - - /* - * Before V6, we created hash pages one by one as they - * were needed, using hashhdr.ovfl_point to reserve - * a block of page numbers for them. A consequence - * of this was that, if no overflow pages had been - * created, the current doubling might extend past - * the end of the database file. - * - * In DB 3.X, we now create all the hash pages - * belonging to a doubling atomically; it's not - * safe to just save them for later, because when - * we create an overflow page we'll just create - * a new last page (whatever that may be). Grow - * the database to the end of the current doubling. - */ - if ((ret = - __ham_30_sizefix(dbp, fhp, real_name, mbuf)) != 0) - goto err; - /* FALLTHROUGH */ - case 6: - /* - * We need the page size to do more. Rip it out of - * the meta-data page. 
- */ - memcpy(&dbp->pgsize, mbuf + 20, sizeof(u_int32_t)); - - if ((ret = __db_page_pass( - dbp, real_name, flags, func_31_list, fhp)) != 0) - goto err; - /* FALLTHROUGH */ - case 7: - if ((ret = - __db_set_lastpgno(dbp, real_name, fhp)) != 0) - goto err; - /* FALLTHROUGH */ - case 8: - /* - * Any upgrade that has proceeded this far has metadata - * pages compatible with hash version 8 metadata pages, - * so casting mbuf to a dbmeta is safe. - * If a newer revision moves the pagesize, checksum or - * encrypt_alg flags in the metadata, then the - * extraction of the fields will need to use hard coded - * offsets. - */ - meta = (DBMETA*)mbuf; - /* - * We need the page size to do more. Extract it from - * the meta-data page. - */ - memcpy(&dbp->pgsize, &meta->pagesize, - sizeof(u_int32_t)); - /* - * Rip out metadata and encrypt_alg fields from the - * metadata page. So the upgrade can know how big - * the page metadata pre-amble is. Any upgrade that has - * proceeded this far has metadata pages compatible - * with hash version 8 metadata pages, so extracting - * the fields is safe. - */ - memcpy(&tmpflags, &meta->metaflags, sizeof(u_int8_t)); - if (FLD_ISSET(tmpflags, DBMETA_CHKSUM)) - F_SET(dbp, DB_AM_CHKSUM); - memcpy(&tmpflags, &meta->encrypt_alg, sizeof(u_int8_t)); - if (tmpflags != 0) { - if (!CRYPTO_ON(dbp->env)) { - __db_errx(env, -"Attempt to upgrade an encrypted database without providing a password."); - ret = EINVAL; - goto err; - } - F_SET(dbp, DB_AM_ENCRYPT); - } - - /* - * This is ugly. It is necessary to have a usable - * mpool in the dbp to upgrade from an unsorted - * to a sorted hash database. The mpool file is used - * to resolve offpage key items, which are needed to - * determine sort order. Having mpool open and access - * the file does not affect the page pass, since the - * page pass only updates DB_HASH_UNSORTED pages - * in-place, and the mpool file is only used to read - * OFFPAGE items. 
- */ - use_mp_open = 1; - if ((ret = __os_closehandle(env, fhp)) != 0) - return (ret); - dbp->type = DB_HASH; - if ((ret = __env_mpool(dbp, fname, - DB_AM_NOT_DURABLE | DB_AM_VERIFYING)) != 0) - return (ret); - fhp = dbp->mpf->fhp; - - /* Do the actual conversion pass. */ - if ((ret = __db_page_pass( - dbp, real_name, flags, func_46_list, fhp)) != 0) - goto err; - - /* FALLTHROUGH */ - case 9: - break; - default: - __db_errx(env, "%s: unsupported hash version: %lu", - real_name, (u_long)((DBMETA *)mbuf)->version); - ret = DB_OLD_VERSION; - goto err; - } - break; - case DB_QAMMAGIC: - switch (((DBMETA *)mbuf)->version) { - case 1: - /* - * If we're in a Queue database, the only page that - * needs upgrading is the meta-database page, don't - * bother with a full pass. - */ - if ((ret = __qam_31_qammeta(dbp, real_name, mbuf)) != 0) - return (ret); - /* FALLTHROUGH */ - case 2: - if ((ret = __qam_32_qammeta(dbp, real_name, mbuf)) != 0) - return (ret); - if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) - goto err; - if ((ret = __os_write(env, fhp, mbuf, 256, &n)) != 0) - goto err; - /* FALLTHROUGH */ - case 3: - case 4: - break; - default: - __db_errx(env, "%s: unsupported queue version: %lu", - real_name, (u_long)((DBMETA *)mbuf)->version); - ret = DB_OLD_VERSION; - goto err; - } - break; - default: - M_32_SWAP(((DBMETA *)mbuf)->magic); - switch (((DBMETA *)mbuf)->magic) { - case DB_BTREEMAGIC: - case DB_HASHMAGIC: - case DB_QAMMAGIC: - __db_errx(env, - "%s: DB->upgrade only supported on native byte-order systems", - real_name); - break; - default: - __db_errx(env, - "%s: unrecognized file type", real_name); - break; - } - ret = EINVAL; - goto err; - } - - ret = __os_fsync(env, fhp); - - /* - * If mp_open was used, then rely on the database close to clean up - * any file handles. - */ -err: if (use_mp_open == 0 && fhp != NULL && - (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) - ret = t_ret; - __os_free(env, real_name); - - /* We're done. 
*/ - if (dbp->db_feedback != NULL) - dbp->db_feedback(dbp, DB_UPGRADE, 100); - - return (ret); -} - -/* - * __db_page_pass -- - * Walk the pages of the database, upgrading whatever needs it. - */ -static int -__db_page_pass(dbp, real_name, flags, fl, fhp) - DB *dbp; - char *real_name; - u_int32_t flags; - int (* const fl[P_PAGETYPE_MAX]) - __P((DB *, char *, u_int32_t, DB_FH *, PAGE *, int *)); - DB_FH *fhp; -{ - ENV *env; - PAGE *page; - db_pgno_t i, pgno_last; - size_t n; - int dirty, ret; - - env = dbp->env; - - /* Determine the last page of the file. */ - if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0) - return (ret); - - /* Allocate memory for a single page. */ - if ((ret = __os_malloc(env, dbp->pgsize, &page)) != 0) - return (ret); - - /* Walk the file, calling the underlying conversion functions. */ - for (i = 0; i < pgno_last; ++i) { - if (dbp->db_feedback != NULL) - dbp->db_feedback( - dbp, DB_UPGRADE, (int)((i * 100)/pgno_last)); - if ((ret = __os_seek(env, fhp, i, dbp->pgsize, 0)) != 0) - break; - if ((ret = __os_read(env, fhp, page, dbp->pgsize, &n)) != 0) - break; - dirty = 0; - /* Always decrypt the page. */ - if ((ret = __db_decrypt_pg(env, dbp, page)) != 0) - break; - if (fl[TYPE(page)] != NULL && (ret = fl[TYPE(page)] - (dbp, real_name, flags, fhp, page, &dirty)) != 0) - break; - if (dirty) { - if ((ret = __db_encrypt_and_checksum_pg( - env, dbp, page)) != 0) - break; - if ((ret = - __os_seek(env, fhp, i, dbp->pgsize, 0)) != 0) - break; - if ((ret = __os_write(env, - fhp, page, dbp->pgsize, &n)) != 0) - break; - } - } - - __os_free(dbp->env, page); - return (ret); -} - -/* - * __db_lastpgno -- - * Return the current last page number of the file. 
- * - * PUBLIC: int __db_lastpgno __P((DB *, char *, DB_FH *, db_pgno_t *)); - */ -int -__db_lastpgno(dbp, real_name, fhp, pgno_lastp) - DB *dbp; - char *real_name; - DB_FH *fhp; - db_pgno_t *pgno_lastp; -{ - ENV *env; - db_pgno_t pgno_last; - u_int32_t mbytes, bytes; - int ret; - - env = dbp->env; - - if ((ret = __os_ioinfo(env, - real_name, fhp, &mbytes, &bytes, NULL)) != 0) { - __db_err(env, ret, "%s", real_name); - return (ret); - } - - /* Page sizes have to be a power-of-two. */ - if (bytes % dbp->pgsize != 0) { - __db_errx(env, - "%s: file size not a multiple of the pagesize", real_name); - return (EINVAL); - } - pgno_last = mbytes * (MEGABYTE / dbp->pgsize); - pgno_last += bytes / dbp->pgsize; - - *pgno_lastp = pgno_last; - return (0); -} - -/* - * __db_set_lastpgno -- - * Update the meta->last_pgno field. - * - * Code assumes that we do not have checksums/crypto on the page. - */ -static int -__db_set_lastpgno(dbp, real_name, fhp) - DB *dbp; - char *real_name; - DB_FH *fhp; -{ - DBMETA meta; - ENV *env; - int ret; - size_t n; - - env = dbp->env; - if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) - return (ret); - if ((ret = __os_read(env, fhp, &meta, sizeof(meta), &n)) != 0) - return (ret); - dbp->pgsize = meta.pagesize; - if ((ret = __db_lastpgno(dbp, real_name, fhp, &meta.last_pgno)) != 0) - return (ret); - if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0) - return (ret); - if ((ret = __os_write(env, fhp, &meta, sizeof(meta), &n)) != 0) - return (ret); - - return (0); -} -#endif /* HAVE_UPGRADE_SUPPORT */ diff --git a/db/db_upg_opd.c b/db/db_upg_opd.c deleted file mode 100644 index ea143cf..0000000 --- a/db/db_upg_opd.c +++ /dev/null @@ -1,343 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. 
- * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/btree.h" - -static int __db_build_bi __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *)); -static int __db_build_ri __P((DB *, DB_FH *, PAGE *, PAGE *, u_int32_t, int *)); -static int __db_up_ovref __P((DB *, DB_FH *, db_pgno_t)); - -#define GET_PAGE(dbp, fhp, pgno, page) { \ - if ((ret = __os_seek( \ - dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0) \ - goto err; \ - if ((ret = __os_read(dbp->env, \ - fhp, page, (dbp)->pgsize, &n)) != 0) \ - goto err; \ -} -#define PUT_PAGE(dbp, fhp, pgno, page) { \ - if ((ret = __os_seek( \ - dbp->env, fhp, pgno, (dbp)->pgsize, 0)) != 0) \ - goto err; \ - if ((ret = __os_write(dbp->env, \ - fhp, page, (dbp)->pgsize, &n)) != 0) \ - goto err; \ -} - -/* - * __db_31_offdup -- - * Convert 3.0 off-page duplicates to 3.1 off-page duplicates. - * - * PUBLIC: int __db_31_offdup __P((DB *, char *, DB_FH *, int, db_pgno_t *)); - */ -int -__db_31_offdup(dbp, real_name, fhp, sorted, pgnop) - DB *dbp; - char *real_name; - DB_FH *fhp; - int sorted; - db_pgno_t *pgnop; -{ - PAGE *ipage, *page; - db_indx_t indx; - db_pgno_t cur_cnt, i, next_cnt, pgno, *pgno_cur, pgno_last; - db_pgno_t *pgno_next, pgno_max, *tmp; - db_recno_t nrecs; - size_t n; - int level, nomem, ret; - - ipage = page = NULL; - pgno_cur = pgno_next = NULL; - - /* Allocate room to hold a page. */ - if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0) - goto err; - - /* - * Walk the chain of 3.0 off-page duplicates. Each one is converted - * in place to a 3.1 off-page duplicate page. If the duplicates are - * sorted, they are converted to a Btree leaf page, otherwise to a - * Recno leaf page. 
- */ - for (nrecs = 0, cur_cnt = pgno_max = 0, - pgno = *pgnop; pgno != PGNO_INVALID;) { - if (pgno_max == cur_cnt) { - pgno_max += 20; - if ((ret = __os_realloc(dbp->env, pgno_max * - sizeof(db_pgno_t), &pgno_cur)) != 0) - goto err; - } - pgno_cur[cur_cnt++] = pgno; - - GET_PAGE(dbp, fhp, pgno, page); - nrecs += NUM_ENT(page); - LEVEL(page) = LEAFLEVEL; - TYPE(page) = sorted ? P_LDUP : P_LRECNO; - /* - * !!! - * DB didn't zero the LSNs on off-page duplicates pages. - */ - ZERO_LSN(LSN(page)); - PUT_PAGE(dbp, fhp, pgno, page); - - pgno = NEXT_PGNO(page); - } - - /* If we only have a single page, it's easy. */ - if (cur_cnt <= 1) - goto done; - - /* - * pgno_cur is the list of pages we just converted. We're - * going to walk that list, but we'll need to create a new - * list while we do so. - */ - if ((ret = __os_malloc(dbp->env, - cur_cnt * sizeof(db_pgno_t), &pgno_next)) != 0) - goto err; - - /* Figure out where we can start allocating new pages. */ - if ((ret = __db_lastpgno(dbp, real_name, fhp, &pgno_last)) != 0) - goto err; - - /* Allocate room for an internal page. */ - if ((ret = __os_malloc(dbp->env, dbp->pgsize, &ipage)) != 0) - goto err; - PGNO(ipage) = PGNO_INVALID; - - /* - * Repeatedly walk the list of pages, building internal pages, until - * there's only one page at a level. - */ - for (level = LEAFLEVEL + 1; cur_cnt > 1; ++level) { - for (indx = 0, i = next_cnt = 0; i < cur_cnt;) { - if (indx == 0) { - P_INIT(ipage, dbp->pgsize, pgno_last, - PGNO_INVALID, PGNO_INVALID, - level, sorted ? P_IBTREE : P_IRECNO); - ZERO_LSN(LSN(ipage)); - - pgno_next[next_cnt++] = pgno_last++; - } - - GET_PAGE(dbp, fhp, pgno_cur[i], page); - - /* - * If the duplicates are sorted, put the first item on - * the lower-level page onto a Btree internal page. If - * the duplicates are not sorted, create an internal - * Recno structure on the page. If either case doesn't - * fit, push out the current page and start a new one. 
- */ - nomem = 0; - if (sorted) { - if ((ret = __db_build_bi( - dbp, fhp, ipage, page, indx, &nomem)) != 0) - goto err; - } else - if ((ret = __db_build_ri( - dbp, fhp, ipage, page, indx, &nomem)) != 0) - goto err; - if (nomem) { - indx = 0; - PUT_PAGE(dbp, fhp, PGNO(ipage), ipage); - } else { - ++indx; - ++NUM_ENT(ipage); - ++i; - } - } - - /* - * Push out the last internal page. Set the top-level record - * count if we've reached the top. - */ - if (next_cnt == 1) - RE_NREC_SET(ipage, nrecs); - PUT_PAGE(dbp, fhp, PGNO(ipage), ipage); - - /* Swap the current and next page number arrays. */ - cur_cnt = next_cnt; - tmp = pgno_cur; - pgno_cur = pgno_next; - pgno_next = tmp; - } - -done: *pgnop = pgno_cur[0]; - -err: if (pgno_cur != NULL) - __os_free(dbp->env, pgno_cur); - if (pgno_next != NULL) - __os_free(dbp->env, pgno_next); - if (ipage != NULL) - __os_free(dbp->env, ipage); - if (page != NULL) - __os_free(dbp->env, page); - - return (ret); -} - -/* - * __db_build_bi -- - * Build a BINTERNAL entry for a parent page. - */ -static int -__db_build_bi(dbp, fhp, ipage, page, indx, nomemp) - DB *dbp; - DB_FH *fhp; - PAGE *ipage, *page; - u_int32_t indx; - int *nomemp; -{ - BINTERNAL bi, *child_bi; - BKEYDATA *child_bk; - u_int8_t *p; - int ret; - db_indx_t *inp; - - inp = P_INP(dbp, ipage); - switch (TYPE(page)) { - case P_IBTREE: - child_bi = GET_BINTERNAL(dbp, page, 0); - if (P_FREESPACE(dbp, ipage) < BINTERNAL_PSIZE(child_bi->len)) { - *nomemp = 1; - return (0); - } - inp[indx] = - HOFFSET(ipage) -= BINTERNAL_SIZE(child_bi->len); - p = P_ENTRY(dbp, ipage, indx); - - bi.len = child_bi->len; - B_TSET(bi.type, child_bi->type); - bi.pgno = PGNO(page); - bi.nrecs = __bam_total(dbp, page); - memcpy(p, &bi, SSZA(BINTERNAL, data)); - p += SSZA(BINTERNAL, data); - memcpy(p, child_bi->data, child_bi->len); - - /* Increment the overflow ref count. 
*/ - if (B_TYPE(child_bi->type) == B_OVERFLOW) - if ((ret = __db_up_ovref(dbp, fhp, - ((BOVERFLOW *)(child_bi->data))->pgno)) != 0) - return (ret); - break; - case P_LDUP: - child_bk = GET_BKEYDATA(dbp, page, 0); - switch (B_TYPE(child_bk->type)) { - case B_KEYDATA: - if (P_FREESPACE(dbp, ipage) < - BINTERNAL_PSIZE(child_bk->len)) { - *nomemp = 1; - return (0); - } - inp[indx] = - HOFFSET(ipage) -= BINTERNAL_SIZE(child_bk->len); - p = P_ENTRY(dbp, ipage, indx); - - bi.len = child_bk->len; - B_TSET(bi.type, child_bk->type); - bi.pgno = PGNO(page); - bi.nrecs = __bam_total(dbp, page); - memcpy(p, &bi, SSZA(BINTERNAL, data)); - p += SSZA(BINTERNAL, data); - memcpy(p, child_bk->data, child_bk->len); - break; - case B_OVERFLOW: - if (P_FREESPACE(dbp, ipage) < - BINTERNAL_PSIZE(BOVERFLOW_SIZE)) { - *nomemp = 1; - return (0); - } - inp[indx] = - HOFFSET(ipage) -= BINTERNAL_SIZE(BOVERFLOW_SIZE); - p = P_ENTRY(dbp, ipage, indx); - - bi.len = BOVERFLOW_SIZE; - B_TSET(bi.type, child_bk->type); - bi.pgno = PGNO(page); - bi.nrecs = __bam_total(dbp, page); - memcpy(p, &bi, SSZA(BINTERNAL, data)); - p += SSZA(BINTERNAL, data); - memcpy(p, child_bk, BOVERFLOW_SIZE); - - /* Increment the overflow ref count. */ - if ((ret = __db_up_ovref(dbp, fhp, - ((BOVERFLOW *)child_bk)->pgno)) != 0) - return (ret); - break; - default: - return (__db_pgfmt(dbp->env, PGNO(page))); - } - break; - default: - return (__db_pgfmt(dbp->env, PGNO(page))); - } - - return (0); -} - -/* - * __db_build_ri -- - * Build a RINTERNAL entry for an internal parent page. 
- */ -static int -__db_build_ri(dbp, fhp, ipage, page, indx, nomemp) - DB *dbp; - DB_FH *fhp; - PAGE *ipage, *page; - u_int32_t indx; - int *nomemp; -{ - RINTERNAL ri; - db_indx_t *inp; - - COMPQUIET(fhp, NULL); - inp = P_INP(dbp, ipage); - if (P_FREESPACE(dbp, ipage) < RINTERNAL_PSIZE) { - *nomemp = 1; - return (0); - } - - ri.pgno = PGNO(page); - ri.nrecs = __bam_total(dbp, page); - inp[indx] = HOFFSET(ipage) -= RINTERNAL_SIZE; - memcpy(P_ENTRY(dbp, ipage, indx), &ri, RINTERNAL_SIZE); - - return (0); -} - -/* - * __db_up_ovref -- - * Increment/decrement the reference count on an overflow page. - */ -static int -__db_up_ovref(dbp, fhp, pgno) - DB *dbp; - DB_FH *fhp; - db_pgno_t pgno; -{ - PAGE *page; - size_t n; - int ret; - - /* Allocate room to hold a page. */ - if ((ret = __os_malloc(dbp->env, dbp->pgsize, &page)) != 0) - return (ret); - - GET_PAGE(dbp, fhp, pgno, page); - ++OV_REF(page); - PUT_PAGE(dbp, fhp, pgno, page); - -err: __os_free(dbp->env, page); - - return (ret); -} diff --git a/db/db_vrfy.c b/db/db_vrfy.c deleted file mode 100644 index 7ea9c62..0000000 --- a/db/db_vrfy.c +++ /dev/null @@ -1,2894 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2000-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_swap.h" -#include "dbinc/db_verify.h" -#include "dbinc/btree.h" -#include "dbinc/hash.h" -#include "dbinc/lock.h" -#include "dbinc/mp.h" -#include "dbinc/qam.h" -#include "dbinc/txn.h" - -/* - * This is the code for DB->verify, the DB database consistency checker. - * For now, it checks all subdatabases in a database, and verifies - * everything it knows how to (i.e. it's all-or-nothing, and one can't - * check only for a subset of possible problems). 
- */ - -static u_int __db_guesspgsize __P((ENV *, DB_FH *)); -static int __db_is_valid_magicno __P((u_int32_t, DBTYPE *)); -static int __db_meta2pgset - __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t, DB *)); -static int __db_salvage __P((DB *, VRFY_DBINFO *, - db_pgno_t, void *, int (*)(void *, const void *), u_int32_t)); -static int __db_salvage_subdbpg __P((DB *, VRFY_DBINFO *, - PAGE *, void *, int (*)(void *, const void *), u_int32_t)); -static int __db_salvage_all __P((DB *, VRFY_DBINFO *, void *, - int(*)(void *, const void *), u_int32_t, int *)); -static int __db_salvage_unknowns __P((DB *, VRFY_DBINFO *, void *, - int (*)(void *, const void *), u_int32_t)); -static int __db_verify_arg __P((DB *, const char *, void *, u_int32_t)); -static int __db_vrfy_freelist - __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); -static int __db_vrfy_invalid - __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); -static int __db_vrfy_orderchkonly __P((DB *, - VRFY_DBINFO *, const char *, const char *, u_int32_t)); -static int __db_vrfy_pagezero __P((DB *, VRFY_DBINFO *, DB_FH *, u_int32_t)); -static int __db_vrfy_subdbs - __P((DB *, VRFY_DBINFO *, const char *, u_int32_t)); -static int __db_vrfy_structure __P((DB *, VRFY_DBINFO *, - const char *, db_pgno_t, void *, void *, u_int32_t)); -static int __db_vrfy_walkpages __P((DB *, VRFY_DBINFO *, - void *, int (*)(void *, const void *), u_int32_t)); - -#define VERIFY_FLAGS \ - (DB_AGGRESSIVE | \ - DB_NOORDERCHK | DB_ORDERCHKONLY | DB_PRINTABLE | DB_SALVAGE | DB_UNREF) - -/* - * __db_verify_pp -- - * DB->verify public interface. - * - * PUBLIC: int __db_verify_pp - * PUBLIC: __P((DB *, const char *, const char *, FILE *, u_int32_t)); - */ -int -__db_verify_pp(dbp, file, database, outfile, flags) - DB *dbp; - const char *file, *database; - FILE *outfile; - u_int32_t flags; -{ - /* - * __db_verify_pp is a wrapper to __db_verify_internal, which lets - * us pass appropriate equivalents to FILE * in from the non-C APIs. 
- * That's why the usual ENV_ENTER macros are in __db_verify_internal, - * not here. - */ - return (__db_verify_internal(dbp, - file, database, outfile, __db_pr_callback, flags)); -} - -/* - * __db_verify_internal -- - * - * PUBLIC: int __db_verify_internal __P((DB *, const char *, - * PUBLIC: const char *, void *, int (*)(void *, const void *), u_int32_t)); - */ -int -__db_verify_internal(dbp, fname, dname, handle, callback, flags) - DB *dbp; - const char *fname, *dname; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - DB_THREAD_INFO *ip; - ENV *env; - int ret, t_ret; - - env = dbp->env; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->verify"); - - if (!LF_ISSET(DB_SALVAGE)) - LF_SET(DB_UNREF); - - ENV_ENTER(env, ip); - - if ((ret = __db_verify_arg(dbp, dname, handle, flags)) == 0) - ret = __db_verify(dbp, ip, - fname, dname, handle, callback, NULL, NULL, flags); - - /* Db.verify is a DB handle destructor. */ - if ((t_ret = __db_close(dbp, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - - ENV_LEAVE(env, ip); - return (ret); -} - -/* - * __db_verify_arg -- - * Check DB->verify arguments. - */ -static int -__db_verify_arg(dbp, dname, handle, flags) - DB *dbp; - const char *dname; - void *handle; - u_int32_t flags; -{ - ENV *env; - int ret; - - env = dbp->env; - - if ((ret = __db_fchk(env, "DB->verify", flags, VERIFY_FLAGS)) != 0) - return (ret); - - /* - * DB_SALVAGE is mutually exclusive with the other flags except - * DB_AGGRESSIVE, DB_PRINTABLE. - * - * DB_AGGRESSIVE and DB_PRINTABLE are only meaningful when salvaging. - * - * DB_SALVAGE requires an output stream. 
- */ - if (LF_ISSET(DB_SALVAGE)) { - if (LF_ISSET(~(DB_AGGRESSIVE | DB_PRINTABLE | DB_SALVAGE))) - return (__db_ferr(env, "DB->verify", 1)); - if (handle == NULL) { - __db_errx(env, - "DB_SALVAGE requires a an output handle"); - return (EINVAL); - } - } else - if (LF_ISSET(DB_AGGRESSIVE | DB_PRINTABLE)) - return (__db_ferr(env, "DB->verify", 1)); - - /* - * DB_ORDERCHKONLY is mutually exclusive with DB_SALVAGE and - * DB_NOORDERCHK, and requires a database name. - */ - if ((ret = __db_fcchk(env, "DB->verify", flags, - DB_ORDERCHKONLY, DB_SALVAGE | DB_NOORDERCHK)) != 0) - return (ret); - if (LF_ISSET(DB_ORDERCHKONLY) && dname == NULL) { - __db_errx(env, "DB_ORDERCHKONLY requires a database name"); - return (EINVAL); - } - return (0); -} - -/* - * __db_verify -- - * Walk the entire file page-by-page, either verifying with or without - * dumping in db_dump -d format, or DB_SALVAGE-ing whatever key/data - * pairs can be found and dumping them in standard (db_load-ready) - * dump format. - * - * (Salvaging isn't really a verification operation, but we put it - * here anyway because it requires essentially identical top-level - * code.) - * - * flags may be 0, DB_NOORDERCHK, DB_ORDERCHKONLY, or DB_SALVAGE - * (and optionally DB_AGGRESSIVE). - * PUBLIC: int __db_verify __P((DB *, DB_THREAD_INFO *, const char *, - * PUBLIC: const char *, void *, int (*)(void *, const void *), - * PUBLIC: void *, void *, u_int32_t)); - */ -int -__db_verify(dbp, ip, name, subdb, handle, callback, lp, rp, flags) - DB *dbp; - DB_THREAD_INFO *ip; - const char *name, *subdb; - void *handle; - int (*callback) __P((void *, const void *)); - void *lp, *rp; - u_int32_t flags; -{ - DB_FH *fhp; - ENV *env; - VRFY_DBINFO *vdp; - u_int32_t sflags; - int has_subdbs, isbad, ret, t_ret; - char *real_name; - - env = dbp->env; - fhp = NULL; - vdp = NULL; - real_name = NULL; - has_subdbs = isbad = ret = t_ret = 0; - - F_SET(dbp, DB_AM_VERIFYING); - - /* Initialize any feedback function. 
*/ - if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL) - dbp->db_feedback(dbp, DB_VERIFY, 0); - - /* - * We don't know how large the cache is, and if the database - * in question uses a small page size--which we don't know - * yet!--it may be uncomfortably small for the default page - * size [#2143]. However, the things we need temporary - * databases for in dbinfo are largely tiny, so using a - * 1024-byte pagesize is probably not going to be a big hit, - * and will make us fit better into small spaces. - */ - if ((ret = __db_vrfy_dbinfo_create(env, ip, 1024, &vdp)) != 0) - goto err; - - /* - * Note whether the user has requested that we use printable - * chars where possible. We won't get here with this flag if - * we're not salvaging. - */ - if (LF_ISSET(DB_PRINTABLE)) - F_SET(vdp, SALVAGE_PRINTABLE); - - /* Find the real name of the file. */ - if ((ret = __db_appname(env, - DB_APP_DATA, name, &dbp->dirname, &real_name)) != 0) - goto err; - - /* - * Our first order of business is to verify page 0, which is - * the metadata page for the master database of subdatabases - * or of the only database in the file. We want to do this by hand - * rather than just calling __db_open in case it's corrupt--various - * things in __db_open might act funny. - * - * Once we know the metadata page is healthy, I believe that it's - * safe to open the database normally and then use the page swapping - * code, which makes life easier. - */ - if ((ret = __os_open(env, real_name, 0, DB_OSO_RDONLY, 0, &fhp)) != 0) - goto err; - - /* Verify the metadata page 0; set pagesize and type. */ - if ((ret = __db_vrfy_pagezero(dbp, vdp, fhp, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* - * We can assume at this point that dbp->pagesize and dbp->type are - * set correctly, or at least as well as they can be, and that - * locking, logging, and txns are not in use. 
Thus we can trust - * the memp code not to look at the page, and thus to be safe - * enough to use. - * - * The dbp is not open, but the file is open in the fhp, and we - * cannot assume that __db_open is safe. Call __env_setup, - * the [safe] part of __db_open that initializes the environment-- - * and the mpool--manually. - */ - if ((ret = __env_setup(dbp, NULL, - name, subdb, TXN_INVALID, DB_ODDFILESIZE | DB_RDONLY)) != 0) - goto err; - - /* - * Set our name in the Queue subsystem; we may need it later - * to deal with extents. - */ - if (dbp->type == DB_QUEUE && - (ret = __qam_set_ext_data(dbp, name)) != 0) - goto err; - - /* Mark the dbp as opened, so that we correctly handle its close. */ - F_SET(dbp, DB_AM_OPEN_CALLED); - - /* Find out the page number of the last page in the database. */ - if ((ret = __memp_get_last_pgno(dbp->mpf, &vdp->last_pgno)) != 0) - goto err; - - /* - * DB_ORDERCHKONLY is a special case; our file consists of - * several subdatabases, which use different hash, bt_compare, - * and/or dup_compare functions. Consequently, we couldn't verify - * sorting and hashing simply by calling DB->verify() on the file. - * DB_ORDERCHKONLY allows us to come back and check those things; it - * requires a subdatabase, and assumes that everything but that - * database's sorting/hashing is correct. - */ - if (LF_ISSET(DB_ORDERCHKONLY)) { - ret = __db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags); - goto done; - } - - sflags = flags; - if (dbp->p_internal != NULL) - LF_CLR(DB_SALVAGE); - - /* - * When salvaging, we use a db to keep track of whether we've seen a - * given overflow or dup page in the course of traversing normal data. - * If in the end we have not, we assume its key got lost and print it - * with key "UNKNOWN". - */ - if (LF_ISSET(DB_SALVAGE)) { - if ((ret = __db_salvage_init(vdp)) != 0) - goto err; - - /* - * If we're not being aggressive, salvage by walking the tree - * and only printing the leaves we find. 
"has_subdbs" will - * indicate whether we found subdatabases. - */ - if (!LF_ISSET(DB_AGGRESSIVE) && __db_salvage_all( - dbp, vdp, handle, callback, flags, &has_subdbs) != 0) - isbad = 1; - - /* - * If we have subdatabases, flag if any keys are found that - * don't belong to a subdatabase -- they'll need to have an - * "__OTHER__" subdatabase header printed first. - */ - if (has_subdbs) { - F_SET(vdp, SALVAGE_PRINTHEADER); - F_SET(vdp, SALVAGE_HASSUBDBS); - } - } - - /* Walk all the pages, if a page cannot be read, verify structure. */ - if ((ret = - __db_vrfy_walkpages(dbp, vdp, handle, callback, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else if (ret != DB_PAGE_NOTFOUND) - goto err; - } - - /* If we're verifying, verify inter-page structure. */ - if (!LF_ISSET(DB_SALVAGE) && isbad == 0) - if ((t_ret = __db_vrfy_structure(dbp, - vdp, name, 0, lp, rp, flags)) != 0) { - if (t_ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* - * If we're salvaging, output with key UNKNOWN any overflow or dup pages - * we haven't been able to put in context. Then destroy the salvager's - * state-saving database. - */ - if (LF_ISSET(DB_SALVAGE)) { - if ((ret = __db_salvage_unknowns(dbp, - vdp, handle, callback, flags)) != 0) - isbad = 1; - } - - flags = sflags; - -#ifdef HAVE_PARTITION - if (t_ret == 0 && dbp->p_internal != NULL) - t_ret = __part_verify(dbp, vdp, name, handle, callback, flags); -#endif - - if (ret == 0) - ret = t_ret; - - /* Don't display a footer for a database holding other databases. */ - if (LF_ISSET(DB_SALVAGE | DB_VERIFY_PARTITION) == DB_SALVAGE && - (!has_subdbs || F_ISSET(vdp, SALVAGE_PRINTFOOTER))) - (void)__db_prfooter(handle, callback); - -done: err: - /* Send feedback that we're done. 
*/ - if (!LF_ISSET(DB_SALVAGE) && dbp->db_feedback != NULL) - dbp->db_feedback(dbp, DB_VERIFY, 100); - - if (LF_ISSET(DB_SALVAGE) && - (t_ret = __db_salvage_destroy(vdp)) != 0 && ret == 0) - ret = t_ret; - if (fhp != NULL && - (t_ret = __os_closehandle(env, fhp)) != 0 && ret == 0) - ret = t_ret; - if (vdp != NULL && - (t_ret = __db_vrfy_dbinfo_destroy(env, vdp)) != 0 && ret == 0) - ret = t_ret; - if (real_name != NULL) - __os_free(env, real_name); - - /* - * DB_VERIFY_FATAL is a private error, translate to a public one. - * - * If we didn't find a page, it's probably a page number was corrupted. - * Return the standard corruption error. - * - * Otherwise, if we found corruption along the way, set the return. - */ - if (ret == DB_VERIFY_FATAL || - ret == DB_PAGE_NOTFOUND || (ret == 0 && isbad == 1)) - ret = DB_VERIFY_BAD; - - /* Make sure there's a public complaint if we found corruption. */ - if (ret != 0) - __db_err(env, ret, "%s", name); - - return (ret); -} - -/* - * __db_vrfy_pagezero -- - * Verify the master metadata page. Use seek, read, and a local buffer - * rather than the DB paging code, for safety. - * - * Must correctly (or best-guess) set dbp->type and dbp->pagesize. - */ -static int -__db_vrfy_pagezero(dbp, vdp, fhp, flags) - DB *dbp; - VRFY_DBINFO *vdp; - DB_FH *fhp; - u_int32_t flags; -{ - DBMETA *meta; - ENV *env; - VRFY_PAGEINFO *pip; - db_pgno_t freelist; - size_t nr; - int isbad, ret, swapped; - u_int8_t mbuf[DBMETASIZE]; - - isbad = ret = swapped = 0; - freelist = 0; - env = dbp->env; - meta = (DBMETA *)mbuf; - dbp->type = DB_UNKNOWN; - - if ((ret = __db_vrfy_getpageinfo(vdp, PGNO_BASE_MD, &pip)) != 0) - return (ret); - - /* - * Seek to the metadata page. - * Note that if we're just starting a verification, dbp->pgsize - * may be zero; this is okay, as we want page zero anyway and - * 0*0 == 0. 
- */ - if ((ret = __os_seek(env, fhp, 0, 0, 0)) != 0 || - (ret = __os_read(env, fhp, mbuf, DBMETASIZE, &nr)) != 0) { - __db_err(env, ret, - "Metadata page %lu cannot be read", (u_long)PGNO_BASE_MD); - return (ret); - } - - if (nr != DBMETASIZE) { - EPRINT((env, - "Page %lu: Incomplete metadata page", - (u_long)PGNO_BASE_MD)); - return (DB_VERIFY_FATAL); - } - - if ((ret = __db_chk_meta(env, dbp, meta, 1)) != 0) { - EPRINT((env, - "Page %lu: metadata page corrupted", (u_long)PGNO_BASE_MD)); - isbad = 1; - if (ret != -1) { - EPRINT((env, - "Page %lu: could not check metadata page", - (u_long)PGNO_BASE_MD)); - return (DB_VERIFY_FATAL); - } - } - - /* - * Check all of the fields that we can. - * - * 08-11: Current page number. Must == pgno. - * Note that endianness doesn't matter--it's zero. - */ - if (meta->pgno != PGNO_BASE_MD) { - isbad = 1; - EPRINT((env, "Page %lu: pgno incorrectly set to %lu", - (u_long)PGNO_BASE_MD, (u_long)meta->pgno)); - } - - /* 12-15: Magic number. Must be one of valid set. */ - if (__db_is_valid_magicno(meta->magic, &dbp->type)) - swapped = 0; - else { - M_32_SWAP(meta->magic); - if (__db_is_valid_magicno(meta->magic, - &dbp->type)) - swapped = 1; - else { - isbad = 1; - EPRINT((env, - "Page %lu: bad magic number %lu", - (u_long)PGNO_BASE_MD, (u_long)meta->magic)); - } - } - - /* - * 16-19: Version. Must be current; for now, we - * don't support verification of old versions. - */ - if (swapped) - M_32_SWAP(meta->version); - if ((dbp->type == DB_BTREE && - (meta->version > DB_BTREEVERSION || - meta->version < DB_BTREEOLDVER)) || - (dbp->type == DB_HASH && - (meta->version > DB_HASHVERSION || - meta->version < DB_HASHOLDVER)) || - (dbp->type == DB_QUEUE && - (meta->version > DB_QAMVERSION || - meta->version < DB_QAMOLDVER))) { - isbad = 1; - EPRINT((env, - "Page %lu: unsupported DB version %lu; extraneous errors may result", - (u_long)PGNO_BASE_MD, (u_long)meta->version)); - } - - /* - * 20-23: Pagesize. 
Must be power of two, - * greater than 512, and less than 64K. - */ - if (swapped) - M_32_SWAP(meta->pagesize); - if (IS_VALID_PAGESIZE(meta->pagesize)) - dbp->pgsize = meta->pagesize; - else { - isbad = 1; - EPRINT((env, "Page %lu: bad page size %lu", - (u_long)PGNO_BASE_MD, (u_long)meta->pagesize)); - - /* - * Now try to settle on a pagesize to use. - * If the user-supplied one is reasonable, - * use it; else, guess. - */ - if (!IS_VALID_PAGESIZE(dbp->pgsize)) - dbp->pgsize = __db_guesspgsize(env, fhp); - } - - /* - * 25: Page type. Must be correct for dbp->type, - * which is by now set as well as it can be. - */ - /* Needs no swapping--only one byte! */ - if ((dbp->type == DB_BTREE && meta->type != P_BTREEMETA) || - (dbp->type == DB_HASH && meta->type != P_HASHMETA) || - (dbp->type == DB_QUEUE && meta->type != P_QAMMETA)) { - isbad = 1; - EPRINT((env, "Page %lu: bad page type %lu", - (u_long)PGNO_BASE_MD, (u_long)meta->type)); - } - - /* - * 26: Meta-flags. - */ - if (meta->metaflags != 0) { - if (FLD_ISSET(meta->metaflags, - ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) { - isbad = 1; - EPRINT((env, - "Page %lu: bad meta-data flags value %#lx", - (u_long)PGNO_BASE_MD, (u_long)meta->metaflags)); - } - if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM)) - F_SET(pip, VRFY_HAS_CHKSUM); - if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE)) - F_SET(pip, VRFY_HAS_PART_RANGE); - if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK)) - F_SET(pip, VRFY_HAS_PART_CALLBACK); - - if (FLD_ISSET(meta->metaflags, - DBMETA_PART_RANGE | DBMETA_PART_CALLBACK) && - (ret = __partition_init(dbp, meta->metaflags)) != 0) - return (ret); - } - - /* - * 28-31: Free list page number. - * 32-35: Last page in database file. - * We'll verify its sensibility when we do inter-page - * verification later; for now, just store it. 
- */ - if (swapped) - M_32_SWAP(meta->free); - freelist = meta->free; - if (swapped) - M_32_SWAP(meta->last_pgno); - vdp->meta_last_pgno = meta->last_pgno; - - /* - * Initialize vdp->pages to fit a single pageinfo structure for - * this one page. We'll realloc later when we know how many - * pages there are. - */ - pip->pgno = PGNO_BASE_MD; - pip->type = meta->type; - - /* - * Signal that we still have to check the info specific to - * a given type of meta page. - */ - F_SET(pip, VRFY_INCOMPLETE); - - pip->free = freelist; - - if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) - return (ret); - - /* Set up the dbp's fileid. We don't use the regular open path. */ - memcpy(dbp->fileid, meta->uid, DB_FILE_ID_LEN); - - if (swapped == 1) - F_SET(dbp, DB_AM_SWAP); - - return (isbad ? DB_VERIFY_BAD : 0); -} - -/* - * __db_vrfy_walkpages -- - * Main loop of the verifier/salvager. Walks through, - * page by page, and verifies all pages and/or prints all data pages. - */ -static int -__db_vrfy_walkpages(dbp, vdp, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - DB_MPOOLFILE *mpf; - ENV *env; - PAGE *h; - VRFY_PAGEINFO *pip; - db_pgno_t i; - int ret, t_ret, isbad; - - env = dbp->env; - mpf = dbp->mpf; - h = NULL; - ret = isbad = t_ret = 0; - - for (i = 0; i <= vdp->last_pgno; i++) { - /* - * If DB_SALVAGE is set, we inspect our database of completed - * pages, and skip any we've already printed in the subdb pass. - */ - if (LF_ISSET(DB_SALVAGE) && (__db_salvage_isdone(vdp, i) != 0)) - continue; - - /* - * An individual page get can fail if: - * * This is a hash database, it is expected to find - * empty buckets, which don't have allocated pages. Create - * a dummy page so the verification can proceed. - * * We are salvaging, flag the error and continue. 
- */ - if ((t_ret = __memp_fget(mpf, &i, - vdp->thread_info, NULL, 0, &h)) != 0) { - if (dbp->type == DB_HASH) { - if ((t_ret = - __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) - goto err1; - pip->type = P_INVALID; - pip->pgno = i; - F_CLR(pip, VRFY_IS_ALLZEROES); - if ((t_ret = __db_vrfy_putpageinfo( - env, vdp, pip)) != 0) - goto err1; - continue; - } - if (t_ret == DB_PAGE_NOTFOUND) { - EPRINT((env, - "Page %lu: beyond the end of the file, metadata page has last page as %lu", - (u_long)i, (u_long)vdp->last_pgno)); - if (ret == 0) - return (t_ret); - } - -err1: if (ret == 0) - ret = t_ret; - if (LF_ISSET(DB_SALVAGE)) - continue; - return (ret); - } - - if (LF_ISSET(DB_SALVAGE)) { - /* - * We pretty much don't want to quit unless a - * bomb hits. May as well return that something - * was screwy, however. - */ - if ((t_ret = __db_salvage_pg(dbp, - vdp, i, h, handle, callback, flags)) != 0) { - if (ret == 0) - ret = t_ret; - isbad = 1; - } - } else { - /* - * If we are not salvaging, and we get any error - * other than DB_VERIFY_BAD, return immediately; - * it may not be safe to proceed. If we get - * DB_VERIFY_BAD, keep going; listing more errors - * may make it easier to diagnose problems and - * determine the magnitude of the corruption. - * - * Verify info common to all page types. 
- */ - if (i != PGNO_BASE_MD) { - ret = __db_vrfy_common(dbp, vdp, h, i, flags); - if (ret == DB_VERIFY_BAD) - isbad = 1; - else if (ret != 0) - goto err; - } - - switch (TYPE(h)) { - case P_INVALID: - ret = __db_vrfy_invalid(dbp, vdp, h, i, flags); - break; - case __P_DUPLICATE: - isbad = 1; - EPRINT((env, - "Page %lu: old-style duplicate page", - (u_long)i)); - break; - case P_HASH_UNSORTED: - case P_HASH: - ret = __ham_vrfy(dbp, vdp, h, i, flags); - break; - case P_IBTREE: - case P_IRECNO: - case P_LBTREE: - case P_LDUP: - ret = __bam_vrfy(dbp, vdp, h, i, flags); - break; - case P_LRECNO: - ret = __ram_vrfy_leaf(dbp, vdp, h, i, flags); - break; - case P_OVERFLOW: - ret = __db_vrfy_overflow(dbp, vdp, h, i, flags); - break; - case P_HASHMETA: - ret = __ham_vrfy_meta(dbp, - vdp, (HMETA *)h, i, flags); - break; - case P_BTREEMETA: - ret = __bam_vrfy_meta(dbp, - vdp, (BTMETA *)h, i, flags); - break; - case P_QAMMETA: - ret = __qam_vrfy_meta(dbp, - vdp, (QMETA *)h, i, flags); - break; - case P_QAMDATA: - ret = __qam_vrfy_data(dbp, - vdp, (QPAGE *)h, i, flags); - break; - default: - EPRINT((env, - "Page %lu: unknown page type %lu", - (u_long)i, (u_long)TYPE(h))); - isbad = 1; - break; - } - - /* - * Set up error return. - */ - if (ret == DB_VERIFY_BAD) - isbad = 1; - else if (ret != 0) - goto err; - - /* - * Provide feedback to the application about our - * progress. The range 0-50% comes from the fact - * that this is the first of two passes through the - * database (front-to-back, then top-to-bottom). - */ - if (dbp->db_feedback != NULL) - dbp->db_feedback(dbp, DB_VERIFY, - (int)((i + 1) * 50 / (vdp->last_pgno + 1))); - } - - /* - * Just as with the page get, bail if and only if we're - * not salvaging. 
- */ - if ((t_ret = __memp_fput(mpf, - vdp->thread_info, h, dbp->priority)) != 0) { - if (ret == 0) - ret = t_ret; - if (!LF_ISSET(DB_SALVAGE)) - return (ret); - } - } - - /* - * If we've seen a Queue metadata page, we may need to walk Queue - * extent pages that won't show up between 0 and vdp->last_pgno. - */ - if (F_ISSET(vdp, VRFY_QMETA_SET) && (t_ret = - __qam_vrfy_walkqueue(dbp, vdp, handle, callback, flags)) != 0) { - if (ret == 0) - ret = t_ret; - if (t_ret == DB_VERIFY_BAD) - isbad = 1; - else if (!LF_ISSET(DB_SALVAGE)) - return (ret); - } - - if (0) { -err: if (h != NULL && (t_ret = __memp_fput(mpf, - vdp->thread_info, h, dbp->priority)) != 0) - return (ret == 0 ? t_ret : ret); - } - - return ((isbad == 1 && ret == 0) ? DB_VERIFY_BAD : ret); -} - -/* - * __db_vrfy_structure-- - * After a beginning-to-end walk through the database has been - * completed, put together the information that has been collected - * to verify the overall database structure. - * - * Should only be called if we want to do a database verification, - * i.e. if DB_SALVAGE is not set. - */ -static int -__db_vrfy_structure(dbp, vdp, dbname, meta_pgno, lp, rp, flags) - DB *dbp; - VRFY_DBINFO *vdp; - const char *dbname; - db_pgno_t meta_pgno; - void *lp, *rp; - u_int32_t flags; -{ - DB *pgset; - ENV *env; - VRFY_PAGEINFO *pip; - db_pgno_t i; - int ret, isbad, hassubs, p; - - isbad = 0; - pip = NULL; - env = dbp->env; - pgset = vdp->pgset; - - /* - * Providing feedback here is tricky; in most situations, - * we fetch each page one more time, but we do so in a top-down - * order that depends on the access method. Worse, we do this - * recursively in btree, such that on any call where we're traversing - * a subtree we don't know where that subtree is in the whole database; - * worse still, any given database may be one of several subdbs. - * - * The solution is to decrement a counter vdp->pgs_remaining each time - * we verify (and call feedback on) a page. 
We may over- or - * under-count, but the structure feedback function will ensure that we - * never give a percentage under 50 or over 100. (The first pass - * covered the range 0-50%.) - */ - if (dbp->db_feedback != NULL) - vdp->pgs_remaining = vdp->last_pgno + 1; - - /* - * Call the appropriate function to downwards-traverse the db type. - */ - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - if ((ret = - __bam_vrfy_structure(dbp, vdp, 0, lp, rp, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - - /* - * If we have subdatabases and we know that the database is, - * thus far, sound, it's safe to walk the tree of subdatabases. - * Do so, and verify the structure of the databases within. - */ - if ((ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) != 0) - goto err; - hassubs = F_ISSET(pip, VRFY_HAS_SUBDBS) ? 1 : 0; - if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) - goto err; - pip = NULL; - - if (isbad == 0 && hassubs) - if ((ret = - __db_vrfy_subdbs(dbp, vdp, dbname, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - break; - case DB_HASH: - if ((ret = __ham_vrfy_structure(dbp, vdp, 0, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - break; - case DB_QUEUE: - if ((ret = __qam_vrfy_structure(dbp, vdp, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - } - - /* - * Queue pages may be unreferenced and totally zeroed, if - * they're empty; queue doesn't have much structure, so - * this is unlikely to be wrong in any troublesome sense. - * Skip to "err". - */ - goto err; - case DB_UNKNOWN: - default: - ret = __db_unknown_path(env, "__db_vrfy_structure"); - goto err; - } - - /* Walk free list. 
*/ - if ((ret = - __db_vrfy_freelist(dbp, vdp, meta_pgno, flags)) == DB_VERIFY_BAD) - isbad = 1; - - /* - * If structure checks up until now have failed, it's likely that - * checking what pages have been missed will result in oodles of - * extraneous error messages being EPRINTed. Skip to the end - * if this is the case; we're going to be printing at least one - * error anyway, and probably all the more salient ones. - */ - if (ret != 0 || isbad == 1) - goto err; - - /* - * Make sure no page has been missed and that no page is still marked - * "all zeroes" (only certain hash pages can be, and they're unmarked - * in __ham_vrfy_structure). - */ - for (i = 0; i < vdp->last_pgno + 1; i++) { - if ((ret = __db_vrfy_getpageinfo(vdp, i, &pip)) != 0) - goto err; - if ((ret = __db_vrfy_pgset_get(pgset, - vdp->thread_info, i, &p)) != 0) - goto err; - if (pip->type == P_OVERFLOW) { - if ((u_int32_t)p != pip->refcount) { - EPRINT((env, - "Page %lu: overflow refcount %lu, referenced %lu times", - (u_long)i, - (u_long)pip->refcount, (u_long)p)); - isbad = 1; - } - } else if (p == 0 && -#ifndef HAVE_FTRUNCATE - !(i > vdp->meta_last_pgno && - (F_ISSET(pip, VRFY_IS_ALLZEROES) || pip->type == P_HASH)) && -#endif - !(dbp->type == DB_HASH && pip->type == P_INVALID)) { - /* - * It is OK for unreferenced hash buckets to be - * marked invalid and unreferenced. - */ - EPRINT((env, - "Page %lu: unreferenced page", (u_long)i)); - isbad = 1; - } - - if (F_ISSET(pip, VRFY_IS_ALLZEROES) -#ifndef HAVE_FTRUNCATE - && i <= vdp->meta_last_pgno -#endif - ) { - EPRINT((env, - "Page %lu: totally zeroed page", (u_long)i)); - isbad = 1; - } - if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) - goto err; - pip = NULL; - } - -err: if (pip != NULL) - (void)__db_vrfy_putpageinfo(env, vdp, pip); - - return ((isbad == 1 && ret == 0) ? 
DB_VERIFY_BAD : ret); -} - -/* - * __db_is_valid_magicno - */ -static int -__db_is_valid_magicno(magic, typep) - u_int32_t magic; - DBTYPE *typep; -{ - switch (magic) { - case DB_BTREEMAGIC: - *typep = DB_BTREE; - return (1); - case DB_HASHMAGIC: - *typep = DB_HASH; - return (1); - case DB_QAMMAGIC: - *typep = DB_QUEUE; - return (1); - default: - break; - } - *typep = DB_UNKNOWN; - return (0); -} - -/* - * __db_vrfy_common -- - * Verify info common to all page types. - * - * PUBLIC: int __db_vrfy_common - * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); - */ -int -__db_vrfy_common(dbp, vdp, h, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - PAGE *h; - db_pgno_t pgno; - u_int32_t flags; -{ - ENV *env; - VRFY_PAGEINFO *pip; - int ret, t_ret; - u_int8_t *p; - - env = dbp->env; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - pip->pgno = pgno; - F_CLR(pip, VRFY_IS_ALLZEROES); - - /* - * Hash expands the table by leaving some pages between the - * old last and the new last totally zeroed. These pages may - * not be all zero if they were used, freed and then reallocated. - * - * Queue will create sparse files if sparse record numbers are used. - */ - if (pgno != 0 && PGNO(h) == 0) { - F_SET(pip, VRFY_IS_ALLZEROES); - for (p = (u_int8_t *)h; p < (u_int8_t *)h + dbp->pgsize; p++) - if (*p != 0) { - F_CLR(pip, VRFY_IS_ALLZEROES); - break; - } - /* - * Mark it as a hash, and we'll - * check that that makes sense structurally later. - * (The queue verification doesn't care, since queues - * don't really have much in the way of structure.) - */ - pip->type = P_HASH; - ret = 0; - goto err; /* well, not really an err. */ - } - - if (PGNO(h) != pgno) { - EPRINT((env, "Page %lu: bad page number %lu", - (u_long)pgno, (u_long)h->pgno)); - ret = DB_VERIFY_BAD; - } - - switch (h->type) { - case P_INVALID: /* Order matches ordinal value. 
*/ - case P_HASH_UNSORTED: - case P_IBTREE: - case P_IRECNO: - case P_LBTREE: - case P_LRECNO: - case P_OVERFLOW: - case P_HASHMETA: - case P_BTREEMETA: - case P_QAMMETA: - case P_QAMDATA: - case P_LDUP: - case P_HASH: - break; - default: - EPRINT((env, "Page %lu: bad page type %lu", - (u_long)pgno, (u_long)h->type)); - ret = DB_VERIFY_BAD; - } - pip->type = h->type; - -err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __db_vrfy_invalid -- - * Verify P_INVALID page. - * (Yes, there's not much to do here.) - */ -static int -__db_vrfy_invalid(dbp, vdp, h, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - PAGE *h; - db_pgno_t pgno; - u_int32_t flags; -{ - ENV *env; - VRFY_PAGEINFO *pip; - int ret, t_ret; - - env = dbp->env; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - pip->next_pgno = pip->prev_pgno = 0; - - if (!IS_VALID_PGNO(NEXT_PGNO(h))) { - EPRINT((env, "Page %lu: invalid next_pgno %lu", - (u_long)pgno, (u_long)NEXT_PGNO(h))); - ret = DB_VERIFY_BAD; - } else - pip->next_pgno = NEXT_PGNO(h); - - if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __db_vrfy_datapage -- - * Verify elements common to data pages (P_HASH, P_LBTREE, - * P_IBTREE, P_IRECNO, P_LRECNO, P_OVERFLOW, P_DUPLICATE)--i.e., - * those defined in the PAGE structure. - * - * Called from each of the per-page routines, after the - * all-page-type-common elements of pip have been verified and filled - * in. 
- * - * PUBLIC: int __db_vrfy_datapage - * PUBLIC: __P((DB *, VRFY_DBINFO *, PAGE *, db_pgno_t, u_int32_t)); - */ -int -__db_vrfy_datapage(dbp, vdp, h, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - PAGE *h; - db_pgno_t pgno; - u_int32_t flags; -{ - ENV *env; - VRFY_PAGEINFO *pip; - u_int32_t smallest_entry; - int isbad, ret, t_ret; - - env = dbp->env; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - isbad = 0; - - /* - * prev_pgno and next_pgno: store for inter-page checks, - * verify that they point to actual pages and not to self. - * - * !!! - * Internal btree pages do not maintain these fields (indeed, - * they overload them). Skip. - */ - if (TYPE(h) != P_IBTREE && TYPE(h) != P_IRECNO) { - if (!IS_VALID_PGNO(PREV_PGNO(h)) || PREV_PGNO(h) == pip->pgno) { - isbad = 1; - EPRINT((env, "Page %lu: invalid prev_pgno %lu", - (u_long)pip->pgno, (u_long)PREV_PGNO(h))); - } - if (!IS_VALID_PGNO(NEXT_PGNO(h)) || NEXT_PGNO(h) == pip->pgno) { - isbad = 1; - EPRINT((env, "Page %lu: invalid next_pgno %lu", - (u_long)pip->pgno, (u_long)NEXT_PGNO(h))); - } - pip->prev_pgno = PREV_PGNO(h); - pip->next_pgno = NEXT_PGNO(h); - } - - /* - * Verify the number of entries on the page: there's no good way to - * determine if this is accurate. The best we can do is verify that - * it's not more than can, in theory, fit on the page. Then, we make - * sure there are at least this many valid elements in inp[], and - * hope the test catches most cases. 
- */ - switch (TYPE(h)) { - case P_HASH_UNSORTED: - case P_HASH: - smallest_entry = HKEYDATA_PSIZE(0); - break; - case P_IBTREE: - smallest_entry = BINTERNAL_PSIZE(0); - break; - case P_IRECNO: - smallest_entry = RINTERNAL_PSIZE; - break; - case P_LBTREE: - case P_LDUP: - case P_LRECNO: - smallest_entry = BKEYDATA_PSIZE(0); - break; - default: - smallest_entry = 0; - break; - } - if (smallest_entry * NUM_ENT(h) / 2 > dbp->pgsize) { - isbad = 1; - EPRINT((env, "Page %lu: too many entries: %lu", - (u_long)pgno, (u_long)NUM_ENT(h))); - } - - if (TYPE(h) != P_OVERFLOW) - pip->entries = NUM_ENT(h); - - /* - * btree level. Should be zero unless we're a btree; - * if we are a btree, should be between LEAFLEVEL and MAXBTREELEVEL, - * and we need to save it off. - */ - switch (TYPE(h)) { - case P_IBTREE: - case P_IRECNO: - if (LEVEL(h) < LEAFLEVEL + 1) { - isbad = 1; - EPRINT((env, "Page %lu: bad btree level %lu", - (u_long)pgno, (u_long)LEVEL(h))); - } - pip->bt_level = LEVEL(h); - break; - case P_LBTREE: - case P_LDUP: - case P_LRECNO: - if (LEVEL(h) != LEAFLEVEL) { - isbad = 1; - EPRINT((env, - "Page %lu: btree leaf page has incorrect level %lu", - (u_long)pgno, (u_long)LEVEL(h))); - } - break; - default: - if (LEVEL(h) != 0) { - isbad = 1; - EPRINT((env, - "Page %lu: nonzero level %lu in non-btree database", - (u_long)pgno, (u_long)LEVEL(h))); - } - break; - } - - /* - * Even though inp[] occurs in all PAGEs, we look at it in the - * access-method-specific code, since btree and hash treat - * item lengths very differently, and one of the most important - * things we want to verify is that the data--as specified - * by offset and length--cover the right part of the page - * without overlaps, gaps, or violations of the page boundary. - */ - if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - - return ((ret == 0 && isbad == 1) ? 
DB_VERIFY_BAD : ret); -} - -/* - * __db_vrfy_meta-- - * Verify the access-method common parts of a meta page, using - * normal mpool routines. - * - * PUBLIC: int __db_vrfy_meta - * PUBLIC: __P((DB *, VRFY_DBINFO *, DBMETA *, db_pgno_t, u_int32_t)); - */ -int -__db_vrfy_meta(dbp, vdp, meta, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - DBMETA *meta; - db_pgno_t pgno; - u_int32_t flags; -{ - DBTYPE dbtype, magtype; - ENV *env; - VRFY_PAGEINFO *pip; - int isbad, ret, t_ret; - - isbad = 0; - env = dbp->env; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - /* type plausible for a meta page */ - switch (meta->type) { - case P_BTREEMETA: - dbtype = DB_BTREE; - break; - case P_HASHMETA: - dbtype = DB_HASH; - break; - case P_QAMMETA: - dbtype = DB_QUEUE; - break; - default: - ret = __db_unknown_path(env, "__db_vrfy_meta"); - goto err; - } - - /* magic number valid */ - if (!__db_is_valid_magicno(meta->magic, &magtype)) { - isbad = 1; - EPRINT((env, - "Page %lu: invalid magic number", (u_long)pgno)); - } - if (magtype != dbtype) { - isbad = 1; - EPRINT((env, - "Page %lu: magic number does not match database type", - (u_long)pgno)); - } - - /* version */ - if ((dbtype == DB_BTREE && - (meta->version > DB_BTREEVERSION || - meta->version < DB_BTREEOLDVER)) || - (dbtype == DB_HASH && - (meta->version > DB_HASHVERSION || - meta->version < DB_HASHOLDVER)) || - (dbtype == DB_QUEUE && - (meta->version > DB_QAMVERSION || - meta->version < DB_QAMOLDVER))) { - isbad = 1; - EPRINT((env, - "Page %lu: unsupported database version %lu; extraneous errors may result", - (u_long)pgno, (u_long)meta->version)); - } - - /* pagesize */ - if (meta->pagesize != dbp->pgsize) { - isbad = 1; - EPRINT((env, "Page %lu: invalid pagesize %lu", - (u_long)pgno, (u_long)meta->pagesize)); - } - - /* Flags */ - if (meta->metaflags != 0) { - if (FLD_ISSET(meta->metaflags, - ~(DBMETA_CHKSUM|DBMETA_PART_RANGE|DBMETA_PART_CALLBACK))) { - isbad = 1; - EPRINT((env, - "Page %lu: bad 
meta-data flags value %#lx", - (u_long)PGNO_BASE_MD, (u_long)meta->metaflags)); - } - if (FLD_ISSET(meta->metaflags, DBMETA_CHKSUM)) - F_SET(pip, VRFY_HAS_CHKSUM); - if (FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE)) - F_SET(pip, VRFY_HAS_PART_RANGE); - if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK)) - F_SET(pip, VRFY_HAS_PART_CALLBACK); - } - - /* - * Free list. - * - * If this is not the main, master-database meta page, it - * should not have a free list. - */ - if (pgno != PGNO_BASE_MD && meta->free != PGNO_INVALID) { - isbad = 1; - EPRINT((env, - "Page %lu: nonempty free list on subdatabase metadata page", - (u_long)pgno)); - } - - /* Can correctly be PGNO_INVALID--that's just the end of the list. */ - if (meta->free != PGNO_INVALID && IS_VALID_PGNO(meta->free)) - pip->free = meta->free; - else if (!IS_VALID_PGNO(meta->free)) { - isbad = 1; - EPRINT((env, - "Page %lu: nonsensical free list pgno %lu", - (u_long)pgno, (u_long)meta->free)); - } - - /* - * Check that the meta page agrees with what we got from mpool. - * If we don't have FTRUNCATE then mpool could include some - * zeroed pages at the end of the file, we assume the meta page - * is correct. - */ - if (pgno == PGNO_BASE_MD && meta->last_pgno != vdp->last_pgno) { -#ifdef HAVE_FTRUNCATE - isbad = 1; - EPRINT((env, - "Page %lu: last_pgno is not correct: %lu != %lu", - (u_long)pgno, - (u_long)meta->last_pgno, (u_long)vdp->last_pgno)); -#endif - vdp->meta_last_pgno = meta->last_pgno; - } - - /* - * We have now verified the common fields of the metadata page. - * Clear the flag that told us they had been incompletely checked. - */ - F_CLR(pip, VRFY_INCOMPLETE); - -err: if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __db_vrfy_freelist -- - * Walk free list, checking off pages and verifying absence of - * loops. 
- */ -static int -__db_vrfy_freelist(dbp, vdp, meta, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t meta; - u_int32_t flags; -{ - DB *pgset; - ENV *env; - VRFY_PAGEINFO *pip; - db_pgno_t cur_pgno, next_pgno; - int p, ret, t_ret; - - env = dbp->env; - pgset = vdp->pgset; - DB_ASSERT(env, pgset != NULL); - - if ((ret = __db_vrfy_getpageinfo(vdp, meta, &pip)) != 0) - return (ret); - for (next_pgno = pip->free; - next_pgno != PGNO_INVALID; next_pgno = pip->next_pgno) { - cur_pgno = pip->pgno; - if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) - return (ret); - - /* This shouldn't happen, but just in case. */ - if (!IS_VALID_PGNO(next_pgno)) { - EPRINT((env, - "Page %lu: invalid next_pgno %lu on free list page", - (u_long)cur_pgno, (u_long)next_pgno)); - return (DB_VERIFY_BAD); - } - - /* Detect cycles. */ - if ((ret = __db_vrfy_pgset_get(pgset, - vdp->thread_info, next_pgno, &p)) != 0) - return (ret); - if (p != 0) { - EPRINT((env, - "Page %lu: page %lu encountered a second time on free list", - (u_long)cur_pgno, (u_long)next_pgno)); - return (DB_VERIFY_BAD); - } - if ((ret = __db_vrfy_pgset_inc(pgset, - vdp->thread_info, next_pgno)) != 0) - return (ret); - - if ((ret = __db_vrfy_getpageinfo(vdp, next_pgno, &pip)) != 0) - return (ret); - - if (pip->type != P_INVALID) { - EPRINT((env, - "Page %lu: non-invalid page %lu on free list", - (u_long)cur_pgno, (u_long)next_pgno)); - ret = DB_VERIFY_BAD; /* unsafe to continue */ - break; - } - } - - if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) - ret = t_ret; - return (ret); -} - -/* - * __db_vrfy_subdbs -- - * Walk the known-safe master database of subdbs with a cursor, - * verifying the structure of each subdatabase we encounter. 
- */ -static int -__db_vrfy_subdbs(dbp, vdp, dbname, flags) - DB *dbp; - VRFY_DBINFO *vdp; - const char *dbname; - u_int32_t flags; -{ - DB *mdbp; - DBC *dbc; - DBT key, data; - ENV *env; - VRFY_PAGEINFO *pip; - db_pgno_t meta_pgno; - int ret, t_ret, isbad; - u_int8_t type; - - isbad = 0; - dbc = NULL; - env = dbp->env; - - if ((ret = __db_master_open(dbp, - vdp->thread_info, NULL, dbname, DB_RDONLY, 0, &mdbp)) != 0) - return (ret); - - if ((ret = __db_cursor_int(mdbp, NULL, - NULL, DB_BTREE, PGNO_INVALID, 0, DB_LOCK_INVALIDID, &dbc)) != 0) - goto err; - - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - while ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) == 0) { - if (data.size != sizeof(db_pgno_t)) { - EPRINT((env, - "Subdatabase entry not page-number size")); - isbad = 1; - goto err; - } - memcpy(&meta_pgno, data.data, data.size); - /* - * Subdatabase meta pgnos are stored in network byte - * order for cross-endian compatibility. Swap if appropriate. - */ - DB_NTOHL_SWAP(env, &meta_pgno); - if (meta_pgno == PGNO_INVALID || meta_pgno > vdp->last_pgno) { - EPRINT((env, - "Subdatabase entry references invalid page %lu", - (u_long)meta_pgno)); - isbad = 1; - goto err; - } - if ((ret = __db_vrfy_getpageinfo(vdp, meta_pgno, &pip)) != 0) - goto err; - type = pip->type; - if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) - goto err; - switch (type) { - case P_BTREEMETA: - if ((ret = __bam_vrfy_structure( - dbp, vdp, meta_pgno, NULL, NULL, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - break; - case P_HASHMETA: - if ((ret = __ham_vrfy_structure( - dbp, vdp, meta_pgno, flags)) != 0) { - if (ret == DB_VERIFY_BAD) - isbad = 1; - else - goto err; - } - break; - case P_QAMMETA: - default: - EPRINT((env, - "Subdatabase entry references page %lu of invalid type %lu", - (u_long)meta_pgno, (u_long)type)); - ret = DB_VERIFY_BAD; - goto err; - } - } - - if (ret == DB_NOTFOUND) - ret = 0; - -err: if (dbc != NULL && (t_ret = 
__dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - if ((t_ret = __db_close(mdbp, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - - return ((ret == 0 && isbad == 1) ? DB_VERIFY_BAD : ret); -} - -/* - * __db_vrfy_struct_feedback -- - * Provide feedback during top-down database structure traversal. - * (See comment at the beginning of __db_vrfy_structure.) - * - * PUBLIC: void __db_vrfy_struct_feedback __P((DB *, VRFY_DBINFO *)); - */ -void -__db_vrfy_struct_feedback(dbp, vdp) - DB *dbp; - VRFY_DBINFO *vdp; -{ - int progress; - - if (dbp->db_feedback == NULL) - return; - - if (vdp->pgs_remaining > 0) - vdp->pgs_remaining--; - - /* Don't allow a feedback call of 100 until we're really done. */ - progress = 100 - (int)(vdp->pgs_remaining * 50 / (vdp->last_pgno + 1)); - dbp->db_feedback(dbp, DB_VERIFY, progress == 100 ? 99 : progress); -} - -/* - * __db_vrfy_orderchkonly -- - * Do an sort-order/hashing check on a known-otherwise-good subdb. - */ -static int -__db_vrfy_orderchkonly(dbp, vdp, name, subdb, flags) - DB *dbp; - VRFY_DBINFO *vdp; - const char *name, *subdb; - u_int32_t flags; -{ - BTMETA *btmeta; - DB *mdbp, *pgset; - DBC *pgsc; - DBT key, data; - DB_MPOOLFILE *mpf; - ENV *env; - HASH *h_internal; - HMETA *hmeta; - PAGE *h, *currpg; - db_pgno_t meta_pgno, p, pgno; - u_int32_t bucket; - int t_ret, ret; - - pgset = NULL; - pgsc = NULL; - env = dbp->env; - mpf = dbp->mpf; - currpg = h = NULL; - - LF_CLR(DB_NOORDERCHK); - - /* Open the master database and get the meta_pgno for the subdb. 
*/ - if ((ret = __db_master_open(dbp, - vdp->thread_info, NULL, name, DB_RDONLY, 0, &mdbp)) != 0) - goto err; - - DB_INIT_DBT(key, subdb, strlen(subdb)); - memset(&data, 0, sizeof(data)); - if ((ret = __db_get(mdbp, - vdp->thread_info, NULL, &key, &data, 0)) != 0) { - if (ret == DB_NOTFOUND) - ret = ENOENT; - goto err; - } - - if (data.size != sizeof(db_pgno_t)) { - EPRINT((env, "Subdatabase entry of invalid size")); - ret = DB_VERIFY_BAD; - goto err; - } - - memcpy(&meta_pgno, data.data, data.size); - - /* - * Subdatabase meta pgnos are stored in network byte - * order for cross-endian compatibility. Swap if appropriate. - */ - DB_NTOHL_SWAP(env, &meta_pgno); - - if ((ret = __memp_fget(mpf, - &meta_pgno, vdp->thread_info, NULL, 0, &h)) != 0) - goto err; - - if ((ret = __db_vrfy_pgset(env, - vdp->thread_info, dbp->pgsize, &pgset)) != 0) - goto err; - - switch (TYPE(h)) { - case P_BTREEMETA: - btmeta = (BTMETA *)h; - if (F_ISSET(&btmeta->dbmeta, BTM_RECNO)) { - /* Recnos have no order to check. */ - ret = 0; - goto err; - } - if ((ret = - __db_meta2pgset(dbp, vdp, meta_pgno, flags, pgset)) != 0) - goto err; - if ((ret = __db_cursor_int(pgset, NULL, NULL, dbp->type, - PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0) - goto err; - while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) { - if ((ret = __memp_fget(mpf, &p, - vdp->thread_info, NULL, 0, &currpg)) != 0) - goto err; - if ((ret = __bam_vrfy_itemorder(dbp, NULL, - vdp->thread_info, currpg, p, NUM_ENT(currpg), 1, - F_ISSET(&btmeta->dbmeta, BTM_DUP), flags)) != 0) - goto err; - if ((ret = __memp_fput(mpf, - vdp->thread_info, currpg, dbp->priority)) != 0) - goto err; - currpg = NULL; - } - - /* - * The normal exit condition for the loop above is DB_NOTFOUND. - * If we see that, zero it and continue on to cleanup. - * Otherwise, it's a real error and will be returned. 
- */ - if (ret == DB_NOTFOUND) - ret = 0; - break; - case P_HASHMETA: - hmeta = (HMETA *)h; - h_internal = (HASH *)dbp->h_internal; - /* - * Make sure h_charkey is right. - */ - if (h_internal == NULL) { - EPRINT((env, - "Page %lu: DB->h_internal field is NULL", - (u_long)meta_pgno)); - ret = DB_VERIFY_BAD; - goto err; - } - if (h_internal->h_hash == NULL) - h_internal->h_hash = hmeta->dbmeta.version < 5 - ? __ham_func4 : __ham_func5; - if (hmeta->h_charkey != - h_internal->h_hash(dbp, CHARKEY, sizeof(CHARKEY))) { - EPRINT((env, - "Page %lu: incorrect hash function for database", - (u_long)meta_pgno)); - ret = DB_VERIFY_BAD; - goto err; - } - - /* - * Foreach bucket, verify hashing on each page in the - * corresponding chain of pages. - */ - if ((ret = __db_cursor_int(dbp, NULL, NULL, dbp->type, - PGNO_INVALID, 0, DB_LOCK_INVALIDID, &pgsc)) != 0) - goto err; - for (bucket = 0; bucket <= hmeta->max_bucket; bucket++) { - pgno = BS_TO_PAGE(bucket, hmeta->spares); - while (pgno != PGNO_INVALID) { - if ((ret = __memp_fget(mpf, &pgno, - vdp->thread_info, NULL, 0, &currpg)) != 0) - goto err; - if ((ret = __ham_vrfy_hashing(pgsc, - NUM_ENT(currpg), hmeta, bucket, pgno, - flags, h_internal->h_hash)) != 0) - goto err; - pgno = NEXT_PGNO(currpg); - if ((ret = __memp_fput(mpf, vdp->thread_info, - currpg, dbp->priority)) != 0) - goto err; - currpg = NULL; - } - } - break; - default: - EPRINT((env, "Page %lu: database metapage of bad type %lu", - (u_long)meta_pgno, (u_long)TYPE(h))); - ret = DB_VERIFY_BAD; - break; - } - -err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0) - ret = t_ret; - if (pgset != NULL && - (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - if (h != NULL && (t_ret = __memp_fput(mpf, - vdp->thread_info, h, dbp->priority)) != 0) - ret = t_ret; - if (currpg != NULL && - (t_ret = __memp_fput(mpf, - vdp->thread_info, currpg, dbp->priority)) != 0) - ret = t_ret; - if ((t_ret = __db_close(mdbp, NULL, 0)) != 0) - ret = t_ret; 
- return (ret); -} - -/* - * __db_salvage_pg -- - * Walk through a page, salvaging all likely or plausible (w/ - * DB_AGGRESSIVE) key/data pairs and marking seen pages in vdp. - * - * PUBLIC: int __db_salvage_pg __P((DB *, VRFY_DBINFO *, db_pgno_t, - * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t)); - */ -int -__db_salvage_pg(dbp, vdp, pgno, h, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - PAGE *h; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - ENV *env; - VRFY_PAGEINFO *pip; - int keyflag, ret, t_ret; - - env = dbp->env; - DB_ASSERT(env, LF_ISSET(DB_SALVAGE)); - - /* - * !!! - * We dump record numbers when salvaging Queue databases, but not for - * immutable Recno databases. The problem is we can't figure out the - * record number from the database page in the Recno case, while the - * offset in the file is sufficient for Queue. - */ - keyflag = 0; - - /* If we got this page in the subdb pass, we can safely skip it. */ - if (__db_salvage_isdone(vdp, pgno)) - return (0); - - switch (TYPE(h)) { - case P_BTREEMETA: - ret = __bam_vrfy_meta(dbp, vdp, (BTMETA *)h, pgno, flags); - break; - case P_HASH: - case P_HASH_UNSORTED: - case P_LBTREE: - case P_QAMDATA: - return (__db_salvage_leaf(dbp, - vdp, pgno, h, handle, callback, flags)); - case P_HASHMETA: - ret = __ham_vrfy_meta(dbp, vdp, (HMETA *)h, pgno, flags); - break; - case P_IBTREE: - /* - * We need to mark any overflow keys on internal pages as seen, - * so we don't print them out in __db_salvage_unknowns. But if - * we're an upgraded database, a P_LBTREE page may very well - * have a reference to the same overflow pages (this practice - * stopped somewhere around db4.5). To give P_LBTREEs a chance - * to print out any keys on shared pages, mark the page now and - * deal with it at the end. 
- */ - return (__db_salvage_markneeded(vdp, pgno, SALVAGE_IBTREE)); - case P_LDUP: - return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LDUP)); - case P_LRECNO: - /* - * Recno leaves are tough, because the leaf could be (1) a dup - * page, or it could be (2) a regular database leaf page. - * Fortunately, RECNO databases are not allowed to have - * duplicates. - * - * If there are no subdatabases, dump the page immediately if - * it's a leaf in a RECNO database, otherwise wait and hopefully - * it will be dumped by the leaf page that refers to it, - * otherwise we'll get it with the unknowns. - * - * If there are subdatabases, there might be mixed types and - * dbp->type can't be trusted. We'll only get here after - * salvaging each database, though, so salvaging this page - * immediately isn't important. If this page is a dup, it might - * get salvaged later on, otherwise the unknowns pass will pick - * it up. Note that SALVAGE_HASSUBDBS won't get set if we're - * salvaging aggressively. - * - * If we're salvaging aggressively, we don't know whether or not - * there's subdatabases, so we wait on all recno pages. - */ - if (!LF_ISSET(DB_AGGRESSIVE) && - !F_ISSET(vdp, SALVAGE_HASSUBDBS) && dbp->type == DB_RECNO) - return (__db_salvage_leaf(dbp, - vdp, pgno, h, handle, callback, flags)); - return (__db_salvage_markneeded(vdp, pgno, SALVAGE_LRECNODUP)); - case P_OVERFLOW: - return (__db_salvage_markneeded(vdp, pgno, SALVAGE_OVERFLOW)); - case P_QAMMETA: - keyflag = 1; - ret = __qam_vrfy_meta(dbp, vdp, (QMETA *)h, pgno, flags); - break; - case P_INVALID: - case P_IRECNO: - case __P_DUPLICATE: - default: - /* - * There's no need to display an error, the page type was - * already checked and reported on. - */ - return (0); - } - if (ret != 0) - return (ret); - - /* - * We have to display the dump header if it's a metadata page. It's - * our last chance as the page was marked "seen" in the vrfy routine, - * and we won't see the page again. 
We don't display headers for - * the first database in a multi-database file, that database simply - * contains a list of subdatabases. - */ - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - if (!F_ISSET(pip, VRFY_HAS_SUBDBS) && !LF_ISSET(DB_VERIFY_PARTITION)) - ret = __db_prheader( - dbp, NULL, 0, keyflag, handle, callback, vdp, pgno); - if ((t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __db_salvage_leaf -- - * Walk through a leaf, salvaging all likely key/data pairs and marking - * seen pages in vdp. - * - * PUBLIC: int __db_salvage_leaf __P((DB *, VRFY_DBINFO *, db_pgno_t, - * PUBLIC: PAGE *, void *, int (*)(void *, const void *), u_int32_t)); - */ -int -__db_salvage_leaf(dbp, vdp, pgno, h, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - PAGE *h; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - ENV *env; - - env = dbp->env; - DB_ASSERT(env, LF_ISSET(DB_SALVAGE)); - - /* If we got this page in the subdb pass, we can safely skip it. */ - if (__db_salvage_isdone(vdp, pgno)) - return (0); - - switch (TYPE(h)) { - case P_HASH_UNSORTED: - case P_HASH: - return (__ham_salvage(dbp, vdp, - pgno, h, handle, callback, flags)); - case P_LBTREE: - case P_LRECNO: - return (__bam_salvage(dbp, vdp, - pgno, TYPE(h), h, handle, callback, NULL, flags)); - case P_QAMDATA: - return (__qam_salvage(dbp, vdp, - pgno, h, handle, callback, flags)); - default: - /* - * There's no need to display an error, the page type was - * already checked and reported on. - */ - return (0); - } -} - -/* - * __db_salvage_unknowns -- - * Walk through the salvager database, printing with key "UNKNOWN" - * any pages we haven't dealt with. 
- */ -static int -__db_salvage_unknowns(dbp, vdp, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - DBC *dbc; - DBT unkdbt, key, *dbt; - DB_MPOOLFILE *mpf; - ENV *env; - PAGE *h; - db_pgno_t pgno; - u_int32_t pgtype, ovfl_bufsz, tmp_flags; - int ret, t_ret; - void *ovflbuf; - - dbc = NULL; - env = dbp->env; - mpf = dbp->mpf; - - DB_INIT_DBT(unkdbt, "UNKNOWN", sizeof("UNKNOWN") - 1); - - if ((ret = __os_malloc(env, dbp->pgsize, &ovflbuf)) != 0) - return (ret); - ovfl_bufsz = dbp->pgsize; - - /* - * We make two passes -- in the first pass, skip SALVAGE_OVERFLOW - * pages, because they may be referenced by the standard database - * pages that we're resolving. - */ - while ((t_ret = - __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 1)) == 0) { - if ((t_ret = __memp_fget(mpf, - &pgno, vdp->thread_info, NULL, 0, &h)) != 0) { - if (ret == 0) - ret = t_ret; - continue; - } - - dbt = NULL; - tmp_flags = 0; - switch (pgtype) { - case SALVAGE_LDUP: - case SALVAGE_LRECNODUP: - dbt = &unkdbt; - tmp_flags = DB_SA_UNKNOWNKEY; - /* FALLTHROUGH */ - case SALVAGE_IBTREE: - case SALVAGE_LBTREE: - case SALVAGE_LRECNO: - if ((t_ret = __bam_salvage( - dbp, vdp, pgno, pgtype, h, handle, - callback, dbt, tmp_flags | flags)) != 0 && ret == 0) - ret = t_ret; - break; - case SALVAGE_OVERFLOW: - DB_ASSERT(env, 0); /* Shouldn't ever happen. */ - break; - case SALVAGE_HASH: - if ((t_ret = __ham_salvage(dbp, vdp, - pgno, h, handle, callback, flags)) != 0 && ret == 0) - ret = t_ret; - break; - case SALVAGE_INVALID: - case SALVAGE_IGNORE: - default: - /* - * Shouldn't happen, but if it does, just do what the - * nice man says. - */ - DB_ASSERT(env, 0); - break; - } - if ((t_ret = __memp_fput(mpf, - vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - } - - /* We should have reached the end of the database. 
*/ - if (t_ret == DB_NOTFOUND) - t_ret = 0; - if (t_ret != 0 && ret == 0) - ret = t_ret; - - /* Re-open the cursor so we traverse the database again. */ - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - dbc = NULL; - - /* Now, deal with any remaining overflow pages. */ - while ((t_ret = - __db_salvage_getnext(vdp, &dbc, &pgno, &pgtype, 0)) == 0) { - if ((t_ret = __memp_fget(mpf, - &pgno, vdp->thread_info, NULL, 0, &h)) != 0) { - if (ret == 0) - ret = t_ret; - continue; - } - - switch (pgtype) { - case SALVAGE_OVERFLOW: - /* - * XXX: - * This may generate multiple "UNKNOWN" keys in - * a database with no dups. What to do? - */ - if ((t_ret = __db_safe_goff(dbp, vdp, - pgno, &key, &ovflbuf, &ovfl_bufsz, flags)) != 0 || - ((vdp->type == DB_BTREE || vdp->type == DB_HASH) && - (t_ret = __db_vrfy_prdbt(&unkdbt, - 0, " ", handle, callback, 0, vdp)) != 0) || - (t_ret = __db_vrfy_prdbt( - &key, 0, " ", handle, callback, 0, vdp)) != 0) - if (ret == 0) - ret = t_ret; - break; - default: - DB_ASSERT(env, 0); /* Shouldn't ever happen. */ - break; - } - if ((t_ret = __memp_fput(mpf, - vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - } - - /* We should have reached the end of the database. */ - if (t_ret == DB_NOTFOUND) - t_ret = 0; - if (t_ret != 0 && ret == 0) - ret = t_ret; - - if ((t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - __os_free(env, ovflbuf); - - return (ret); -} - -/* - * Offset of the ith inp array entry, which we can compare to the offset - * the entry stores. - */ -#define INP_OFFSET(dbp, h, i) \ - ((db_indx_t)((u_int8_t *)((P_INP(dbp,(h))) + (i)) - (u_int8_t *)(h))) - -/* - * __db_vrfy_inpitem -- - * Verify that a single entry in the inp array is sane, and update - * the high water mark and current item offset. (The former of these is - * used for state information between calls, and is required; it must - * be initialized to the pagesize before the first call.) 
- * - * Returns DB_VERIFY_FATAL if inp has collided with the data, - * since verification can't continue from there; returns DB_VERIFY_BAD - * if anything else is wrong. - * - * PUBLIC: int __db_vrfy_inpitem __P((DB *, PAGE *, - * PUBLIC: db_pgno_t, u_int32_t, int, u_int32_t, u_int32_t *, u_int32_t *)); - */ -int -__db_vrfy_inpitem(dbp, h, pgno, i, is_btree, flags, himarkp, offsetp) - DB *dbp; - PAGE *h; - db_pgno_t pgno; - u_int32_t i; - int is_btree; - u_int32_t flags, *himarkp, *offsetp; -{ - BKEYDATA *bk; - ENV *env; - db_indx_t *inp, offset, len; - - env = dbp->env; - - DB_ASSERT(env, himarkp != NULL); - inp = P_INP(dbp, h); - - /* - * Check that the inp array, which grows from the beginning of the - * page forward, has not collided with the data, which grow from the - * end of the page backward. - */ - if (inp + i >= (db_indx_t *)((u_int8_t *)h + *himarkp)) { - /* We've collided with the data. We need to bail. */ - EPRINT((env, "Page %lu: entries listing %lu overlaps data", - (u_long)pgno, (u_long)i)); - return (DB_VERIFY_FATAL); - } - - offset = inp[i]; - - /* - * Check that the item offset is reasonable: it points somewhere - * after the inp array and before the end of the page. - */ - if (offset <= INP_OFFSET(dbp, h, i) || offset >= dbp->pgsize) { - EPRINT((env, "Page %lu: bad offset %lu at page index %lu", - (u_long)pgno, (u_long)offset, (u_long)i)); - return (DB_VERIFY_BAD); - } - - /* Update the high-water mark (what HOFFSET should be) */ - if (offset < *himarkp) - *himarkp = offset; - - if (is_btree) { - /* - * Check alignment; if it's unaligned, it's unsafe to - * manipulate this item. - */ - if (offset != DB_ALIGN(offset, sizeof(u_int32_t))) { - EPRINT((env, - "Page %lu: unaligned offset %lu at page index %lu", - (u_long)pgno, (u_long)offset, (u_long)i)); - return (DB_VERIFY_BAD); - } - - /* - * Check that the item length remains on-page. 
- */ - bk = GET_BKEYDATA(dbp, h, i); - - /* - * We need to verify the type of the item here; - * we can't simply assume that it will be one of the - * expected three. If it's not a recognizable type, - * it can't be considered to have a verifiable - * length, so it's not possible to certify it as safe. - */ - switch (B_TYPE(bk->type)) { - case B_KEYDATA: - len = bk->len; - break; - case B_DUPLICATE: - case B_OVERFLOW: - len = BOVERFLOW_SIZE; - break; - default: - EPRINT((env, - "Page %lu: item %lu of unrecognizable type", - (u_long)pgno, (u_long)i)); - return (DB_VERIFY_BAD); - } - - if ((size_t)(offset + len) > dbp->pgsize) { - EPRINT((env, - "Page %lu: item %lu extends past page boundary", - (u_long)pgno, (u_long)i)); - return (DB_VERIFY_BAD); - } - } - - if (offsetp != NULL) - *offsetp = offset; - return (0); -} - -/* - * __db_vrfy_duptype-- - * Given a page number and a set of flags to __bam_vrfy_subtree, - * verify that the dup tree type is correct--i.e., it's a recno - * if DUPSORT is not set and a btree if it is. 
- * - * PUBLIC: int __db_vrfy_duptype - * PUBLIC: __P((DB *, VRFY_DBINFO *, db_pgno_t, u_int32_t)); - */ -int -__db_vrfy_duptype(dbp, vdp, pgno, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - u_int32_t flags; -{ - ENV *env; - VRFY_PAGEINFO *pip; - int ret, isbad; - - env = dbp->env; - isbad = 0; - - if ((ret = __db_vrfy_getpageinfo(vdp, pgno, &pip)) != 0) - return (ret); - - switch (pip->type) { - case P_IBTREE: - case P_LDUP: - if (!LF_ISSET(DB_ST_DUPSORT)) { - EPRINT((env, - "Page %lu: sorted duplicate set in unsorted-dup database", - (u_long)pgno)); - isbad = 1; - } - break; - case P_IRECNO: - case P_LRECNO: - if (LF_ISSET(DB_ST_DUPSORT)) { - EPRINT((env, - "Page %lu: unsorted duplicate set in sorted-dup database", - (u_long)pgno)); - isbad = 1; - } - break; - default: - /* - * If the page is entirely zeroed, its pip->type will be a lie - * (we assumed it was a hash page, as they're allowed to be - * zeroed); handle this case specially. - */ - if (F_ISSET(pip, VRFY_IS_ALLZEROES)) - ZEROPG_ERR_PRINT(env, pgno, "duplicate page"); - else - EPRINT((env, - "Page %lu: duplicate page of inappropriate type %lu", - (u_long)pgno, (u_long)pip->type)); - isbad = 1; - break; - } - - if ((ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0) - return (ret); - return (isbad == 1 ? DB_VERIFY_BAD : 0); -} - -/* - * __db_salvage_duptree -- - * Attempt to salvage a given duplicate tree, given its alleged root. - * - * The key that corresponds to this dup set has been passed to us - * in DBT *key. Because data items follow keys, though, it has been - * printed once already. - * - * The basic idea here is that pgno ought to be a P_LDUP, a P_LRECNO, a - * P_IBTREE, or a P_IRECNO. If it's an internal page, use the verifier - * functions to make sure it's safe; if it's not, we simply bail and the - * data will have to be printed with no key later on. if it is safe, - * recurse on each of its children. - * - * Whether or not it's safe, if it's a leaf page, __bam_salvage it. 
- * - * At all times, use the DB hanging off vdp to mark and check what we've - * done, so each page gets printed exactly once and we don't get caught - * in any cycles. - * - * PUBLIC: int __db_salvage_duptree __P((DB *, VRFY_DBINFO *, db_pgno_t, - * PUBLIC: DBT *, void *, int (*)(void *, const void *), u_int32_t)); - */ -int -__db_salvage_duptree(dbp, vdp, pgno, key, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - DBT *key; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - DB_MPOOLFILE *mpf; - PAGE *h; - int ret, t_ret; - - mpf = dbp->mpf; - - if (pgno == PGNO_INVALID || !IS_VALID_PGNO(pgno)) - return (DB_VERIFY_BAD); - - /* We have a plausible page. Try it. */ - if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) - return (ret); - - switch (TYPE(h)) { - case P_IBTREE: - case P_IRECNO: - if ((ret = __db_vrfy_common(dbp, vdp, h, pgno, flags)) != 0) - goto err; - if ((ret = __bam_vrfy(dbp, - vdp, h, pgno, flags | DB_NOORDERCHK)) != 0 || - (ret = __db_salvage_markdone(vdp, pgno)) != 0) - goto err; - /* - * We have a known-healthy internal page. Walk it. - */ - if ((ret = __bam_salvage_walkdupint(dbp, vdp, h, key, - handle, callback, flags)) != 0) - goto err; - break; - case P_LRECNO: - case P_LDUP: - if ((ret = __bam_salvage(dbp, - vdp, pgno, TYPE(h), h, handle, callback, key, flags)) != 0) - goto err; - break; - default: - ret = DB_VERIFY_BAD; - goto err; - } - -err: if ((t_ret = __memp_fput(mpf, - vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __db_salvage_all -- - * Salvage only the leaves we find by walking the tree. If we have subdbs, - * salvage each of them individually. 
- */ -static int -__db_salvage_all(dbp, vdp, handle, callback, flags, hassubsp) - DB *dbp; - VRFY_DBINFO *vdp; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; - int *hassubsp; -{ - DB *pgset; - DBC *pgsc; - DB_MPOOLFILE *mpf; - ENV *env; - PAGE *h; - VRFY_PAGEINFO *pip; - db_pgno_t p, meta_pgno; - int ret, t_ret; - - *hassubsp = 0; - - env = dbp->env; - pgset = NULL; - pgsc = NULL; - mpf = dbp->mpf; - h = NULL; - pip = NULL; - ret = 0; - - /* - * Check to make sure the page is OK and find out if it contains - * subdatabases. - */ - meta_pgno = PGNO_BASE_MD; - if ((t_ret = __memp_fget(mpf, - &meta_pgno, vdp->thread_info, NULL, 0, &h)) == 0 && - (t_ret = __db_vrfy_common(dbp, vdp, h, PGNO_BASE_MD, flags)) == 0 && - (t_ret = __db_salvage_pg( - dbp, vdp, PGNO_BASE_MD, h, handle, callback, flags)) == 0 && - (t_ret = __db_vrfy_getpageinfo(vdp, 0, &pip)) == 0) - if (F_ISSET(pip, VRFY_HAS_SUBDBS)) - *hassubsp = 1; - if (pip != NULL && - (t_ret = __db_vrfy_putpageinfo(env, vdp, pip)) != 0 && ret == 0) - ret = t_ret; - if (h != NULL) { - if ((t_ret = __memp_fput(mpf, - vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - h = NULL; - } - if (ret != 0) - return (ret); - - /* Without subdatabases, we can just dump from the meta pgno. */ - if (*hassubsp == 0) - return (__db_salvage(dbp, - vdp, PGNO_BASE_MD, handle, callback, flags)); - - /* - * We have subdbs. Try to crack them. - * - * To do so, get a set of leaf pages in the master database, and then - * walk each of the valid ones, salvaging subdbs as we go. If any - * prove invalid, just drop them; we'll pick them up on a later pass. 
- */ - if ((ret = __db_vrfy_pgset(env, - vdp->thread_info, dbp->pgsize, &pgset)) != 0) - goto err; - if ((ret = __db_meta2pgset(dbp, vdp, PGNO_BASE_MD, flags, pgset)) != 0) - goto err; - if ((ret = __db_cursor(pgset, vdp->thread_info, NULL, &pgsc, 0)) != 0) - goto err; - while ((t_ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) { - if ((t_ret = __memp_fget(mpf, - &p, vdp->thread_info, NULL, 0, &h)) == 0 && - (t_ret = __db_vrfy_common(dbp, vdp, h, p, flags)) == 0 && - (t_ret = - __bam_vrfy(dbp, vdp, h, p, flags | DB_NOORDERCHK)) == 0) - t_ret = __db_salvage_subdbpg( - dbp, vdp, h, handle, callback, flags); - if (t_ret != 0 && ret == 0) - ret = t_ret; - if (h != NULL) { - if ((t_ret = __memp_fput(mpf, vdp->thread_info, - h, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - h = NULL; - } - } - - if (t_ret != DB_NOTFOUND && ret == 0) - ret = t_ret; - -err: if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0 && ret == 0) - ret = t_ret; - if (pgset != NULL && - (t_ret = __db_close(pgset, NULL, 0)) != 0 && ret ==0) - ret = t_ret; - if (h != NULL && - (t_ret = __memp_fput(mpf, - vdp->thread_info, h, dbp->priority)) != 0 && ret == 0) - ret = t_ret; - return (ret); -} - -/* - * __db_salvage_subdbpg -- - * Given a known-good leaf page in the master database, salvage all - * leaf pages corresponding to each subdb. - */ -static int -__db_salvage_subdbpg(dbp, vdp, master, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - PAGE *master; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - BKEYDATA *bkkey, *bkdata; - BOVERFLOW *bo; - DB *pgset; - DBC *pgsc; - DBT key; - DB_MPOOLFILE *mpf; - ENV *env; - PAGE *subpg; - db_indx_t i; - db_pgno_t meta_pgno; - int ret, err_ret, t_ret; - char *subdbname; - u_int32_t ovfl_bufsz; - - env = dbp->env; - mpf = dbp->mpf; - ret = err_ret = 0; - subdbname = NULL; - pgsc = NULL; - pgset = NULL; - ovfl_bufsz = 0; - - /* - * For each entry, get and salvage the set of pages - * corresponding to that entry. 
- */ - for (i = 0; i < NUM_ENT(master); i += P_INDX) { - bkkey = GET_BKEYDATA(dbp, master, i); - bkdata = GET_BKEYDATA(dbp, master, i + O_INDX); - - /* Get the subdatabase name. */ - if (B_TYPE(bkkey->type) == B_OVERFLOW) { - /* - * We can, in principle anyway, have a subdb - * name so long it overflows. Ick. - */ - bo = (BOVERFLOW *)bkkey; - if ((ret = __db_safe_goff(dbp, vdp, bo->pgno, - &key, &subdbname, &ovfl_bufsz, flags)) != 0) { - err_ret = DB_VERIFY_BAD; - continue; - } - - /* Nul-terminate it. */ - if (ovfl_bufsz < key.size + 1) { - if ((ret = __os_realloc(env, - key.size + 1, &subdbname)) != 0) - goto err; - ovfl_bufsz = key.size + 1; - } - subdbname[key.size] = '\0'; - } else if (B_TYPE(bkkey->type) == B_KEYDATA) { - if (ovfl_bufsz < (u_int32_t)bkkey->len + 1) { - if ((ret = __os_realloc(env, - bkkey->len + 1, &subdbname)) != 0) - goto err; - ovfl_bufsz = bkkey->len + 1; - } - DB_ASSERT(env, subdbname != NULL); - memcpy(subdbname, bkkey->data, bkkey->len); - subdbname[bkkey->len] = '\0'; - } - - /* Get the corresponding pgno. */ - if (bkdata->len != sizeof(db_pgno_t)) { - err_ret = DB_VERIFY_BAD; - continue; - } - memcpy(&meta_pgno, - (db_pgno_t *)bkdata->data, sizeof(db_pgno_t)); - - /* - * Subdatabase meta pgnos are stored in network byte - * order for cross-endian compatibility. Swap if appropriate. - */ - DB_NTOHL_SWAP(env, &meta_pgno); - - /* If we can't get the subdb meta page, just skip the subdb. */ - if (!IS_VALID_PGNO(meta_pgno) || (ret = __memp_fget(mpf, - &meta_pgno, vdp->thread_info, NULL, 0, &subpg)) != 0) { - err_ret = ret; - continue; - } - - /* - * Verify the subdatabase meta page. This has two functions. - * First, if it's bad, we have no choice but to skip the subdb - * and let the pages just get printed on a later pass. Second, - * the access-method-specific meta verification routines record - * the various state info (such as the presence of dups) - * that we need for __db_prheader(). 
- */ - if ((ret = - __db_vrfy_common(dbp, vdp, subpg, meta_pgno, flags)) != 0) { - err_ret = ret; - (void)__memp_fput(mpf, - vdp->thread_info, subpg, dbp->priority); - continue; - } - switch (TYPE(subpg)) { - case P_BTREEMETA: - if ((ret = __bam_vrfy_meta(dbp, - vdp, (BTMETA *)subpg, meta_pgno, flags)) != 0) { - err_ret = ret; - (void)__memp_fput(mpf, - vdp->thread_info, subpg, dbp->priority); - continue; - } - break; - case P_HASHMETA: - if ((ret = __ham_vrfy_meta(dbp, - vdp, (HMETA *)subpg, meta_pgno, flags)) != 0) { - err_ret = ret; - (void)__memp_fput(mpf, - vdp->thread_info, subpg, dbp->priority); - continue; - } - break; - default: - /* This isn't an appropriate page; skip this subdb. */ - err_ret = DB_VERIFY_BAD; - continue; - } - - if ((ret = __memp_fput(mpf, - vdp->thread_info, subpg, dbp->priority)) != 0) { - err_ret = ret; - continue; - } - - /* Print a subdatabase header. */ - if ((ret = __db_prheader(dbp, - subdbname, 0, 0, handle, callback, vdp, meta_pgno)) != 0) - goto err; - - /* Salvage meta_pgno's tree. */ - if ((ret = __db_salvage(dbp, - vdp, meta_pgno, handle, callback, flags)) != 0) - err_ret = ret; - - /* Print a subdatabase footer. */ - if ((ret = __db_prfooter(handle, callback)) != 0) - goto err; - } - -err: if (subdbname) - __os_free(env, subdbname); - - if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0) - ret = t_ret; - - if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0) - ret = t_ret; - - if ((t_ret = __db_salvage_markdone(vdp, PGNO(master))) != 0) - return (t_ret); - - return ((err_ret != 0) ? err_ret : ret); -} - -/* - * __db_salvage -- - * Given a meta page number, salvage all data from leaf pages found by - * walking the meta page's tree. 
- */ -static int -__db_salvage(dbp, vdp, meta_pgno, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t meta_pgno; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; - -{ - DB *pgset; - DBC *dbc, *pgsc; - DB_MPOOLFILE *mpf; - ENV *env; - PAGE *subpg; - db_pgno_t p; - int err_ret, ret, t_ret; - - env = dbp->env; - mpf = dbp->mpf; - err_ret = ret = t_ret = 0; - pgsc = NULL; - pgset = NULL; - dbc = NULL; - - if ((ret = __db_vrfy_pgset(env, - vdp->thread_info, dbp->pgsize, &pgset)) != 0) - goto err; - - /* Get all page numbers referenced from this meta page. */ - if ((ret = __db_meta2pgset(dbp, vdp, meta_pgno, - flags, pgset)) != 0) { - err_ret = ret; - goto err; - } - - if ((ret = __db_cursor(pgset, - vdp->thread_info, NULL, &pgsc, 0)) != 0) - goto err; - - if (dbp->type == DB_QUEUE && - (ret = __db_cursor(dbp, vdp->thread_info, NULL, &dbc, 0)) != 0) - goto err; - - /* Salvage every page in pgset. */ - while ((ret = __db_vrfy_pgset_next(pgsc, &p)) == 0) { - if (dbp->type == DB_QUEUE) { -#ifdef HAVE_QUEUE - ret = __qam_fget(dbc, &p, 0, &subpg); -#else - ret = __db_no_queue_am(env); -#endif - /* Don't report an error for pages not found in a queue. - * The pgset is a best guess, it doesn't know about - * deleted extents which leads to this error. 
- */ - if (ret == ENOENT || ret == DB_PAGE_NOTFOUND) - continue; - } else - ret = __memp_fget(mpf, - &p, vdp->thread_info, NULL, 0, &subpg); - if (ret != 0) { - err_ret = ret; - continue; - } - - if ((ret = __db_salvage_pg(dbp, vdp, p, subpg, - handle, callback, flags)) != 0) - err_ret = ret; - - if (dbp->type == DB_QUEUE) -#ifdef HAVE_QUEUE - ret = __qam_fput(dbc, p, subpg, dbp->priority); -#else - ret = __db_no_queue_am(env); -#endif - else - ret = __memp_fput(mpf, - vdp->thread_info, subpg, dbp->priority); - if (ret != 0) - err_ret = ret; - } - - if (ret == DB_NOTFOUND) - ret = 0; - -err: - if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0) - ret = t_ret; - if (pgsc != NULL && (t_ret = __dbc_close(pgsc)) != 0) - ret = t_ret; - if (pgset != NULL && (t_ret = __db_close(pgset, NULL, 0)) != 0) - ret = t_ret; - - return ((err_ret != 0) ? err_ret : ret); -} - -/* - * __db_meta2pgset -- - * Given a known-safe meta page number, return the set of pages - * corresponding to the database it represents. Return DB_VERIFY_BAD if - * it's not a suitable meta page or is invalid. - */ -static int -__db_meta2pgset(dbp, vdp, pgno, flags, pgset) - DB *dbp; - VRFY_DBINFO *vdp; - db_pgno_t pgno; - u_int32_t flags; - DB *pgset; -{ - DB_MPOOLFILE *mpf; - PAGE *h; - int ret, t_ret; - - mpf = dbp->mpf; - - if ((ret = __memp_fget(mpf, &pgno, vdp->thread_info, NULL, 0, &h)) != 0) - return (ret); - - switch (TYPE(h)) { - case P_BTREEMETA: - ret = __bam_meta2pgset(dbp, vdp, (BTMETA *)h, flags, pgset); - break; - case P_HASHMETA: - ret = __ham_meta2pgset(dbp, vdp, (HMETA *)h, flags, pgset); - break; - case P_QAMMETA: -#ifdef HAVE_QUEUE - ret = __qam_meta2pgset(dbp, vdp, pgset); - break; -#endif - default: - ret = DB_VERIFY_BAD; - break; - } - - if ((t_ret = __memp_fput(mpf, vdp->thread_info, h, dbp->priority)) != 0) - return (t_ret); - return (ret); -} - -/* - * __db_guesspgsize -- - * Try to guess what the pagesize is if the one on the meta page - * and the one in the db are invalid. 
- */ -static u_int -__db_guesspgsize(env, fhp) - ENV *env; - DB_FH *fhp; -{ - db_pgno_t i; - size_t nr; - u_int32_t guess; - u_int8_t type; - - for (guess = DB_MAX_PGSIZE; guess >= DB_MIN_PGSIZE; guess >>= 1) { - /* - * We try to read three pages ahead after the first one - * and make sure we have plausible types for all of them. - * If the seeks fail, continue with a smaller size; - * we're probably just looking past the end of the database. - * If they succeed and the types are reasonable, also continue - * with a size smaller; we may be looking at pages N, - * 2N, and 3N for some N > 1. - * - * As soon as we hit an invalid type, we stop and return - * our previous guess; that last one was probably the page size. - */ - for (i = 1; i <= 3; i++) { - if (__os_seek( - env, fhp, i, guess, SSZ(DBMETA, type)) != 0) - break; - if (__os_read(env, - fhp, &type, 1, &nr) != 0 || nr == 0) - break; - if (type == P_INVALID || type >= P_PAGETYPE_MAX) - return (guess << 1); - } - } - - /* - * If we're just totally confused--the corruption takes up most of the - * beginning pages of the database--go with the default size. - */ - return (DB_DEF_IOSIZE); -} diff --git a/db/db_vrfy_stub.c b/db/db_vrfy_stub.c deleted file mode 100644 index 9ed5acd..0000000 --- a/db/db_vrfy_stub.c +++ /dev/null @@ -1,117 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 1996-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#ifndef HAVE_VERIFY -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_am.h" -#include "dbinc/db_verify.h" - -/* - * If the library wasn't compiled with the verification support, various - * routines aren't available. Stub them here, returning an appropriate - * error. - */ - -static int __db_novrfy __P((ENV *)); - -/* - * __db_novrfy -- - * Error when a Berkeley DB build doesn't include the access method. 
- */ -static int -__db_novrfy(env) - ENV *env; -{ - __db_errx(env, - "library build did not include support for database verification"); - return (DB_OPNOTSUP); -} - -int -__db_verify_pp(dbp, file, database, outfile, flags) - DB *dbp; - const char *file, *database; - FILE *outfile; - u_int32_t flags; -{ - int ret; - - COMPQUIET(file, NULL); - COMPQUIET(database, NULL); - COMPQUIET(outfile, NULL); - COMPQUIET(flags, 0); - - ret = __db_novrfy(dbp->env); - - /* The verify method is a destructor. */ - (void)__db_close(dbp, NULL, 0); - - return (ret); -} - -int -__db_verify_internal(dbp, name, subdb, handle, callback, flags) - DB *dbp; - const char *name, *subdb; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - COMPQUIET(dbp, NULL); - COMPQUIET(name, NULL); - COMPQUIET(subdb, NULL); - COMPQUIET(handle, NULL); - COMPQUIET(callback, NULL); - COMPQUIET(flags, 0); - return (0); -} - -int -__db_vrfy_getpageinfo(vdp, pgno, pipp) - VRFY_DBINFO *vdp; - db_pgno_t pgno; - VRFY_PAGEINFO **pipp; -{ - COMPQUIET(pgno, 0); - COMPQUIET(pipp, NULL); - return (__db_novrfy(vdp->pgdbp->env)); -} - -int -__db_vrfy_putpageinfo(env, vdp, pip) - ENV *env; - VRFY_DBINFO *vdp; - VRFY_PAGEINFO *pip; -{ - COMPQUIET(vdp, NULL); - COMPQUIET(pip, NULL); - return (__db_novrfy(env)); -} - -int -__db_vrfy_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, vdp) - DBT *dbtp; - int checkprint; - const char *prefix; - void *handle; - int (*callback) __P((void *, const void *)); - int is_recno; - VRFY_DBINFO *vdp; -{ - COMPQUIET(dbtp, NULL); - COMPQUIET(checkprint, 0); - COMPQUIET(prefix, NULL); - COMPQUIET(handle, NULL); - COMPQUIET(callback, NULL); - COMPQUIET(is_recno, 0); - return (__db_novrfy(vdp->pgdbp->env)); -} -#endif /* !HAVE_VERIFY */ diff --git a/db/db_vrfyutil.c b/db/db_vrfyutil.c deleted file mode 100644 index 04d73d9..0000000 --- a/db/db_vrfyutil.c +++ /dev/null @@ -1,916 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. 
- * - * Copyright (c) 2000-2009 Oracle. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_verify.h" -#include "dbinc/db_am.h" - -static int __db_vrfy_childinc __P((DBC *, VRFY_CHILDINFO *)); -static int __db_vrfy_pageinfo_create __P((ENV *, VRFY_PAGEINFO **)); - -/* - * __db_vrfy_dbinfo_create -- - * Allocate and initialize a VRFY_DBINFO structure. - * - * PUBLIC: int __db_vrfy_dbinfo_create - * PUBLIC: __P((ENV *, DB_THREAD_INFO *, u_int32_t, VRFY_DBINFO **)); - */ -int -__db_vrfy_dbinfo_create(env, ip, pgsize, vdpp) - ENV *env; - DB_THREAD_INFO *ip; - u_int32_t pgsize; - VRFY_DBINFO **vdpp; -{ - DB *cdbp, *pgdbp, *pgset; - VRFY_DBINFO *vdp; - int ret; - - vdp = NULL; - cdbp = pgdbp = pgset = NULL; - - if ((ret = __os_calloc(NULL, 1, sizeof(VRFY_DBINFO), &vdp)) != 0) - goto err; - - if ((ret = __db_create_internal(&cdbp, env, 0)) != 0) - goto err; - - if ((ret = __db_set_flags(cdbp, DB_DUP)) != 0) - goto err; - - if ((ret = __db_set_pagesize(cdbp, pgsize)) != 0) - goto err; - - /* If transactional, make sure we don't log. */ - if (TXN_ON(env) && - (ret = __db_set_flags(cdbp, DB_TXN_NOT_DURABLE)) != 0) - goto err; - if ((ret = __db_open(cdbp, ip, - NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD)) != 0) - goto err; - - if ((ret = __db_create_internal(&pgdbp, env, 0)) != 0) - goto err; - - if ((ret = __db_set_pagesize(pgdbp, pgsize)) != 0) - goto err; - - /* If transactional, make sure we don't log. 
*/ - if (TXN_ON(env) && - (ret = __db_set_flags(pgdbp, DB_TXN_NOT_DURABLE)) != 0) - goto err; - - if ((ret = __db_open(pgdbp, ip, - NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD)) != 0) - goto err; - - if ((ret = __db_vrfy_pgset(env, ip, pgsize, &pgset)) != 0) - goto err; - - LIST_INIT(&vdp->subdbs); - LIST_INIT(&vdp->activepips); - - vdp->cdbp = cdbp; - vdp->pgdbp = pgdbp; - vdp->pgset = pgset; - vdp->thread_info = ip; - *vdpp = vdp; - return (0); - -err: if (cdbp != NULL) - (void)__db_close(cdbp, NULL, 0); - if (pgdbp != NULL) - (void)__db_close(pgdbp, NULL, 0); - if (vdp != NULL) - __os_free(env, vdp); - return (ret); -} - -/* - * __db_vrfy_dbinfo_destroy -- - * Destructor for VRFY_DBINFO. Destroys VRFY_PAGEINFOs and deallocates - * structure. - * - * PUBLIC: int __db_vrfy_dbinfo_destroy __P((ENV *, VRFY_DBINFO *)); - */ -int -__db_vrfy_dbinfo_destroy(env, vdp) - ENV *env; - VRFY_DBINFO *vdp; -{ - VRFY_CHILDINFO *c; - int t_ret, ret; - - ret = 0; - - /* - * Discard active page structures. Ideally there wouldn't be any, - * but in some error cases we may not have cleared them all out. - */ - while (LIST_FIRST(&vdp->activepips) != NULL) - if ((t_ret = __db_vrfy_putpageinfo( - env, vdp, LIST_FIRST(&vdp->activepips))) != 0) { - if (ret == 0) - ret = t_ret; - break; - } - - /* Discard subdatabase list structures. */ - while ((c = LIST_FIRST(&vdp->subdbs)) != NULL) { - LIST_REMOVE(c, links); - __os_free(NULL, c); - } - - if ((t_ret = __db_close(vdp->pgdbp, NULL, 0)) != 0) - ret = t_ret; - - if ((t_ret = __db_close(vdp->cdbp, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - - if ((t_ret = __db_close(vdp->pgset, NULL, 0)) != 0 && ret == 0) - ret = t_ret; - - if (vdp->extents != NULL) - __os_free(env, vdp->extents); - __os_free(env, vdp); - return (ret); -} - -/* - * __db_vrfy_getpageinfo -- - * Get a PAGEINFO structure for a given page, creating it if necessary. 
- * - * PUBLIC: int __db_vrfy_getpageinfo - * PUBLIC: __P((VRFY_DBINFO *, db_pgno_t, VRFY_PAGEINFO **)); - */ -int -__db_vrfy_getpageinfo(vdp, pgno, pipp) - VRFY_DBINFO *vdp; - db_pgno_t pgno; - VRFY_PAGEINFO **pipp; -{ - DB *pgdbp; - DBT key, data; - ENV *env; - VRFY_PAGEINFO *pip; - int ret; - - /* - * We want a page info struct. There are three places to get it from, - * in decreasing order of preference: - * - * 1. vdp->activepips. If it's already "checked out", we're - * already using it, we return the same exact structure with a - * bumped refcount. This is necessary because this code is - * replacing array accesses, and it's common for f() to make some - * changes to a pip, and then call g() and h() which each make - * changes to the same pip. vdps are never shared between threads - * (they're never returned to the application), so this is safe. - * 2. The pgdbp. It's not in memory, but it's in the database, so - * get it, give it a refcount of 1, and stick it on activepips. - * 3. malloc. It doesn't exist yet; create it, then stick it on - * activepips. We'll put it in the database when we putpageinfo - * later. - */ - - /* Case 1. */ - LIST_FOREACH(pip, &vdp->activepips, links) - if (pip->pgno == pgno) - goto found; - - /* Case 2. */ - pgdbp = vdp->pgdbp; - env = pgdbp->env; - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - F_SET(&data, DB_DBT_MALLOC); - key.data = &pgno; - key.size = sizeof(db_pgno_t); - - if ((ret = __db_get(pgdbp, - vdp->thread_info, NULL, &key, &data, 0)) == 0) { - /* Found it. */ - DB_ASSERT(env, data.size == sizeof(VRFY_PAGEINFO)); - pip = data.data; - LIST_INSERT_HEAD(&vdp->activepips, pip, links); - goto found; - } else if (ret != DB_NOTFOUND) /* Something nasty happened. 
*/ - return (ret); - - /* Case 3 */ - if ((ret = __db_vrfy_pageinfo_create(env, &pip)) != 0) - return (ret); - - LIST_INSERT_HEAD(&vdp->activepips, pip, links); -found: pip->pi_refcount++; - - *pipp = pip; - return (0); -} - -/* - * __db_vrfy_putpageinfo -- - * Put back a VRFY_PAGEINFO that we're done with. - * - * PUBLIC: int __db_vrfy_putpageinfo __P((ENV *, - * PUBLIC: VRFY_DBINFO *, VRFY_PAGEINFO *)); - */ -int -__db_vrfy_putpageinfo(env, vdp, pip) - ENV *env; - VRFY_DBINFO *vdp; - VRFY_PAGEINFO *pip; -{ - DB *pgdbp; - DBT key, data; - VRFY_PAGEINFO *p; - int ret; - - if (--pip->pi_refcount > 0) - return (0); - - pgdbp = vdp->pgdbp; - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - key.data = &pip->pgno; - key.size = sizeof(db_pgno_t); - data.data = pip; - data.size = sizeof(VRFY_PAGEINFO); - - if ((ret = __db_put(pgdbp, - vdp->thread_info, NULL, &key, &data, 0)) != 0) - return (ret); - - LIST_FOREACH(p, &vdp->activepips, links) - if (p == pip) - break; - if (p != NULL) - LIST_REMOVE(p, links); - - __os_ufree(env, p); - return (0); -} - -/* - * __db_vrfy_pgset -- - * Create a temporary database for the storing of sets of page numbers. - * (A mapping from page number to int, used by the *_meta2pgset functions, - * as well as for keeping track of which pages the verifier has seen.) - * - * PUBLIC: int __db_vrfy_pgset __P((ENV *, - * PUBLIC: DB_THREAD_INFO *, u_int32_t, DB **)); - */ -int -__db_vrfy_pgset(env, ip, pgsize, dbpp) - ENV *env; - DB_THREAD_INFO *ip; - u_int32_t pgsize; - DB **dbpp; -{ - DB *dbp; - int ret; - - if ((ret = __db_create_internal(&dbp, env, 0)) != 0) - return (ret); - if ((ret = __db_set_pagesize(dbp, pgsize)) != 0) - goto err; - - /* If transactional, make sure we don't log. 
*/ - if (TXN_ON(env) && - (ret = __db_set_flags(dbp, DB_TXN_NOT_DURABLE)) != 0) - goto err; - if ((ret = __db_open(dbp, ip, - NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0600, PGNO_BASE_MD)) == 0) - *dbpp = dbp; - else -err: (void)__db_close(dbp, NULL, 0); - - return (ret); -} - -/* - * __db_vrfy_pgset_get -- - * Get the value associated in a page set with a given pgno. Return - * a 0 value (and succeed) if we've never heard of this page. - * - * PUBLIC: int __db_vrfy_pgset_get __P((DB *, DB_THREAD_INFO *, db_pgno_t, - * PUBLIC: int *)); - */ -int -__db_vrfy_pgset_get(dbp, ip, pgno, valp) - DB *dbp; - DB_THREAD_INFO *ip; - db_pgno_t pgno; - int *valp; -{ - DBT key, data; - int ret, val; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - key.data = &pgno; - key.size = sizeof(db_pgno_t); - data.data = &val; - data.ulen = sizeof(int); - F_SET(&data, DB_DBT_USERMEM); - - if ((ret = __db_get(dbp, ip, NULL, &key, &data, 0)) == 0) { - DB_ASSERT(dbp->env, data.size == sizeof(int)); - } else if (ret == DB_NOTFOUND) - val = 0; - else - return (ret); - - *valp = val; - return (0); -} - -/* - * __db_vrfy_pgset_inc -- - * Increment the value associated with a pgno by 1. - * - * PUBLIC: int __db_vrfy_pgset_inc __P((DB *, DB_THREAD_INFO *, db_pgno_t)); - */ -int -__db_vrfy_pgset_inc(dbp, ip, pgno) - DB *dbp; - DB_THREAD_INFO *ip; - db_pgno_t pgno; -{ - DBT key, data; - int ret; - int val; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - val = 0; - - key.data = &pgno; - key.size = sizeof(db_pgno_t); - data.data = &val; - data.ulen = sizeof(int); - F_SET(&data, DB_DBT_USERMEM); - - if ((ret = __db_get(dbp, ip, NULL, &key, &data, 0)) == 0) { - DB_ASSERT(dbp->env, data.size == sizeof(int)); - } else if (ret != DB_NOTFOUND) - return (ret); - - data.size = sizeof(int); - ++val; - - return (__db_put(dbp, ip, NULL, &key, &data, 0)); -} - -/* - * __db_vrfy_pgset_next -- - * Given a cursor open in a pgset database, get the next page in the - * set. 
- * - * PUBLIC: int __db_vrfy_pgset_next __P((DBC *, db_pgno_t *)); - */ -int -__db_vrfy_pgset_next(dbc, pgnop) - DBC *dbc; - db_pgno_t *pgnop; -{ - DBT key, data; - db_pgno_t pgno; - int ret; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - /* We don't care about the data, just the keys. */ - F_SET(&data, DB_DBT_USERMEM | DB_DBT_PARTIAL); - F_SET(&key, DB_DBT_USERMEM); - key.data = &pgno; - key.ulen = sizeof(db_pgno_t); - - if ((ret = __dbc_get(dbc, &key, &data, DB_NEXT)) != 0) - return (ret); - - DB_ASSERT(dbc->env, key.size == sizeof(db_pgno_t)); - *pgnop = pgno; - - return (0); -} - -/* - * __db_vrfy_childcursor -- - * Create a cursor to walk the child list with. Returns with a nonzero - * final argument if the specified page has no children. - * - * PUBLIC: int __db_vrfy_childcursor __P((VRFY_DBINFO *, DBC **)); - */ -int -__db_vrfy_childcursor(vdp, dbcp) - VRFY_DBINFO *vdp; - DBC **dbcp; -{ - DB *cdbp; - DBC *dbc; - int ret; - - cdbp = vdp->cdbp; - - if ((ret = __db_cursor(cdbp, vdp->thread_info, NULL, &dbc, 0)) == 0) - *dbcp = dbc; - - return (ret); -} - -/* - * __db_vrfy_childput -- - * Add a child structure to the set for a given page. - * - * PUBLIC: int __db_vrfy_childput - * PUBLIC: __P((VRFY_DBINFO *, db_pgno_t, VRFY_CHILDINFO *)); - */ -int -__db_vrfy_childput(vdp, pgno, cip) - VRFY_DBINFO *vdp; - db_pgno_t pgno; - VRFY_CHILDINFO *cip; -{ - DB *cdbp; - DBC *cc; - DBT key, data; - VRFY_CHILDINFO *oldcip; - int ret; - - cdbp = vdp->cdbp; - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - key.data = &pgno; - key.size = sizeof(db_pgno_t); - - /* - * We want to avoid adding multiple entries for a single child page; - * we only need to verify each child once, even if a child (such - * as an overflow key) is multiply referenced. - * - * However, we also need to make sure that when walking the list - * of children, we encounter them in the order they're referenced - * on a page. 
(This permits us, for example, to verify the - * prev_pgno/next_pgno chain of Btree leaf pages.) - * - * Check the child database to make sure that this page isn't - * already a child of the specified page number. If it's not, - * put it at the end of the duplicate set. - */ - if ((ret = __db_vrfy_childcursor(vdp, &cc)) != 0) - return (ret); - for (ret = __db_vrfy_ccset(cc, pgno, &oldcip); ret == 0; - ret = __db_vrfy_ccnext(cc, &oldcip)) - if (oldcip->pgno == cip->pgno) { - /* - * Found a matching child. Increment its reference - * count--we've run into it again--but don't put it - * again. - */ - if ((ret = __db_vrfy_childinc(cc, oldcip)) != 0 || - (ret = __db_vrfy_ccclose(cc)) != 0) - return (ret); - return (0); - } - if (ret != DB_NOTFOUND) { - (void)__db_vrfy_ccclose(cc); - return (ret); - } - if ((ret = __db_vrfy_ccclose(cc)) != 0) - return (ret); - - cip->refcnt = 1; - data.data = cip; - data.size = sizeof(VRFY_CHILDINFO); - - return (__db_put(cdbp, vdp->thread_info, NULL, &key, &data, 0)); -} - -/* - * __db_vrfy_childinc -- - * Increment the refcount of the VRFY_CHILDINFO struct that the child - * cursor is pointing to. (The caller has just retrieved this struct, and - * passes it in as cip to save us a get.) - */ -static int -__db_vrfy_childinc(dbc, cip) - DBC *dbc; - VRFY_CHILDINFO *cip; -{ - DBT key, data; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - cip->refcnt++; - data.data = cip; - data.size = sizeof(VRFY_CHILDINFO); - - return (__dbc_put(dbc, &key, &data, DB_CURRENT)); -} - -/* - * __db_vrfy_ccset -- - * Sets a cursor created with __db_vrfy_childcursor to the first - * child of the given pgno, and returns it in the third arg. 
- * - * PUBLIC: int __db_vrfy_ccset __P((DBC *, db_pgno_t, VRFY_CHILDINFO **)); - */ -int -__db_vrfy_ccset(dbc, pgno, cipp) - DBC *dbc; - db_pgno_t pgno; - VRFY_CHILDINFO **cipp; -{ - DBT key, data; - int ret; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - key.data = &pgno; - key.size = sizeof(db_pgno_t); - - if ((ret = __dbc_get(dbc, &key, &data, DB_SET)) != 0) - return (ret); - - DB_ASSERT(dbc->env, data.size == sizeof(VRFY_CHILDINFO)); - *cipp = (VRFY_CHILDINFO *)data.data; - - return (0); -} - -/* - * __db_vrfy_ccnext -- - * Gets the next child of the given cursor created with - * __db_vrfy_childcursor, and returns it in the memory provided in the - * second arg. - * - * PUBLIC: int __db_vrfy_ccnext __P((DBC *, VRFY_CHILDINFO **)); - */ -int -__db_vrfy_ccnext(dbc, cipp) - DBC *dbc; - VRFY_CHILDINFO **cipp; -{ - DBT key, data; - int ret; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - if ((ret = __dbc_get(dbc, &key, &data, DB_NEXT_DUP)) != 0) - return (ret); - - DB_ASSERT(dbc->env, data.size == sizeof(VRFY_CHILDINFO)); - *cipp = (VRFY_CHILDINFO *)data.data; - - return (0); -} - -/* - * __db_vrfy_ccclose -- - * Closes the cursor created with __db_vrfy_childcursor. - * - * This doesn't actually do anything interesting now, but it's - * not inconceivable that we might change the internal database usage - * and keep the interfaces the same, and a function call here or there - * seldom hurts anyone. - * - * PUBLIC: int __db_vrfy_ccclose __P((DBC *)); - */ -int -__db_vrfy_ccclose(dbc) - DBC *dbc; -{ - - return (__dbc_close(dbc)); -} - -/* - * __db_vrfy_pageinfo_create -- - * Constructor for VRFY_PAGEINFO; allocates and initializes. - */ -static int -__db_vrfy_pageinfo_create(env, pipp) - ENV *env; - VRFY_PAGEINFO **pipp; -{ - VRFY_PAGEINFO *pip; - int ret; - - /* - * pageinfo structs are sometimes allocated here and sometimes - * allocated by fetching them from a database with DB_DBT_MALLOC. 
- * There's no easy way for the destructor to tell which was - * used, and so we always allocate with __os_umalloc so we can free - * with __os_ufree. - */ - if ((ret = __os_umalloc(env, sizeof(VRFY_PAGEINFO), &pip)) != 0) - return (ret); - memset(pip, 0, sizeof(VRFY_PAGEINFO)); - - *pipp = pip; - return (0); -} - -/* - * __db_salvage_init -- - * Set up salvager database. - * - * PUBLIC: int __db_salvage_init __P((VRFY_DBINFO *)); - */ -int -__db_salvage_init(vdp) - VRFY_DBINFO *vdp; -{ - DB *dbp; - int ret; - - if ((ret = __db_create_internal(&dbp, NULL, 0)) != 0) - return (ret); - - if ((ret = __db_set_pagesize(dbp, 1024)) != 0) - goto err; - - if ((ret = __db_open(dbp, vdp->thread_info, - NULL, NULL, NULL, DB_BTREE, DB_CREATE, 0, PGNO_BASE_MD)) != 0) - goto err; - - vdp->salvage_pages = dbp; - return (0); - -err: (void)__db_close(dbp, NULL, 0); - return (ret); -} - -/* - * __db_salvage_destroy -- - * Close salvager database. - * PUBLIC: int __db_salvage_destroy __P((VRFY_DBINFO *)); - */ -int -__db_salvage_destroy(vdp) - VRFY_DBINFO *vdp; -{ - return (vdp->salvage_pages == NULL ? 0 : - __db_close(vdp->salvage_pages, NULL, 0)); -} - -/* - * __db_salvage_getnext -- - * Get the next (first) unprinted page in the database of pages we need to - * print still. Delete entries for any already-printed pages we encounter - * in this search, as well as the page we're returning. 
- * - * PUBLIC: int __db_salvage_getnext - * PUBLIC: __P((VRFY_DBINFO *, DBC **, db_pgno_t *, u_int32_t *, int)); - */ -int -__db_salvage_getnext(vdp, dbcp, pgnop, pgtypep, skip_overflow) - VRFY_DBINFO *vdp; - DBC **dbcp; - db_pgno_t *pgnop; - u_int32_t *pgtypep; - int skip_overflow; -{ - DB *dbp; - DBT key, data; - int ret; - u_int32_t pgtype; - - dbp = vdp->salvage_pages; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - if (*dbcp == NULL && - (ret = __db_cursor(dbp, vdp->thread_info, NULL, dbcp, 0)) != 0) - return (ret); - - while ((ret = __dbc_get(*dbcp, &key, &data, DB_NEXT)) == 0) { - DB_ASSERT(dbp->env, data.size == sizeof(u_int32_t)); - memcpy(&pgtype, data.data, sizeof(pgtype)); - - if (skip_overflow && pgtype == SALVAGE_OVERFLOW) - continue; - - if ((ret = __dbc_del(*dbcp, 0)) != 0) - return (ret); - if (pgtype != SALVAGE_IGNORE) { - DB_ASSERT(dbp->env, key.size == sizeof(db_pgno_t)); - DB_ASSERT(dbp->env, data.size == sizeof(u_int32_t)); - - *pgnop = *(db_pgno_t *)key.data; - *pgtypep = *(u_int32_t *)data.data; - break; - } - } - - return (ret); -} - -/* - * __db_salvage_isdone -- - * Return whether or not the given pgno is already marked - * SALVAGE_IGNORE (meaning that we don't need to print it again). - * - * Returns DB_KEYEXIST if it is marked, 0 if not, or another error on - * error. - * - * PUBLIC: int __db_salvage_isdone __P((VRFY_DBINFO *, db_pgno_t)); - */ -int -__db_salvage_isdone(vdp, pgno) - VRFY_DBINFO *vdp; - db_pgno_t pgno; -{ - DB *dbp; - DBT key, data; - int ret; - u_int32_t currtype; - - dbp = vdp->salvage_pages; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - currtype = SALVAGE_INVALID; - data.data = &currtype; - data.ulen = sizeof(u_int32_t); - data.flags = DB_DBT_USERMEM; - - key.data = &pgno; - key.size = sizeof(db_pgno_t); - - /* - * Put an entry for this page, with pgno as key and type as data, - * unless it's already there and is marked done. 
- * If it's there and is marked anything else, that's fine--we - * want to mark it done. - */ - if ((ret = __db_get(dbp, - vdp->thread_info, NULL, &key, &data, 0)) == 0) { - /* - * The key's already here. Check and see if it's already - * marked done. If it is, return DB_KEYEXIST. If it's not, - * return 0. - */ - if (currtype == SALVAGE_IGNORE) - return (DB_KEYEXIST); - else - return (0); - } else if (ret != DB_NOTFOUND) - return (ret); - - /* The pgno is not yet marked anything; return 0. */ - return (0); -} - -/* - * __db_salvage_markdone -- - * Mark as done a given page. - * - * PUBLIC: int __db_salvage_markdone __P((VRFY_DBINFO *, db_pgno_t)); - */ -int -__db_salvage_markdone(vdp, pgno) - VRFY_DBINFO *vdp; - db_pgno_t pgno; -{ - DB *dbp; - DBT key, data; - int pgtype, ret; - u_int32_t currtype; - - pgtype = SALVAGE_IGNORE; - dbp = vdp->salvage_pages; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - currtype = SALVAGE_INVALID; - data.data = &currtype; - data.ulen = sizeof(u_int32_t); - data.flags = DB_DBT_USERMEM; - - key.data = &pgno; - key.size = sizeof(db_pgno_t); - - /* - * Put an entry for this page, with pgno as key and type as data, - * unless it's already there and is marked done. - * If it's there and is marked anything else, that's fine--we - * want to mark it done, but db_salvage_isdone only lets - * us know if it's marked IGNORE. - * - * We don't want to return DB_KEYEXIST, though; this will - * likely get passed up all the way and make no sense to the - * application. Instead, use DB_VERIFY_BAD to indicate that - * we've seen this page already--it probably indicates a - * multiply-linked page. - */ - if ((ret = __db_salvage_isdone(vdp, pgno)) != 0) - return (ret == DB_KEYEXIST ? 
DB_VERIFY_BAD : ret); - - data.size = sizeof(u_int32_t); - data.data = &pgtype; - - return (__db_put(dbp, vdp->thread_info, NULL, &key, &data, 0)); -} - -/* - * __db_salvage_markneeded -- - * If it has not yet been printed, make note of the fact that a page - * must be dealt with later. - * - * PUBLIC: int __db_salvage_markneeded - * PUBLIC: __P((VRFY_DBINFO *, db_pgno_t, u_int32_t)); - */ -int -__db_salvage_markneeded(vdp, pgno, pgtype) - VRFY_DBINFO *vdp; - db_pgno_t pgno; - u_int32_t pgtype; -{ - DB *dbp; - DBT key, data; - int ret; - - dbp = vdp->salvage_pages; - - memset(&key, 0, sizeof(DBT)); - memset(&data, 0, sizeof(DBT)); - - key.data = &pgno; - key.size = sizeof(db_pgno_t); - - data.data = &pgtype; - data.size = sizeof(u_int32_t); - - /* - * Put an entry for this page, with pgno as key and type as data, - * unless it's already there, in which case it's presumably - * already been marked done. - */ - ret = __db_put(dbp, - vdp->thread_info, NULL, &key, &data, DB_NOOVERWRITE); - return (ret == DB_KEYEXIST ? 0 : ret); -} - -/* - * __db_vrfy_prdbt -- - * Print out a DBT data element from a verification routine. - * - * PUBLIC: int __db_vrfy_prdbt __P((DBT *, int, const char *, void *, - * PUBLIC: int (*)(void *, const void *), int, VRFY_DBINFO *)); - */ -int -__db_vrfy_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno, vdp) - DBT *dbtp; - int checkprint; - const char *prefix; - void *handle; - int (*callback) __P((void *, const void *)); - int is_recno; - VRFY_DBINFO *vdp; -{ - if (vdp != NULL) { - /* - * If vdp is non-NULL, we might be the first key in the - * "fake" subdatabase used for key/data pairs we can't - * associate with a known subdb. - * - * Check and clear the SALVAGE_PRINTHEADER flag; if - * it was set, print a subdatabase header. 
- */ - if (F_ISSET(vdp, SALVAGE_PRINTHEADER)) { - (void)__db_prheader( - NULL, "__OTHER__", 0, 0, handle, callback, vdp, 0); - F_CLR(vdp, SALVAGE_PRINTHEADER); - F_SET(vdp, SALVAGE_PRINTFOOTER); - } - - /* - * Even if the printable flag wasn't set by our immediate - * caller, it may be set on a salvage-wide basis. - */ - if (F_ISSET(vdp, SALVAGE_PRINTABLE)) - checkprint = 1; - } - return ( - __db_prdbt(dbtp, checkprint, prefix, handle, callback, is_recno)); -} diff --git a/db/partition.c b/db/partition.c deleted file mode 100644 index 4e89ede..0000000 --- a/db/partition.c +++ /dev/null @@ -1,2048 +0,0 @@ -/*- - * See the file LICENSE for redistribution information. - * - * Copyright (c) 2001, 2010 Oracle and/or its affiliates. All rights reserved. - * - * $Id$ - */ - -#include "db_config.h" - -#include "db_int.h" -#include "dbinc/db_page.h" -#include "dbinc/db_verify.h" -#include "dbinc/btree.h" -#ifdef HAVE_HASH -#include "dbinc/hash.h" -#endif -#include "dbinc/lock.h" -#include "dbinc/log.h" -#include "dbinc/mp.h" -#include "dbinc/partition.h" -#include "dbinc/txn.h" -#ifdef HAVE_PARTITION - -static int __part_rr __P((DB *, DB_THREAD_INFO *, DB_TXN *, - const char *, const char *, const char *, u_int32_t)); -static int __partc_close __P((DBC *, db_pgno_t, int *)); -static int __partc_del __P((DBC*, u_int32_t)); -static int __partc_destroy __P((DBC*)); -static int __partc_get_pp __P((DBC*, DBT *, DBT *, u_int32_t)); -static int __partc_put __P((DBC*, DBT *, DBT *, u_int32_t, db_pgno_t *)); -static int __partc_writelock __P((DBC*)); -static int __partition_chk_meta __P((DB *, - DB_THREAD_INFO *, DB_TXN *, u_int32_t)); -static int __partition_setup_keys __P((DBC *, - DB_PARTITION *, DBMETA *, u_int32_t)); -static int __part_key_cmp __P((const void *, const void *)); -static inline void __part_search __P((DB *, - DB_PARTITION *, DBT *, u_int32_t *)); - -static char *Alloc_err = "Partition open failed to allocate %d bytes"; - -/* - * Allocate a partition cursor and 
copy flags to the partition cursor. - * Not passed: - * DBC_PARTITIONED -- the subcursors are not. - * DBC_OWN_LID -- the arg dbc owns the lock id. - * DBC_WRITECURSOR DBC_WRITER -- CDS locking happens on - * the whole DB, not the partition. - */ -#define GET_PART_CURSOR(dbc, new_dbc, part_id) do { \ - DB *__part_dbp; \ - __part_dbp = part->handles[part_id]; \ - if ((ret = __db_cursor_int(__part_dbp, \ - (dbc)->thread_info, (dbc)->txn, __part_dbp->type, \ - PGNO_INVALID, 0, (dbc)->locker, &new_dbc)) != 0) \ - goto err; \ - (new_dbc)->flags = (dbc)->flags & \ - ~(DBC_PARTITIONED|DBC_OWN_LID|DBC_WRITECURSOR|DBC_WRITER); \ -} while (0) - -/* - * Search for the correct partition. - */ -static inline void __part_search(dbp, part, key, part_idp) - DB *dbp; - DB_PARTITION *part; - DBT *key; - u_int32_t *part_idp; -{ - db_indx_t base, indx, limit; - int cmp; - int (*func) __P((DB *, const DBT *, const DBT *)); - - DB_ASSERT(dbp->env, part->nparts != 0); - COMPQUIET(cmp, 0); - COMPQUIET(indx, 0); - - func = ((BTREE *)dbp->bt_internal)->bt_compare; - DB_BINARY_SEARCH_FOR(base, limit, part->nparts, O_INDX) { - DB_BINARY_SEARCH_INCR(indx, base, limit, O_INDX); - cmp = func(dbp, key, &part->keys[indx]); - if (cmp == 0) - break; - if (cmp > 0) - DB_BINARY_SEARCH_SHIFT_BASE(indx, base, limit, O_INDX); - } - if (cmp == 0) - *part_idp = indx; - else if ((*part_idp = base) != 0) - (*part_idp)--; -} - -/* - * __partition_init -- - * Initialize the partition structure. - * Called when the meta data page is read in during database open or - * when partition keys or a callback are set. 
- * - * PUBLIC: int __partition_init __P((DB *, u_int32_t)); - */ -int -__partition_init(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - DB_PARTITION *part; - int ret; - - if ((part = dbp->p_internal) != NULL) { - if ((LF_ISSET(DBMETA_PART_RANGE) && - F_ISSET(part, PART_CALLBACK)) || - (LF_ISSET(DBMETA_PART_CALLBACK) && - F_ISSET(part, PART_RANGE))) { - __db_errx(dbp->env, - "Cannot specify callback and range keys."); - return (EINVAL); - } - } else if ((ret = __os_calloc(dbp->env, 1, sizeof(*part), &part)) != 0) - return (ret); - - if (LF_ISSET(DBMETA_PART_RANGE)) - F_SET(part, PART_RANGE); - if (LF_ISSET(DBMETA_PART_CALLBACK)) - F_SET(part, PART_CALLBACK); - dbp->p_internal = part; - /* Set up AM-specific methods that do not require an open. */ - dbp->db_am_rename = __part_rename; - dbp->db_am_remove = __part_remove; - return (0); -} -/* - * __partition_set -- - * Set the partitioning keys or callback function. - * This routine must be called prior to creating the database. - * PUBLIC: int __partition_set __P((DB *, u_int32_t, DBT *, - * PUBLIC: u_int32_t (*callback)(DB *, DBT *key))); - */ - -int -__partition_set(dbp, parts, keys, callback) - DB *dbp; - u_int32_t parts; - DBT *keys; - u_int32_t (*callback)(DB *, DBT *key); -{ - DB_PARTITION *part; - ENV *env; - int ret; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_partition"); - env = dbp->dbenv->env; - - if (parts < 2) { - __db_errx(env, "Must specify at least 2 partitions."); - return (EINVAL); - } - - if (keys == NULL && callback == NULL) { - __db_errx(env, "Must specify either keys or a callback."); - return (EINVAL); - } - if (keys != NULL && callback != NULL) { -bad: __db_errx(env, "May not specify both keys and a callback."); - return (EINVAL); - } - - if ((part = dbp->p_internal) == NULL) { - if ((ret = __partition_init(dbp, - keys != NULL ? 
- DBMETA_PART_RANGE : DBMETA_PART_CALLBACK)) != 0) - return (ret); - part = dbp->p_internal; - } else if ((part->keys != NULL && callback != NULL) || - (part->callback != NULL && keys != NULL)) - goto bad; - - part->nparts = parts; - part->keys = keys; - part->callback = callback; - - return (0); -} - -/* - * __partition_set_dirs -- - * Set the directories for creating the partition databases. - * They must be in the environment. - * PUBLIC: int __partition_set_dirs __P((DB *, const char **)); - */ -int -__partition_set_dirs(dbp, dirp) - DB *dbp; - const char **dirp; -{ - DB_ENV *dbenv; - DB_PARTITION *part; - ENV *env; - u_int32_t ndirs, slen; - int i, ret; - const char **dir; - char *cp, **part_dirs, **pd; - - DB_ILLEGAL_AFTER_OPEN(dbp, "DB->set_partition_dirs"); - dbenv = dbp->dbenv; - env = dbp->env; - - ndirs = 1; - slen = 0; - for (dir = dirp; *dir != NULL; dir++) { - if (F_ISSET(env, ENV_DBLOCAL)) - slen += (u_int32_t)strlen(*dir) + 1; - ndirs++; - } - - slen += sizeof(char *) * ndirs; - if ((ret = __os_malloc(env, slen, &part_dirs)) != 0) - return (EINVAL); - memset(part_dirs, 0, slen); - - cp = (char *) part_dirs + (sizeof(char *) * ndirs); - pd = part_dirs; - for (dir = dirp; *dir != NULL; dir++, pd++) { - if (F_ISSET(env, ENV_DBLOCAL)) { - (void)strcpy(cp, *dir); - *pd = cp; - cp += strlen(*dir) + 1; - continue; - } - for (i = 0; i < dbenv->data_next; i++) - if (strcmp(*dir, dbenv->db_data_dir[i]) == 0) - break; - if (i == dbenv->data_next) { - __db_errx(dbp->env, - "Directory not in environment list %s", *dir); - __os_free(env, part_dirs); - return (EINVAL); - } - *pd = dbenv->db_data_dir[i]; - } - - if ((part = dbp->p_internal) == NULL) { - if ((ret = __partition_init(dbp, 0)) != 0) - return (ret); - part = dbp->p_internal; - } - - part->dirs = (const char **)part_dirs; - - return (0); -} - -/* - * __partition_open -- - * Open/create a partitioned database. 
- * PUBLIC: int __partition_open __P((DB *, DB_THREAD_INFO *, - * PUBLIC: DB_TXN *, const char *, DBTYPE, u_int32_t, int, int)); - */ -int -__partition_open(dbp, ip, txn, fname, type, flags, mode, do_open) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *fname; - DBTYPE type; - u_int32_t flags; - int mode, do_open; -{ - DB *part_db; - DB_PARTITION *part; - DBC *dbc; - ENV *env; - u_int32_t part_id; - int ret; - char *name, *sp; - const char **dirp, *np; - - part = dbp->p_internal; - env = dbp->dbenv->env; - name = NULL; - - if ((ret = __partition_chk_meta(dbp, ip, txn, flags)) != 0 && do_open) - goto err; - - if ((ret = __os_calloc(env, - part->nparts, sizeof(*part->handles), &part->handles)) != 0) { - __db_errx(env, - Alloc_err, part->nparts * sizeof(*part->handles)); - goto err; - } - - DB_ASSERT(env, fname != NULL); - if ((ret = __os_malloc(env, - strlen(fname) + PART_LEN + 1, &name)) != 0) { - __db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1); - goto err; - } - - sp = name; - np = __db_rpath(fname); - if (np == NULL) - np = fname; - else { - np++; - (void)strncpy(name, fname, (size_t)(np - fname)); - sp = name + (np - fname); - } - - if (F_ISSET(dbp, DB_AM_RECOVER)) - goto done; - dirp = part->dirs; - for (part_id = 0; part_id < part->nparts; part_id++) { - if ((ret = __db_create_internal( - &part->handles[part_id], dbp->env, 0)) != 0) - goto err; - - part_db = part->handles[part_id]; - part_db->flags = F_ISSET(dbp, - ~(DB_AM_CREATED | DB_AM_CREATED_MSTR | DB_AM_OPEN_CALLED)); - part_db->adj_fileid = dbp->adj_fileid; - part_db->pgsize = dbp->pgsize; - part_db->priority = dbp->priority; - part_db->db_append_recno = dbp->db_append_recno; - part_db->db_feedback = dbp->db_feedback; - part_db->dup_compare = dbp->dup_compare; - part_db->app_private = dbp->app_private; - part_db->api_internal = dbp->api_internal; - - if (dbp->type == DB_BTREE) - __bam_copy_config(dbp, part_db, part->nparts); -#ifdef HAVE_HASH - if (dbp->type == DB_HASH) - 
__ham_copy_config(dbp, part_db, part->nparts); -#endif - - (void)sprintf(sp, PART_NAME, np, part_id); - if ((ret = __os_strdup(env, name, &part_db->fname)) != 0) - goto err; - if (do_open) { - /* - * Cycle through the directory names passed in, - * if any. - */ - if (dirp != NULL && - (part_db->dirname = *dirp++) == NULL) - part_db->dirname = *(dirp = part->dirs); - if ((ret = __db_open(part_db, ip, txn, - name, NULL, type, flags, mode, PGNO_BASE_MD)) != 0) - goto err; - } - } - - /* Get rid of the cursor used to open the database its the wrong type */ -done: while ((dbc = TAILQ_FIRST(&dbp->free_queue)) != NULL) - if ((ret = __dbc_destroy(dbc)) != 0) - break; - - if (0) { -err: (void)__partition_close(dbp, txn, 0); - } - if (name != NULL) - __os_free(env, name); - return (ret); -} - -/* - * __partition_chk_meta -- - * Check for a consistent meta data page and parameters when opening a - * partitioned database. - */ -static int -__partition_chk_meta(dbp, ip, txn, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - u_int32_t flags; -{ - DBMETA *meta; - DB_PARTITION *part; - DBC *dbc; - DB_LOCK metalock; - DB_MPOOLFILE *mpf; - ENV *env; - db_pgno_t base_pgno; - int ret, t_ret; - - dbc = NULL; - meta = NULL; - LOCK_INIT(metalock); - part = dbp->p_internal; - mpf = dbp->mpf; - env = dbp->env; - ret = 0; - - /* Get a cursor on the main db. */ - dbp->p_internal = NULL; - if ((ret = __db_cursor(dbp, ip, txn, &dbc, 0)) != 0) - goto err; - - /* Get the metadata page. 
*/ - base_pgno = PGNO_BASE_MD; - if ((ret = - __db_lget(dbc, 0, base_pgno, DB_LOCK_READ, 0, &metalock)) != 0) - goto err; - if ((ret = __memp_fget(mpf, &base_pgno, ip, dbc->txn, 0, &meta)) != 0) - goto err; - - if (meta->magic != DB_HASHMAGIC && - (meta->magic != DB_BTREEMAGIC || F_ISSET(meta, BTM_RECNO))) { - __db_errx(env, - "Partitioning may only specified on BTREE and HASH databases."); - ret = EINVAL; - goto err; - } - if (!FLD_ISSET(meta->metaflags, - DBMETA_PART_RANGE | DBMETA_PART_CALLBACK)) { - __db_errx(env, - "Partitioning specified on a non-partitioned database."); - ret = EINVAL; - goto err; - } - - if ((F_ISSET(part, PART_RANGE) && - FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK)) || - (F_ISSET(part, PART_CALLBACK) && - FLD_ISSET(meta->metaflags, DBMETA_PART_RANGE))) { - __db_errx(env, "Incompatible partitioning specified."); - ret = EINVAL; - goto err; - } - - if (FLD_ISSET(meta->metaflags, DBMETA_PART_CALLBACK) && - part->callback == NULL && !IS_RECOVERING(env) && - !F_ISSET(dbp, DB_AM_RECOVER) && !LF_ISSET(DB_RDWRMASTER)) { - __db_errx(env, "Partition callback not specified."); - ret = EINVAL; - goto err; - } - - if (F_ISSET(dbp, DB_AM_RECNUM)) { - __db_errx(env, - "Record numbers are not supported in partitioned databases."); - ret = EINVAL; - goto err; - } - - if (part->nparts == 0) { - if (LF_ISSET(DB_CREATE) && meta->nparts == 0) { - __db_errx(env, "Zero paritions specified."); - ret = EINVAL; - goto err; - } else - part->nparts = meta->nparts; - } else if (meta->nparts != 0 && part->nparts != meta->nparts) { - __db_errx(env, "Number of partitions does not match."); - ret = EINVAL; - goto err; - } - - if (meta->magic == DB_HASHMAGIC) { - if (!F_ISSET(part, PART_CALLBACK)) { - __db_errx(env, - "Hash database must specify a partition callback."); - ret = EINVAL; - } - } else if (meta->magic != DB_BTREEMAGIC) { - __db_errx(env, - "Partitioning only supported on BTREE nad HASH."); - ret = EINVAL; - } else - ret = __partition_setup_keys(dbc, part, 
meta, flags); - -err: /* Put the metadata page back. */ - if (meta != NULL && (t_ret = __memp_fput(mpf, - ip, meta, dbc->priority)) != 0 && ret == 0) - ret = t_ret; - if ((t_ret = __LPUT(dbc, metalock)) != 0 && ret == 0) - ret = t_ret; - - if (dbc != NULL && (t_ret = __dbc_close(dbc)) != 0 && ret == 0) - ret = t_ret; - - dbp->p_internal = part; - return (ret); -} - -/* - * Support for sorting keys. Keys must be sorted using the btree - * compare function so if we call qsort in __partiton_setup_keys - * we use this structure to pass the DBP and compare function. - */ -struct key_sort { - DB *dbp; - DBT *key; - int (*compare) __P((DB *, const DBT *, const DBT *)); -}; - -static int __part_key_cmp(a, b) - const void *a, *b; -{ - const struct key_sort *ka, *kb; - - ka = a; - kb = b; - return (ka->compare(ka->dbp, ka->key, kb->key)); -} -/* - * __partition_setup_keys -- - * Get the partition keys into memory, or put them to disk if we - * are creating a partitioned database. - */ -static int -__partition_setup_keys(dbc, part, meta, flags) - DBC *dbc; - DB_PARTITION *part; - DBMETA *meta; - u_int32_t flags; -{ - BTREE *t; - DB *dbp; - DBT data, key, *keys, *kp; - ENV *env; - u_int32_t ds, i, j; - u_int8_t *dd; - struct key_sort *ks; - int have_keys, ret; - int (*compare) __P((DB *, const DBT *, const DBT *)); - void *dp; - - COMPQUIET(dd, NULL); - COMPQUIET(ds, 0); - memset(&data, 0, sizeof(data)); - memset(&key, 0, sizeof(key)); - ks = NULL; - - dbp = dbc->dbp; - env = dbp->env; - - /* Need to just read the main database. */ - dbp->p_internal = NULL; - have_keys = 0; - - /* First verify that things what we expect. 
*/ - if ((ret = __dbc_get(dbc, &key, &data, DB_FIRST)) != 0) { - if (ret != DB_NOTFOUND) - goto err; - if (F_ISSET(part, PART_CALLBACK)) { - ret = 0; - goto done; - } - if (!LF_ISSET(DB_CREATE) && !F_ISSET(dbp, DB_AM_RECOVER) && - !LF_ISSET(DB_RDWRMASTER)) { - __db_errx(env, "No range keys found."); - ret = EINVAL; - goto err; - } - } else { - if (F_ISSET(part, PART_CALLBACK)) { - __db_errx(env, "Keys found and callback set."); - ret = EINVAL; - goto err; - } - if (key.size != 0) { - __db_errx(env, "Partition key 0 is not empty."); - ret = EINVAL; - goto err; - } - have_keys = 1; - } - - if (LF_ISSET(DB_CREATE) && have_keys == 0) { - /* Insert the keys into the master database. */ - for (i = 0; i < part->nparts - 1; i++) { - if ((ret = __db_put(dbp, dbc->thread_info, - dbc->txn, &part->keys[i], &data, 0)) != 0) - goto err; - } - - /* - * Insert the "0" pointer. All records less than the first - * given key go into this partition. We must use the default - * compare to insert this key, otherwise it might not be first. - */ - t = dbc->dbp->bt_internal; - compare = t->bt_compare; - t->bt_compare = __bam_defcmp; - memset(&key, 0, sizeof(key)); - ret = __db_put(dbp, dbc->thread_info, dbc->txn, &key, &data, 0); - t->bt_compare = compare; - if (ret != 0) - goto err; - } -done: if (F_ISSET(part, PART_RANGE)) { - /* - * Allocate one page to hold the keys plus space at the - * end of the buffer to put an array of DBTs. If there - * is not enough space __dbc_get will return how much - * is needed and we realloc. 
- */ - if ((ret = __os_malloc(env, - meta->pagesize + (sizeof(DBT) * part->nparts), - &part->data)) != 0) { - __db_errx(env, Alloc_err, meta->pagesize); - goto err; - } - memset(&key, 0, sizeof(key)); - memset(&data, 0, sizeof(data)); - data.data = part->data; - data.ulen = meta->pagesize; - data.flags = DB_DBT_USERMEM; -again: if ((ret = __dbc_get(dbc, &key, &data, - DB_FIRST | DB_MULTIPLE_KEY)) == DB_BUFFER_SMALL) { - if ((ret = __os_realloc(env, - data.size + (sizeof(DBT) * part->nparts), - &part->data)) != 0) - goto err; - data.data = part->data; - data.ulen = data.size; - goto again; - } - if (ret == 0) { - /* - * They passed in keys, they must match. - */ - keys = NULL; - compare = NULL; - if (have_keys == 1 && (keys = part->keys) != NULL) { - t = dbc->dbp->bt_internal; - compare = t->bt_compare; - if ((ret = __os_malloc(env, (part->nparts - 1) - * sizeof(struct key_sort), &ks)) != 0) - goto err; - for (j = 0; j < part->nparts - 1; j++) { - ks[j].dbp = dbc->dbp; - ks[j].compare = compare; - ks[j].key = &keys[j]; - } - - qsort(ks, (size_t)part->nparts - 1, - sizeof(struct key_sort), __part_key_cmp); - } - DB_MULTIPLE_INIT(dp, &data); - part->keys = (DBT *) - ((u_int8_t *)part->data + data.size); - j = 0; - for (kp = part->keys; - kp < &part->keys[part->nparts]; kp++, j++) { - DB_MULTIPLE_KEY_NEXT(dp, - &data, kp->data, kp->size, dd, ds); - if (dp == NULL) { - ret = DB_NOTFOUND; - break; - } - if (keys != NULL && j != 0 && - compare(dbc->dbp, ks[j - 1].key, kp) != 0) { - if (kp->data == NULL && - F_ISSET(dbp, DB_AM_RECOVER)) - goto err; - __db_errx(env, - "Partition key %d does not match", j); - ret = EINVAL; - goto err; - } - } - } - } - if (ret == DB_NOTFOUND && F_ISSET(dbp, DB_AM_RECOVER)) - ret = 0; - -err: dbp->p_internal = part; - if (ks != NULL) - __os_free(env, ks); - return (ret); -} - -/* - * __partition_get_callback -- - * Get the partition callback function. 
- * PUBLIC: int __partition_get_callback __P((DB *, - * PUBLIC: u_int32_t *, u_int32_t (**callback)(DB *, DBT *key))); - */ -int -__partition_get_callback(dbp, parts, callback) - DB *dbp; - u_int32_t *parts; - u_int32_t (**callback)(DB *, DBT *key); -{ - DB_PARTITION *part; - - part = dbp->p_internal; - /* Only return populated results if partitioned using callbacks. */ - if (part != NULL && !F_ISSET(part, PART_CALLBACK)) - part = NULL; - if (parts != NULL) - *parts = (part != NULL ? part->nparts : 0); - if (callback != NULL) - *callback = (part != NULL ? part->callback : NULL); - - return (0); -} - -/* - * __partition_get_keys -- - * Get partition keys. - * PUBLIC: int __partition_get_keys __P((DB *, u_int32_t *, DBT **)); - */ -int -__partition_get_keys(dbp, parts, keys) - DB *dbp; - u_int32_t *parts; - DBT **keys; -{ - DB_PARTITION *part; - - part = dbp->p_internal; - /* Only return populated results if partitioned using ranges. */ - if (part != NULL && !F_ISSET(part, PART_RANGE)) - part = NULL; - if (parts != NULL) - *parts = (part != NULL ? part->nparts : 0); - if (keys != NULL) - *keys = (part != NULL ? &part->keys[1] : NULL); - - return (0); -} - -/* - * __partition_get_dirs -- - * Get partition dirs. - * PUBLIC: int __partition_get_dirs __P((DB *, const char ***)); - */ -int -__partition_get_dirs(dbp, dirpp) - DB *dbp; - const char ***dirpp; -{ - DB_PARTITION *part; - ENV *env; - u_int32_t i; - int ret; - - env = dbp->env; - if ((part = dbp->p_internal) == NULL) { - *dirpp = NULL; - return (0); - } - if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) { - *dirpp = part->dirs; - return (0); - } - - /* - * We build a list once when asked. The original directory list, - * if any, was discarded at open time. 
- */ - if ((*dirpp = part->dirs) != NULL) - return (0); - - if ((ret = __os_calloc(env, - sizeof(char *), part->nparts + 1, (char **)&part->dirs)) != 0) - return (ret); - - for (i = 0; i < part->nparts; i++) - part->dirs[i] = part->handles[i]->dirname; - - *dirpp = part->dirs; - return (0); -} - -/* - * __partc_init -- - * Initialize the access private portion of a cursor - * - * PUBLIC: int __partc_init __P((DBC *)); - */ -int -__partc_init(dbc) - DBC *dbc; -{ - ENV *env; - int ret; - - env = dbc->env; - - /* Allocate/initialize the internal structure. */ - if (dbc->internal == NULL && (ret = - __os_calloc(env, 1, sizeof(PART_CURSOR), &dbc->internal)) != 0) - return (ret); - - /* Initialize methods. */ - dbc->close = dbc->c_close = __dbc_close_pp; - dbc->cmp = __dbc_cmp_pp; - dbc->count = dbc->c_count = __dbc_count_pp; - dbc->del = dbc->c_del = __dbc_del_pp; - dbc->dup = dbc->c_dup = __dbc_dup_pp; - dbc->get = dbc->c_get = __partc_get_pp; - dbc->pget = dbc->c_pget = __dbc_pget_pp; - dbc->put = dbc->c_put = __dbc_put_pp; - dbc->am_bulk = NULL; - dbc->am_close = __partc_close; - dbc->am_del = __partc_del; - dbc->am_destroy = __partc_destroy; - dbc->am_get = NULL; - dbc->am_put = __partc_put; - dbc->am_writelock = __partc_writelock; - - /* We avoid swapping partition cursors since we swap the sub cursors */ - F_SET(dbc, DBC_PARTITIONED); - - return (0); -} -/* - * __partc_get_pp -- - * cursor get opeartion on a partitioned database. - */ -static int -__partc_get_pp(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - DB_THREAD_INFO *ip; - ENV *env; - int ignore_lease, ret; - - dbp = dbc->dbp; - env = dbp->env; - - ignore_lease = LF_ISSET(DB_IGNORE_LEASE) ? 1 : 0; - LF_CLR(DB_IGNORE_LEASE); - if ((ret = __dbc_get_arg(dbc, key, data, flags)) != 0) - return (ret); - - ENV_ENTER(env, ip); - - DEBUG_LREAD(dbc, dbc->txn, "DBcursor->get", - flags == DB_SET || flags == DB_SET_RANGE ? 
key : NULL, NULL, flags); - - ret = __partc_get(dbc, key, data, flags); - /* - * Check for master leases. - */ - if (ret == 0 && - IS_REP_MASTER(env) && IS_USING_LEASES(env) && !ignore_lease) - ret = __rep_lease_check(env, 1); - - ENV_LEAVE(env, ip); - __dbt_userfree(env, key, NULL, data); - return (ret); -} -/* - * __partiton_get -- - * cursor get opeartion on a partitioned database. - * - * PUBLIC: int __partc_get __P((DBC*, DBT *, DBT *, u_int32_t)); - */ -int -__partc_get(dbc, key, data, flags) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; -{ - DB *dbp; - DBC *orig_dbc, *new_dbc; - DB_PARTITION *part; - PART_CURSOR *cp; - u_int32_t multi, part_id; - int ret, retry, search; - - dbp = dbc->dbp; - cp = (PART_CURSOR*)dbc->internal; - orig_dbc = cp->sub_cursor; - part = dbp->p_internal; - - new_dbc = NULL; - retry = search = 0; - part_id = cp->part_id; - multi = flags & ~DB_OPFLAGS_MASK; - - switch (flags & DB_OPFLAGS_MASK) { - case DB_CURRENT: - break; - case DB_FIRST: - part_id = 0; - retry = 1; - break; - case DB_GET_BOTH: - case DB_GET_BOTHC: - case DB_GET_BOTH_RANGE: - search = 1; - break; - case DB_SET_RANGE: - search = 1; - retry = 1; - break; - case DB_LAST: - part_id = part->nparts - 1; - retry = 1; - break; - case DB_NEXT: - case DB_NEXT_NODUP: - if (orig_dbc == NULL) - part_id = 0; - else - part_id = cp->part_id; - retry = 1; - break; - case DB_NEXT_DUP: - break; - case DB_PREV: - case DB_PREV_NODUP: - if (orig_dbc == NULL) - part_id = part->nparts - 1; - else - part_id = cp->part_id; - retry = 1; - break; - case DB_PREV_DUP: - break; - case DB_SET: - search = 1; - break; - default: - return (__db_unknown_flag(dbp->env, "__partc_get", flags)); - } - - /* - * If we need to find the partition to start on, then - * do a binary search of the in memory partition table. 
- */ - if (search == 1 && F_ISSET(part, PART_CALLBACK)) - part_id = part->callback(dbp, key) % part->nparts; - else if (search == 1) - __part_search(dbp, part, key, &part_id); - - /* Get a new cursor if necessary */ - if (orig_dbc == NULL || cp->part_id != part_id) { - GET_PART_CURSOR(dbc, new_dbc, part_id); - } else - new_dbc = orig_dbc; - - while ((ret = __dbc_get(new_dbc, - key, data, flags)) == DB_NOTFOUND && retry == 1) { - switch (flags & DB_OPFLAGS_MASK) { - case DB_FIRST: - case DB_NEXT: - case DB_NEXT_NODUP: - case DB_SET_RANGE: - if (++part_id < part->nparts) { - flags = DB_FIRST | multi; - break; - } - goto err; - case DB_LAST: - case DB_PREV: - case DB_PREV_NODUP: - if (part_id-- > 0) { - flags = DB_LAST | multi; - break; - } - goto err; - default: - goto err; - } - - if (new_dbc != orig_dbc && (ret = __dbc_close(new_dbc)) != 0) - goto err; - GET_PART_CURSOR(dbc, new_dbc, part_id); - } - - if (ret != 0) - goto err; - - /* Success: swap original and new cursors. */ - if (new_dbc != orig_dbc) { - if (orig_dbc != NULL) { - cp->sub_cursor = NULL; - if ((ret = __dbc_close(orig_dbc)) != 0) - goto err; - } - cp->sub_cursor = new_dbc; - cp->part_id = part_id; - } - - return (0); - -err: if (new_dbc != NULL && new_dbc != orig_dbc) - (void)__dbc_close(new_dbc); - return (ret); -} - -/* - * __partc_put -- - * cursor put opeartion on a partitioned cursor. 
- * - */ -static int -__partc_put(dbc, key, data, flags, pgnop) - DBC *dbc; - DBT *key, *data; - u_int32_t flags; - db_pgno_t *pgnop; -{ - DB *dbp; - DB_PARTITION *part; - DBC *new_dbc; - PART_CURSOR *cp; - u_int32_t part_id; - int ret; - - dbp = dbc->dbp; - cp = (PART_CURSOR*)dbc->internal; - part_id = cp->part_id; - part = dbp->p_internal; - *pgnop = PGNO_INVALID; - - switch (flags) { - case DB_KEYFIRST: - case DB_KEYLAST: - case DB_NODUPDATA: - case DB_NOOVERWRITE: - case DB_OVERWRITE_DUP: - if (F_ISSET(part, PART_CALLBACK)) { - part_id = part->callback(dbp, key) % part->nparts; - break; - } - __part_search(dbp, part, key, &part_id); - break; - default: - break; - } - - if ((new_dbc = cp->sub_cursor) == NULL || cp->part_id != part_id) { - if ((ret = __db_cursor_int(part->handles[part_id], - dbc->thread_info, dbc->txn, part->handles[part_id]->type, - PGNO_INVALID, 0, dbc->locker, &new_dbc)) != 0) - goto err; - } - - if (F_ISSET(dbc, DBC_WRITER | DBC_WRITECURSOR)) - F_SET(new_dbc, DBC_WRITER); - if ((ret = __dbc_put(new_dbc, key, data, flags)) != 0) - goto err; - - if (new_dbc != cp->sub_cursor) { - if (cp->sub_cursor != NULL) { - if ((ret = __dbc_close(cp->sub_cursor)) != 0) - goto err; - cp->sub_cursor = NULL; - } - cp->sub_cursor = new_dbc; - cp->part_id = part_id; - } - - return (0); - -err: if (new_dbc != NULL && cp->sub_cursor != new_dbc) - (void)__dbc_close(new_dbc); - return (ret); -} - -/* - * __partc_del - * Delete interface to partitioned cursors. - * - */ -static int -__partc_del(dbc, flags) - DBC *dbc; - u_int32_t flags; -{ - PART_CURSOR *cp; - cp = (PART_CURSOR*)dbc->internal; - - if (F_ISSET(dbc, DBC_WRITER | DBC_WRITECURSOR)) - F_SET(cp->sub_cursor, DBC_WRITER); - return (__dbc_del(cp->sub_cursor, flags)); -} - -/* - * __partc_writelock - * Writelock interface to partitioned cursors. 
- * - */ -static int -__partc_writelock(dbc) - DBC *dbc; -{ - PART_CURSOR *cp; - cp = (PART_CURSOR*)dbc->internal; - - return (cp->sub_cursor->am_writelock(cp->sub_cursor)); -} - -/* - * __partc_close - * Close interface to partitioned cursors. - * - */ -static int -__partc_close(dbc, root_pgno, rmroot) - DBC *dbc; - db_pgno_t root_pgno; - int *rmroot; -{ - PART_CURSOR *cp; - int ret; - - COMPQUIET(root_pgno, 0); - COMPQUIET(rmroot, NULL); - - cp = (PART_CURSOR*)dbc->internal; - - if (cp->sub_cursor == NULL) - return (0); - ret = __dbc_close(cp->sub_cursor); - cp->sub_cursor = NULL; - return (ret); -} - -/* - * __partc_destroy -- - * Destroy a single cursor. - */ -static int -__partc_destroy(dbc) - DBC *dbc; -{ - PART_CURSOR *cp; - ENV *env; - - cp = (PART_CURSOR *)dbc->internal; - env = dbc->env; - - /* Discard the structure. Don't recurse. */ - __os_free(env, cp); - - return (0); -} - -/* - * __partiton_close - * Close a partitioned database. - * - * PUBLIC: int __partition_close __P((DB *, DB_TXN *, u_int32_t)); - */ -int -__partition_close(dbp, txn, flags) - DB *dbp; - DB_TXN *txn; - u_int32_t flags; -{ - DB **pdbp; - DB_PARTITION *part; - ENV *env; - u_int32_t i; - int ret, t_ret; - - if ((part = dbp->p_internal) == NULL) - return (0); - - env = dbp->env; - ret = 0; - - if ((pdbp = part->handles) != NULL) { - for (i = 0; i < part->nparts; i++, pdbp++) - if (*pdbp != NULL && (t_ret = - __db_close(*pdbp, txn, flags)) != 0 && ret == 0) - ret = t_ret; - __os_free(env, part->handles); - } - if (part->dirs != NULL) - __os_free(env, (char **)part->dirs); - if (part->data != NULL) - __os_free(env, (char **)part->data); - __os_free(env, part); - dbp->p_internal = NULL; - - return (ret); -} - -/* - * __partiton_sync - * Sync a partitioned database. 
- * - * PUBLIC: int __partition_sync __P((DB *)); - */ -int -__partition_sync(dbp) - DB *dbp; -{ - DB **pdbp; - DB_PARTITION *part; - u_int32_t i; - int ret, t_ret; - - ret = 0; - part = dbp->p_internal; - - if ((pdbp = part->handles) != NULL) { - for (i = 0; i < part->nparts; i++, pdbp++) - if (*pdbp != NULL && - F_ISSET(*pdbp, DB_AM_OPEN_CALLED) && (t_ret = - __memp_fsync((*pdbp)->mpf)) != 0 && ret == 0) - ret = t_ret; - } - if ((t_ret = __memp_fsync(dbp->mpf)) != 0 && ret == 0) - ret = t_ret; - - return (ret); -} - -/* - * __partiton_stat - * Stat a partitioned database. - * - * PUBLIC: int __partition_stat __P((DBC *, void *, u_int32_t)); - */ -int -__partition_stat(dbc, spp, flags) - DBC *dbc; - void *spp; - u_int32_t flags; -{ - DB *dbp, **pdbp; - DB_BTREE_STAT *fsp, *bsp; -#ifdef HAVE_HASH - DB_HASH_STAT *hfsp, *hsp; -#endif - DB_PARTITION *part; - DBC *new_dbc; - ENV *env; - u_int32_t i; - int ret; - - dbp = dbc->dbp; - part = dbp->p_internal; - env = dbp->env; - fsp = NULL; -#ifdef HAVE_HASH - hfsp = NULL; -#endif - - pdbp = part->handles; - for (i = 0; i < part->nparts; i++, pdbp++) { - if ((ret = __db_cursor_int(*pdbp, dbc->thread_info, dbc->txn, - (*pdbp)->type, PGNO_INVALID, - 0, dbc->locker, &new_dbc)) != 0) - goto err; - switch (new_dbc->dbtype) { - case DB_BTREE: - if ((ret = __bam_stat(new_dbc, &bsp, flags)) != 0) - goto err; - if (fsp == NULL) { - fsp = bsp; - *(DB_BTREE_STAT **)spp = fsp; - } else { - fsp->bt_nkeys += bsp->bt_nkeys; - fsp->bt_ndata += bsp->bt_ndata; - fsp->bt_pagecnt += bsp->bt_pagecnt; - if (fsp->bt_levels < bsp->bt_levels) - fsp->bt_levels = bsp->bt_levels; - fsp->bt_int_pg += bsp->bt_int_pg; - fsp->bt_leaf_pg += bsp->bt_leaf_pg; - fsp->bt_dup_pg += bsp->bt_dup_pg; - fsp->bt_over_pg += bsp->bt_over_pg; - fsp->bt_free += bsp->bt_free; - fsp->bt_int_pgfree += bsp->bt_int_pgfree; - fsp->bt_leaf_pgfree += bsp->bt_leaf_pgfree; - fsp->bt_dup_pgfree += bsp->bt_dup_pgfree; - fsp->bt_over_pgfree += bsp->bt_over_pgfree; - __os_ufree(env, 
bsp); - } - break; -#ifdef HAVE_HASH - case DB_HASH: - if ((ret = __ham_stat(new_dbc, &hsp, flags)) != 0) - goto err; - if (hfsp == NULL) { - hfsp = hsp; - *(DB_HASH_STAT **)spp = hfsp; - } else { - hfsp->hash_nkeys += hsp->hash_nkeys; - hfsp->hash_ndata += hsp->hash_ndata; - hfsp->hash_pagecnt += hsp->hash_pagecnt; - hfsp->hash_ffactor += hsp->hash_ffactor; - hfsp->hash_buckets += hsp->hash_buckets; - hfsp->hash_free += hsp->hash_free; - hfsp->hash_bfree += hsp->hash_bfree; - hfsp->hash_bigpages += hsp->hash_bigpages; - hfsp->hash_big_bfree += hsp->hash_big_bfree; - hfsp->hash_overflows += hsp->hash_overflows; - hfsp->hash_ovfl_free += hsp->hash_ovfl_free; - hfsp->hash_dup += hsp->hash_dup; - hfsp->hash_dup_free += hsp->hash_dup_free; - __os_ufree(env, hsp); - } - break; -#endif - default: - break; - } - if ((ret = __dbc_close(new_dbc)) != 0) - goto err; - } - return (0); - -err: - if (fsp != NULL) - __os_ufree(env, fsp); - *(DB_BTREE_STAT **)spp = NULL; - return (ret); -} - -/* - * __part_truncate -- - * Truncate a database. 
- * - * PUBLIC: int __part_truncate __P((DBC *, u_int32_t *)); - */ -int -__part_truncate(dbc, countp) - DBC *dbc; - u_int32_t *countp; -{ - DB *dbp, **pdbp; - DB_PARTITION *part; - DBC *new_dbc; - u_int32_t count, i; - int ret, t_ret; - - dbp = dbc->dbp; - part = dbp->p_internal; - pdbp = part->handles; - ret = 0; - - if (countp != NULL) - *countp = 0; - for (i = 0; ret == 0 && i < part->nparts; i++, pdbp++) { - if ((ret = __db_cursor_int(*pdbp, dbc->thread_info, dbc->txn, - (*pdbp)->type, PGNO_INVALID, - 0, dbc->locker, &new_dbc)) != 0) - break; - switch (dbp->type) { - case DB_BTREE: - case DB_RECNO: - ret = __bam_truncate(new_dbc, &count); - break; - case DB_HASH: -#ifdef HAVE_HASH - ret = __ham_truncate(new_dbc, &count); - break; -#endif - case DB_QUEUE: - case DB_UNKNOWN: - default: - ret = __db_unknown_type(dbp->env, - "DB->truncate", dbp->type); - count = 0; - break; - } - if ((t_ret = __dbc_close(new_dbc)) != 0 && ret == 0) - ret = t_ret; - if (countp != NULL) - *countp += count; - } - - return (ret); -} -/* - * __part_compact -- compact a partitioned database. - * - * PUBLIC: int __part_compact __P((DB *, DB_THREAD_INFO *, DB_TXN *, - * PUBLIC: DBT *, DBT *, DB_COMPACT *, u_int32_t, DBT *)); - */ -int -__part_compact(dbp, ip, txn, start, stop, c_data, flags, end) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - DBT *start, *stop; - DB_COMPACT *c_data; - u_int32_t flags; - DBT *end; -{ - DB **pdbp; - DB_PARTITION *part; - u_int32_t i; - int ret; - - part = dbp->p_internal; - pdbp = part->handles; - ret = 0; - - for (i = 0; ret == 0 && i < part->nparts; i++, pdbp++) { - switch (dbp->type) { - case DB_HASH: - if (!LF_ISSET(DB_FREELIST_ONLY)) - goto err; - /* FALLTHROUGH */ - case DB_BTREE: - case DB_RECNO: - ret = __bam_compact(*pdbp, - ip, txn, start, stop, c_data, flags, end); - break; - - default: - err: ret = __dbh_am_chk(dbp, DB_OK_BTREE); - break; - } - } - return (ret); -} - -/* - * __part_lsn_reset -- - * reset the lsns on each partition. 
- * - * PUBLIC: int __part_lsn_reset __P((DB *, DB_THREAD_INFO *)); - */ -int -__part_lsn_reset(dbp, ip) - DB *dbp; - DB_THREAD_INFO *ip; -{ - DB **pdbp; - DB_PARTITION *part; - u_int32_t i; - int ret; - - part = dbp->p_internal; - pdbp = part->handles; - ret = 0; - - for (i = 0; ret == 0 && i < part->nparts; i++, pdbp++) - ret = __db_lsn_reset((*pdbp)->mpf, ip); - - return (ret); -} - -/* - * __part_fileid_reset -- - * reset the fileid on each partition. - * - * PUBLIC: int __part_fileid_reset - * PUBLIC: __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, int)); - */ -int -__part_fileid_reset(env, ip, fname, nparts, encrypted) - ENV *env; - DB_THREAD_INFO *ip; - const char *fname; - u_int32_t nparts; - int encrypted; -{ - int ret; - u_int32_t part_id; - char *name, *sp; - const char *np; - - if ((ret = __os_malloc(env, - strlen(fname) + PART_LEN + 1, &name)) != 0) { - __db_errx(env, Alloc_err, strlen(fname) + PART_LEN + 1); - return (ret); - } - - sp = name; - np = __db_rpath(fname); - if (np == NULL) - np = fname; - else { - np++; - (void)strncpy(name, fname, (size_t)(np - fname)); - sp = name + (np - fname); - } - - for (part_id = 0; ret == 0 && part_id < nparts; part_id++) { - (void)sprintf(sp, PART_NAME, np, part_id); - ret = __env_fileid_reset(env, ip, sp, encrypted); - } - - __os_free(env, name); - return (ret); -} -#ifndef HAVE_BREW -/* - * __part_key_range -- - * Return proportion of keys relative to given key. 
- * - * PUBLIC: int __part_key_range __P((DBC *, DBT *, DB_KEY_RANGE *, u_int32_t)); - */ -int -__part_key_range(dbc, dbt, kp, flags) - DBC *dbc; - DBT *dbt; - DB_KEY_RANGE *kp; - u_int32_t flags; -{ - BTREE_CURSOR *cp; - DBC *new_dbc; - DB_PARTITION *part; - PAGE *h; - u_int32_t id, part_id; - u_int32_t elems, empty, less_elems, my_elems, greater_elems; - u_int32_t levels, max_levels, my_levels; - int ret; - double total_elems; - - COMPQUIET(flags, 0); - - part = dbc->dbp->p_internal; - - /* - * First we find the key range for the partition that contains the - * key. Then we scale based on estimates of the other partitions. - */ - if (F_ISSET(part, PART_CALLBACK)) - part_id = part->callback(dbc->dbp, dbt) % part->nparts; - else - __part_search(dbc->dbp, part, dbt, &part_id); - GET_PART_CURSOR(dbc, new_dbc, part_id); - - if ((ret = __bam_key_range(new_dbc, dbt, kp, flags)) != 0) - goto err; - - cp = (BTREE_CURSOR *)new_dbc->internal; - - if ((ret = __memp_fget(new_dbc->dbp->mpf, - &cp->root, new_dbc->thread_info, new_dbc->txn, 0, &h)) != 0) - goto c_err; - - my_elems = NUM_ENT(h); - my_levels = LEVEL(h); - max_levels = my_levels; - - if ((ret = __memp_fput(new_dbc->dbp->mpf, - new_dbc->thread_info, h, new_dbc->priority)) != 0) - goto c_err; - - if ((ret = __dbc_close(new_dbc)) != 0) - goto err; - /* - * We have the range within one subtree. Now estimate - * what part of the whole range that subtree is. Figure - * out how many levels each part has and how many entries - * in the level below the root. 
- */ - empty = less_elems = greater_elems = 0; - for (id = 0; id < part->nparts; id++) { - if (id == part_id) { - empty = 0; - continue; - } - GET_PART_CURSOR(dbc, new_dbc, id); - cp = (BTREE_CURSOR *)new_dbc->internal; - if ((ret = __memp_fget(new_dbc->dbp->mpf, &cp->root, - new_dbc->thread_info, new_dbc->txn, 0, &h)) != 0) - goto c_err; - - elems = NUM_ENT(h); - levels = LEVEL(h); - if (levels == 1) - elems /= 2; - - if ((ret = __memp_fput(new_dbc->dbp->mpf, - new_dbc->thread_info, h, new_dbc->priority)) != 0) - goto c_err; - - if ((ret = __dbc_close(new_dbc)) != 0) - goto err; - - /* If the tree is empty, ignore it. */ - if (elems == 0) { - empty++; - continue; - } - - /* - * If a tree has fewer levels than the max just count - * it as a single element in the higher level. - */ - if (id < part_id) { - if (levels > max_levels) { - max_levels = levels; - less_elems = id + elems - empty; - } else if (levels < max_levels) - less_elems++; - else - less_elems += elems; - } else { - if (levels > max_levels) { - max_levels = levels; - greater_elems = (id - part_id) + elems - empty; - } else if (levels < max_levels) - greater_elems++; - else - greater_elems += elems; - } - - } - - if (my_levels < max_levels) { - /* - * The subtree containing the key is not the tallest one. - * Reduce its share by the number of records at the highest - * level. Scale the greater and lesser components up - * by the number of records on either side of this - * subtree. - */ - total_elems = 1 + greater_elems + less_elems; - kp->equal /= total_elems; - kp->less /= total_elems; - kp->less += less_elems/total_elems; - kp->greater /= total_elems; - kp->greater += greater_elems/total_elems; - } else if (my_levels == max_levels) { - /* - * The key is in one of the tallest subtrees. We will - * scale the values by the ratio of the records at the - * top of this stubtree to the number of records at the - * highest level. 
- */ - total_elems = greater_elems + less_elems; - if (total_elems != 0) { - /* - * First scale down by the fraction of elements - * in this subtree. - */ - total_elems += my_elems; - kp->equal *= my_elems; - kp->equal /= total_elems; - kp->less *= my_elems; - kp->less /= total_elems; - kp->greater *= my_elems; - kp->greater /= total_elems; - /* - * Proportially add weight from the subtrees to the - * left and right of this one. - */ - kp->less += less_elems / total_elems; - kp->greater += greater_elems / total_elems; - } - } - - if (0) { -c_err: (void)__dbc_close(new_dbc); - } - -err: return (ret); -} -#endif - -/* - * __part_remove -- - * Remove method for a partitioned database. - * - * PUBLIC: int __part_remove __P((DB *, DB_THREAD_INFO *, - * PUBLIC: DB_TXN *, const char *, const char *, u_int32_t)); - */ -int -__part_remove(dbp, ip, txn, name, subdb, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb; - u_int32_t flags; -{ - return (__part_rr(dbp, ip, txn, name, subdb, NULL, flags)); -} - -/* - * __part_rename -- - * Rename method for a partitioned database. - * - * PUBLIC: int __part_rename __P((DB *, DB_THREAD_INFO *, - * PUBLIC: DB_TXN *, const char *, const char *, const char *)); - */ -int -__part_rename(dbp, ip, txn, name, subdb, newname) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb, *newname; -{ - return (__part_rr(dbp, ip, txn, name, subdb, newname, 0)); -} - -/* - * __part_rr -- - * Remove/Rename method for a partitioned database. 
- */ -static int -__part_rr(dbp, ip, txn, name, subdb, newname, flags) - DB *dbp; - DB_THREAD_INFO *ip; - DB_TXN *txn; - const char *name, *subdb, *newname; - u_int32_t flags; -{ - DB **pdbp, *ptmpdbp, *tmpdbp; - DB_PARTITION *part; - ENV *env; - u_int32_t i; - int ret, t_ret; - char *np; - - env = dbp->env; - ret = 0; - - if (subdb != NULL && name != NULL) { - __db_errx(env, - "A partitioned database can not be in a multiple databases file"); - return (EINVAL); - } - ENV_GET_THREAD_INFO(env, ip); - - /* - * Since rename no longer opens the database, we have - * to do it here. - */ - if ((ret = __db_create_internal(&tmpdbp, env, 0)) != 0) - return (ret); - - /* - * We need to make sure we don't self-deadlock, so give - * this dbp the same locker as the incoming one. - */ - tmpdbp->locker = dbp->locker; - if ((ret = __db_open(tmpdbp, ip, txn, name, NULL, dbp->type, - DB_RDWRMASTER | DB_RDONLY, 0, PGNO_BASE_MD)) != 0) - goto err; - - part = tmpdbp->p_internal; - pdbp = part->handles; - COMPQUIET(np, NULL); - if (newname != NULL && (ret = __os_malloc(env, - strlen(newname) + PART_LEN + 1, &np)) != 0) { - __db_errx(env, Alloc_err, strlen(newname) + PART_LEN + 1); - goto err; - } - for (i = 0; i < part->nparts; i++, pdbp++) { - if ((ret = __db_create_internal(&ptmpdbp, env, 0)) != 0) - break; - ptmpdbp->locker = (*pdbp)->locker; - if (newname == NULL) - ret = __db_remove_int(ptmpdbp, - ip, txn, (*pdbp)->fname, NULL, flags); - else { - DB_ASSERT(env, np != NULL); - (void)sprintf(np, PART_NAME, newname, i); - ret = __db_rename_int(ptmpdbp, - ip, txn, (*pdbp)->fname, NULL, np); - } - ptmpdbp->locker = NULL; - (void)__db_close(ptmpdbp, NULL, DB_NOSYNC); - if (ret != 0) - break; - } - - if (newname != NULL) - __os_free(env, np); - - if (!F_ISSET(dbp, DB_AM_OPEN_CALLED)) { -err: /* - * Since we copied the locker ID from the dbp, we'd better not - * free it here. - */ - tmpdbp->locker = NULL; - - /* We need to remove the lock event we associated with this. 
*/ - if (txn != NULL) - __txn_remlock(env, - txn, &tmpdbp->handle_lock, DB_LOCK_INVALIDID); - - if ((t_ret = __db_close(tmpdbp, - txn, DB_NOSYNC)) != 0 && ret == 0) - ret = t_ret; - } - return (ret); -} -#ifdef HAVE_VERIFY -/* - * __part_verify -- - * Verify a partitioned database. - * - * PUBLIC: int __part_verify __P((DB *, VRFY_DBINFO *, const char *, - * PUBLIC: void *, int (*)(void *, const void *), u_int32_t)); - */ -int -__part_verify(dbp, vdp, fname, handle, callback, flags) - DB *dbp; - VRFY_DBINFO *vdp; - const char *fname; - void *handle; - int (*callback) __P((void *, const void *)); - u_int32_t flags; -{ - BINTERNAL *lp, *rp; - DB **pdbp; - DB_PARTITION *part; - DBC *dbc; - DBT *key; - ENV *env; - DB_THREAD_INFO *ip; - u_int32_t i; - int ret, t_ret; - - env = dbp->env; - lp = rp = NULL; - dbc = NULL; - ip = vdp->thread_info; - - if (dbp->type == DB_BTREE) { - if ((ret = __bam_open(dbp, ip, - NULL, fname, PGNO_BASE_MD, flags)) != 0) - goto err; - } -#ifdef HAVE_HASH - else if ((ret = __ham_open(dbp, ip, - NULL, fname, PGNO_BASE_MD, flags)) != 0) - goto err; -#endif - - /* - * Initalize partition db handles and get the names. Set DB_RDWRMASTER - * because we may not have the partition callback, but we can still - * look at the structure of the tree. - */ - if ((ret = __partition_open(dbp, - ip, NULL, fname, dbp->type, flags | DB_RDWRMASTER, 0, 0)) != 0) - goto err; - part = dbp->p_internal; - - if (LF_ISSET(DB_SALVAGE)) { - /* If we are being aggressive we don't want to dump the keys. 
*/ - if (LF_ISSET(DB_AGGRESSIVE)) - dbp->p_internal = NULL; - ret = __db_prheader(dbp, - NULL, 0, 0, handle, callback, vdp, PGNO_BASE_MD); - dbp->p_internal = part; - if (ret != 0) - goto err; - } - - if ((ret = __db_cursor(dbp, ip, NULL, &dbc, 0)) != 0) - goto err; - - pdbp = part->handles; - for (i = 0; i < part->nparts; i++, pdbp++) { - if (!F_ISSET(part, PART_RANGE) || part->keys == NULL) - goto vrfy; - if (lp != NULL) - __os_free(env, lp); - lp = rp; - rp = NULL; - if (i + 1 < part->nparts) { - key = &part->keys[i + 1]; - if ((ret = __os_malloc(env, - BINTERNAL_SIZE(key->size), &rp)) != 0) - goto err; - rp->len = key->size; - memcpy(rp->data, key->data, key->size); - B_TSET(rp->type, B_KEYDATA); - } -vrfy: if ((t_ret = __db_verify(*pdbp, ip, (*pdbp)->fname, - NULL, handle, callback, - lp, rp, flags | DB_VERIFY_PARTITION)) != 0 && ret == 0) - ret = t_ret; - } - -err: if (lp != NULL) - __os_free(env, lp); - if (rp != NULL) - __os_free(env, rp); - return (ret); -} -#endif - -#ifdef CONFIG_TEST -/* - * __part_testdocopy -- copy all partitions for testing purposes. - * - * PUBLIC: int __part_testdocopy __P((DB *, const char *)); - */ -int -__part_testdocopy(dbp, name) - DB *dbp; - const char *name; -{ - DB **pdbp; - DB_PARTITION *part; - u_int32_t i; - int ret; - - if ((ret = __db_testdocopy(dbp->env, name)) != 0) - return (ret); - - part = dbp->p_internal; - pdbp = part->handles; - for (i = 0; i < part->nparts; i++, pdbp++) - if ((ret = __db_testdocopy(dbp->env, (*pdbp)->fname)) != 0) - return (ret); - - return (0); -} -#endif -#else -/* - * __db_nopartition -- - * Error when a Berkeley DB build doesn't include partitioning. - * - * PUBLIC: int __db_no_partition __P((ENV *)); - */ -int -__db_no_partition(env) - ENV *env; -{ - __db_errx(env, - "library build did not include support for the database partitioning"); - return (DB_OPNOTSUP); -} -/* - * __partition_set -- - * Set the partitioning keys or callback function. 
- * This routine must be called prior to creating the database. - * PUBLIC: int __partition_set __P((DB *, u_int32_t, DBT *, - * PUBLIC: u_int32_t (*callback)(DB *, DBT *key))); - */ - -int -__partition_set(dbp, parts, keys, callback) - DB *dbp; - u_int32_t parts; - DBT *keys; - u_int32_t (*callback)(DB *, DBT *key); -{ - COMPQUIET(parts, 0); - COMPQUIET(keys, NULL); - COMPQUIET(callback, NULL); - - return (__db_no_partition(dbp->env)); -} - -/* - * __partition_get_callback -- - * Set the partition callback function. This routine must be called - * prior to opening a partition database that requires a function. - * PUBLIC: int __partition_get_callback __P((DB *, - * PUBLIC: u_int32_t *, u_int32_t (**callback)(DB *, DBT *key))); - */ -int -__partition_get_callback(dbp, parts, callback) - DB *dbp; - u_int32_t *parts; - u_int32_t (**callback)(DB *, DBT *key); -{ - COMPQUIET(parts, NULL); - COMPQUIET(callback, NULL); - - return (__db_no_partition(dbp->env)); -} - -/* - * __partition_get_dirs -- - * Get partition dirs. - * PUBLIC: int __partition_get_dirs __P((DB *, const char ***)); - */ -int -__partition_get_dirs(dbp, dirpp) - DB *dbp; - const char ***dirpp; -{ - COMPQUIET(dirpp, NULL); - return (__db_no_partition(dbp->env)); -} - -/* - * __partition_get_keys -- - * Get partition keys. - * PUBLIC: int __partition_get_keys __P((DB *, u_int32_t *, DBT **)); - */ -int -__partition_get_keys(dbp, parts, keys) - DB *dbp; - u_int32_t *parts; - DBT **keys; -{ - COMPQUIET(parts, NULL); - COMPQUIET(keys, NULL); - - return (__db_no_partition(dbp->env)); -} -/* - * __partition_init -- - * Initialize the partition structure. - * Called when the meta data page is read in during database open or - * when partition keys or a callback are set. 
- * - * PUBLIC: int __partition_init __P((DB *, u_int32_t)); - */ -int -__partition_init(dbp, flags) - DB *dbp; - u_int32_t flags; -{ - COMPQUIET(flags, 0); - - return (__db_no_partition(dbp->env)); -} -/* - * __part_fileid_reset -- - * reset the fileid on each partition. - * - * PUBLIC: int __part_fileid_reset - * PUBLIC: __P((ENV *, DB_THREAD_INFO *, const char *, u_int32_t, int)); - */ -int -__part_fileid_reset(env, ip, fname, nparts, encrypted) - ENV *env; - DB_THREAD_INFO *ip; - const char *fname; - u_int32_t nparts; - int encrypted; -{ - COMPQUIET(ip, NULL); - COMPQUIET(fname, NULL); - COMPQUIET(nparts, 0); - COMPQUIET(encrypted, 0); - - return (__db_no_partition(env)); -} -/* - * __partition_set_dirs -- - * Set the directories for creating the partition databases. - * They must be in the environment. - * PUBLIC: int __partition_set_dirs __P((DB *, const char **)); - */ -int -__partition_set_dirs(dbp, dirp) - DB *dbp; - const char **dirp; -{ - COMPQUIET(dirp, NULL); - - return (__db_no_partition(dbp->env)); -} -#endif @@ -0,0 +1,205 @@ +BT_CLR ../btree/btree.h /^#define BT_CLR(t) (t->bt_sp = t->bt_stack)$/ +BT_POP ../btree/btree.h /^#define BT_POP(t) (t->bt_sp == t->bt_stack ? 
NULL / +BT_PUSH ../btree/btree.h /^#define BT_PUSH(t, p, i) { \\$/ +BUCKET_TO_PAGE ../hash/hash.h /^#define BUCKET_TO_PAGE(B) \\$/ +BUF_INSERT ../hash/hash_buf.c /^#define BUF_INSERT(B, P) { \\$/ +BUF_REMOVE ../hash/hash_buf.c /^#define BUF_REMOVE(B) { \\$/ +CLRBIT ../hash/hash.h /^#define CLRBIT(A, N) ((A)[(N)\/BITS_PER_MAP] &= ~(/ +DODISK ../hash/hash.h /^#define DODISK(X) ((X) = (char *)((ptrdiff_t)(X)|0/ +DOMOD ../hash/hash.h /^#define DOMOD(X) ((X) = (char *)((ptrdiff_t)(X)|0x/ +FREESPACE ../hash/page.h /^#define FREESPACE(P) ((P)[(P)[0]+1])$/ +F_CLR ../btree/btree.h /^#define F_CLR(p, f) (p)->flags &= ~(f)$/ +F_ISSET ../btree/btree.h /^#define F_ISSET(p, f) ((p)->flags & (f))$/ +F_SET ../btree/btree.h /^#define F_SET(p, f) (p)->flags |= (f)$/ +GETBINTERNAL ../btree/btree.h /^#define GETBINTERNAL(pg, indx) \\$/ +GETBLEAF ../btree/btree.h /^#define GETBLEAF(pg, indx) \\$/ +GETRINTERNAL ../btree/btree.h /^#define GETRINTERNAL(pg, indx) \\$/ +GETRLEAF ../btree/btree.h /^#define GETRLEAF(pg, indx) \\$/ +HASHKEY ../include/mpool.h /^#define HASHKEY(pgno) ((pgno - 1) % HASHSIZE)$/ +ISDISK ../hash/hash.h /^#define ISDISK(X) ((u_int32_t)(ptrdiff_t)(X)&0x2)$/ +ISMOD ../hash/hash.h /^#define ISMOD(X) ((u_int32_t)(ptrdiff_t)(X)&0x1)$/ +ISSET ../hash/hash.h /^#define ISSET(A, N) ((A)[(N)\/BITS_PER_MAP] & (1<</ +IS_BUCKET ../hash/hash.h /^#define IS_BUCKET(X) ((X) & BUF_BUCKET)$/ +KEYSIZE ../hash/page.h /^#define KEYSIZE(K) (4*sizeof(u_int16_t) + (K)->siz/ +LALIGN ../btree/btree.h /^#define LALIGN(n) (((n) + sizeof(pgno_t) - 1) & ~(/ +LRU_INSERT ../hash/hash_buf.c /^#define LRU_INSERT(B) BUF_INSERT((B), LRU)$/ +MOD ../hash/hash.c /^#define MOD(x, y) ((x) & ((y) - 1))$/ +MRU_INSERT ../hash/hash_buf.c /^#define MRU_INSERT(B) BUF_INSERT((B), &hashp->bufh/ +M_16_SWAP ../include/db.h /^#define M_16_SWAP(a) { \\$/ +M_32_SWAP ../include/db.h /^#define M_32_SWAP(a) { \\$/ +NBINTERNAL ../btree/btree.h /^#define NBINTERNAL(len) \\$/ +NBLEAF ../btree/btree.h /^#define NBLEAF(p) 
NBLEAFDBT((p)->ksize, (p)->dsize/ +NBLEAFDBT ../btree/btree.h /^#define NBLEAFDBT(ksize, dsize) \\$/ +NEXTINDEX ../btree/btree.h /^#define NEXTINDEX(p) (((p)->lower - BTDATAOFF) \/ / +NRLEAF ../btree/btree.h /^#define NRLEAF(p) NRLEAFDBT((p)->dsize)$/ +NRLEAFDBT ../btree/btree.h /^#define NRLEAFDBT(dsize) \\$/ +OADDR_OF ../hash/hash.h /^#define OADDR_OF(S,O) ((u_int32_t)((u_int32_t)(S) / +OADDR_TO_PAGE ../hash/hash.h /^#define OADDR_TO_PAGE(B) \\$/ +OFFSET ../hash/page.h /^#define OFFSET(P) ((P)[(P)[0]+2])$/ +OPAGENUM ../hash/hash.h /^#define OPAGENUM(N) ((N) & SPLITMASK)$/ +PAGE_INIT ../hash/hash_page.c /^#define PAGE_INIT(P) { \\$/ +PAGE_META ../hash/page.h /^#define PAGE_META(N) (((N)+3) * sizeof(u_int16_t))/ +PAIRFITS ../hash/page.h /^#define PAIRFITS(P,K,D) \\$/ +PAIRSIZE ../hash/page.h /^#define PAIRSIZE(K,D) (2*sizeof(u_int16_t) + (K)->/ +PTROF ../hash/hash.h /^#define PTROF(X) ((BUFHEAD *)((ptrdiff_t)(X)&~0x3)/ +P_16_COPY ../include/db.h /^#define P_16_COPY(a, b) { \\$/ +P_16_SWAP ../include/db.h /^#define P_16_SWAP(a) { \\$/ +P_32_COPY ../include/db.h /^#define P_32_COPY(a, b) { \\$/ +P_32_SWAP ../include/db.h /^#define P_32_SWAP(a) { \\$/ +RETURN_ERROR ../hash/hash.c /^#define RETURN_ERROR(ERR, LOC) { save_errno = ERR;/ +SETBIT ../hash/hash.h /^#define SETBIT(A, N) ((A)[(N)\/BITS_PER_MAP] |= (1/ +SPLITNUM ../hash/hash.h /^#define SPLITNUM(N) (((u_int32_t)(N)) >> SPLITSHIF/ +WR_BINTERNAL ../btree/btree.h /^#define WR_BINTERNAL(p, size, pgno, flags) { \\/ +WR_BLEAF ../btree/btree.h /^#define WR_BLEAF(p, key, data, flags) { \\$/ +WR_RINTERNAL ../btree/btree.h /^#define WR_RINTERNAL(p, nrecs, pgno) { \\$/ +WR_RLEAF ../btree/btree.h /^#define WR_RLEAF(p, data, flags) { \\$/ +X ../btree/bt_debug.c /^#define X(flag, name) \\$/ +__add_ovflpage ../hash/hash_page.c /^__add_ovflpage(hashp, bufp)$/ +__addel ../hash/hash_page.c /^__addel(hashp, bufp, key, val)$/ +__big_delete ../hash/hash_bigkey.c /^__big_delete(hashp, bufp)$/ +__big_insert ../hash/hash_bigkey.c 
/^__big_insert(hashp, bufp, key, val)$/ +__big_keydata ../hash/hash_bigkey.c /^__big_keydata(hashp, bufp, key, val, set)$/ +__big_return ../hash/hash_bigkey.c /^__big_return(hashp, bufp, ndx, val, set_current)$/ +__big_split ../hash/hash_bigkey.c /^__big_split(hashp, op, np, big_keyp, addr, obucket/ +__bt_bdelete ../btree/bt_delete.c /^__bt_bdelete(t, key)$/ +__bt_close ../btree/bt_close.c /^__bt_close(dbp)$/ +__bt_cmp ../btree/bt_utils.c /^__bt_cmp(t, k1, e)$/ +__bt_curdel ../btree/bt_delete.c /^__bt_curdel(t, key, h, index)$/ +__bt_defcmp ../btree/bt_utils.c /^__bt_defcmp(a, b)$/ +__bt_defpfx ../btree/bt_utils.c /^__bt_defpfx(a, b)$/ +__bt_delete ../btree/bt_delete.c /^__bt_delete(dbp, key, flags)$/ +__bt_dleaf ../btree/bt_delete.c /^__bt_dleaf(t, key, h, index)$/ +__bt_dmpage ../btree/bt_debug.c /^__bt_dmpage(h)$/ +__bt_dnpage ../btree/bt_debug.c /^__bt_dnpage(dbp, pgno)$/ +__bt_dpage ../btree/bt_debug.c /^__bt_dpage(h)$/ +__bt_dump ../btree/bt_debug.c /^__bt_dump(dbp)$/ +__bt_fd ../btree/bt_open.c /^__bt_fd(dbp)$/ +__bt_first ../btree/bt_seq.c /^__bt_first(t, key, erval, exactp)$/ +__bt_free ../btree/bt_page.c /^__bt_free(t, h)$/ +__bt_get ../btree/bt_get.c /^__bt_get(dbp, key, data, flags)$/ +__bt_new ../btree/bt_page.c /^__bt_new(t, npg)$/ +__bt_open ../btree/bt_open.c /^__bt_open(fname, flags, mode, openinfo, dflags)$/ +__bt_pdelete ../btree/bt_delete.c /^__bt_pdelete(t, h)$/ +__bt_pgin ../btree/bt_conv.c /^__bt_pgin(t, pg, pp)$/ +__bt_pgout ../btree/bt_conv.c /^__bt_pgout(t, pg, pp)$/ +__bt_put ../btree/bt_put.c /^__bt_put(dbp, key, data, flags)$/ +__bt_relink ../btree/bt_delete.c /^__bt_relink(t, h)$/ +__bt_ret ../btree/bt_utils.c /^__bt_ret(t, e, key, rkey, data, rdata, copy)$/ +__bt_search ../btree/bt_search.c /^__bt_search(t, key, exactp)$/ +__bt_seq ../btree/bt_seq.c /^__bt_seq(dbp, key, data, flags)$/ +__bt_seqadv ../btree/bt_seq.c /^__bt_seqadv(t, ep, flags)$/ +__bt_seqset ../btree/bt_seq.c /^__bt_seqset(t, ep, key, flags)$/ +__bt_setcur 
../btree/bt_seq.c /^__bt_setcur(t, pgno, index)$/ +__bt_snext ../btree/bt_search.c /^__bt_snext(t, h, key, exactp)$/ +__bt_split ../btree/bt_split.c /^__bt_split(t, sp, key, data, flags, ilen, argskip)/ +__bt_sprev ../btree/bt_search.c /^__bt_sprev(t, h, key, exactp)$/ +__bt_stat ../btree/bt_debug.c /^__bt_stat(dbp)$/ +__bt_stkacq ../btree/bt_delete.c /^__bt_stkacq(t, hp, c)$/ +__bt_sync ../btree/bt_close.c /^__bt_sync(dbp, flags)$/ +__buf_free ../hash/hash_buf.c /^__buf_free(hashp, do_free, to_disk)$/ +__buf_init ../hash/hash_buf.c /^__buf_init(hashp, nbytes)$/ +__call_hash ../hash/hash.c /^__call_hash(hashp, k, len)$/ +__dberr ../db/db.c /^__dberr()$/ +__dbpanic ../db/db.c /^__dbpanic(dbp)$/ +__delpair ../hash/hash_page.c /^__delpair(hashp, bufp, ndx)$/ +__expand_table ../hash/hash.c /^__expand_table(hashp)$/ +__find_bigpair ../hash/hash_bigkey.c /^__find_bigpair(hashp, bufp, ndx, key, size)$/ +__find_last_page ../hash/hash_bigkey.c /^__find_last_page(hashp, bpp)$/ +__free_ovflpage ../hash/hash_page.c /^__free_ovflpage(hashp, obufp)$/ +__get_buf ../hash/hash_buf.c /^__get_buf(hashp, addr, prev_bp, newpage)$/ +__get_page ../hash/hash_page.c /^__get_page(hashp, p, bucket, is_bucket, is_disk, i/ +__hash_open ../hash/hash.c /^__hash_open(file, flags, mode, info, dflags)$/ +__ibitmap ../hash/hash_page.c /^__ibitmap(hashp, pnum, nbits, ndx)$/ +__log2 ../hash/hash_log2.c /^__log2(num)$/ +__ovfl_delete ../btree/bt_overflow.c /^__ovfl_delete(t, p)$/ +__ovfl_get ../btree/bt_overflow.c /^__ovfl_get(t, p, ssz, buf, bufsz)$/ +__ovfl_put ../btree/bt_overflow.c /^__ovfl_put(t, dbt, pg)$/ +__put_page ../hash/hash_page.c /^__put_page(hashp, p, bucket, is_bucket, is_bitmap)/ +__rec_close ../recno/rec_close.c /^__rec_close(dbp)$/ +__rec_delete ../recno/rec_delete.c /^__rec_delete(dbp, key, flags)$/ +__rec_dleaf ../recno/rec_delete.c /^__rec_dleaf(t, h, index)$/ +__rec_fd ../recno/rec_open.c /^__rec_fd(dbp)$/ +__rec_fmap ../recno/rec_get.c /^__rec_fmap(t, top)$/ +__rec_fpipe 
../recno/rec_get.c /^__rec_fpipe(t, top)$/ +__rec_get ../recno/rec_get.c /^__rec_get(dbp, key, data, flags)$/ +__rec_iput ../recno/rec_put.c /^__rec_iput(t, nrec, data, flags)$/ +__rec_open ../recno/rec_open.c /^__rec_open(fname, flags, mode, openinfo, dflags)$/ +__rec_put ../recno/rec_put.c /^__rec_put(dbp, key, data, flags)$/ +__rec_ret ../recno/rec_utils.c /^__rec_ret(t, e, nrec, key, data)$/ +__rec_search ../recno/rec_search.c /^__rec_search(t, recno, op)$/ +__rec_seq ../recno/rec_seq.c /^__rec_seq(dbp, key, data, flags)$/ +__rec_sync ../recno/rec_close.c /^__rec_sync(dbp, flags)$/ +__rec_vmap ../recno/rec_get.c /^__rec_vmap(t, top)$/ +__rec_vpipe ../recno/rec_get.c /^__rec_vpipe(t, top)$/ +__reclaim_buf ../hash/hash_buf.c /^__reclaim_buf(hashp, bp)$/ +__split_page ../hash/hash_page.c /^__split_page(hashp, obucket, nbucket)$/ +alloc_segs ../hash/hash.c /^alloc_segs(hashp, nsegs)$/ +bt_broot ../btree/bt_split.c /^bt_broot(t, h, l, r)$/ +bt_fast ../btree/bt_put.c /^bt_fast(t, key, data, exactp)$/ +bt_meta ../btree/bt_close.c /^bt_meta(t)$/ +bt_page ../btree/bt_split.c /^bt_page(t, h, lp, rp, skip, ilen)$/ +bt_preserve ../btree/bt_split.c /^bt_preserve(t, pg)$/ +bt_psplit ../btree/bt_split.c /^bt_psplit(t, h, l, r, pskip, ilen)$/ +bt_root ../btree/bt_split.c /^bt_root(t, h, lp, rp, skip, ilen)$/ +bt_rroot ../btree/bt_split.c /^bt_rroot(t, h, l, r)$/ +byteorder ../btree/bt_open.c /^byteorder()$/ +collect_data ../hash/hash_bigkey.c /^collect_data(hashp, bufp, len, set)$/ +collect_key ../hash/hash_bigkey.c /^collect_key(hashp, bufp, len, val, set)$/ +dbm_clearerr ../hash/ndbm.c /^dbm_clearerr(db)$/ +dbm_close ../hash/ndbm.c /^dbm_close(db)$/ +dbm_delete ../hash/ndbm.c /^dbm_delete(db, key)$/ +dbm_dirfno ../hash/ndbm.c /^dbm_dirfno(db)$/ +dbm_error ../hash/ndbm.c /^dbm_error(db)$/ +dbm_fetch ../hash/ndbm.c /^dbm_fetch(db, key)$/ +dbm_firstkey ../hash/ndbm.c /^dbm_firstkey(db)$/ +dbm_nextkey ../hash/ndbm.c /^dbm_nextkey(db)$/ +dbm_open ../hash/ndbm.c /^dbm_open(file, 
flags, mode)$/ +dbm_store ../hash/ndbm.c /^dbm_store(db, key, content, flags)$/ +dbopen ../db/db.c /^dbopen(fname, flags, mode, type, openinfo)$/ +dcharhash ../hash/hash_func.c /^#define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x/ +fetch_bitmap ../hash/hash_page.c /^fetch_bitmap(hashp, ndx)$/ +first_free ../hash/hash_page.c /^first_free(map)$/ +flush_meta ../hash/hash.c /^flush_meta(hashp)$/ +hash2 ../hash/hash_func.c /^hash2(keyarg, len)$/ +hash3 ../hash/hash_func.c /^hash3(keyarg, len)$/ +hash4 ../hash/hash_func.c /^hash4(keyarg, len)$/ +hash_access ../hash/hash.c /^hash_access(hashp, action, key, val)$/ +hash_close ../hash/hash.c /^hash_close(dbp)$/ +hash_delete ../hash/hash.c /^hash_delete(dbp, key, flag)$/ +hash_fd ../hash/hash.c /^hash_fd(dbp)$/ +hash_get ../hash/hash.c /^hash_get(dbp, key, data, flag)$/ +hash_put ../hash/hash.c /^hash_put(dbp, key, data, flag)$/ +hash_realloc ../hash/hash.c /^hash_realloc(p_ptr, oldsize, newsize)$/ +hash_seq ../hash/hash.c /^hash_seq(dbp, key, data, flag)$/ +hash_sync ../hash/hash.c /^hash_sync(dbp, flags)$/ +hcreate ../hash/hsearch.c /^hcreate(nel)$/ +hdestroy ../hash/hash.c /^hdestroy(hashp)$/ +hsearch ../hash/hsearch.c /^hsearch(item, action)$/ +init_hash ../hash/hash.c /^init_hash(hashp, file, info)$/ +init_htab ../hash/hash.c /^init_htab(hashp, nelem)$/ +mpool_bkt ../mpool/mpool.c /^mpool_bkt(mp)$/ +mpool_close ../mpool/mpool.c /^mpool_close(mp)$/ +mpool_filter ../mpool/mpool.c /^mpool_filter(mp, pgin, pgout, pgcookie)$/ +mpool_get ../mpool/mpool.c /^mpool_get(mp, pgno, flags)$/ +mpool_look ../mpool/mpool.c /^mpool_look(mp, pgno)$/ +mpool_new ../mpool/mpool.c /^mpool_new(mp, pgnoaddr)$/ +mpool_open ../mpool/mpool.c /^mpool_open(key, fd, pagesize, maxcache)$/ +mpool_put ../mpool/mpool.c /^mpool_put(mp, page, flags)$/ +mpool_stat ../mpool/mpool.c /^mpool_stat(mp)$/ +mpool_sync ../mpool/mpool.c /^mpool_sync(mp)$/ +mpool_write ../mpool/mpool.c /^mpool_write(mp, bp)$/ +mswap ../btree/bt_conv.c /^mswap(pg)$/ +newbuf 
../hash/hash_buf.c /^newbuf(hashp, addr, prev_bp)$/ +nroot ../btree/bt_open.c /^nroot(t)$/ +open_temp ../hash/hash_page.c /^open_temp(hashp)$/ +overflow_page ../hash/hash_page.c /^overflow_page(hashp)$/ +print_chain ../hash/hash_page.c /^print_chain(addr)$/ +putpair ../hash/hash_page.c /^putpair(p, key, val)$/ +rec_rdelete ../recno/rec_delete.c /^rec_rdelete(t, nrec)$/ +rec_total ../btree/bt_split.c /^rec_total(h)$/ +squeeze_key ../hash/hash_page.c /^squeeze_key(sp, key, val)$/ +swap_header ../hash/hash.c /^swap_header(hashp)$/ +swap_header_copy ../hash/hash.c /^swap_header_copy(srcp, destp)$/ +tmp ../btree/bt_open.c /^tmp()$/ +u_int32_t ../hash/extern.h /^extern u_int32_t (*__default_hash) __P((const void/ +ugly_split ../hash/hash_page.c /^ugly_split(hashp, obucket, old_bufp, new_bufp, cop/ |